2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 * @brief This file implements the IR transformation from firm into ia32-Firm.
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode_t.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
95 static ir_node *initial_fpcw = NULL;
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* it's enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
/* NOTE(review): this fragment is missing lines (opening brace and the return
 * statements); code kept byte-identical, comments only.  Visible logic: a
 * Const is "simple" for the x87 if its tarval is 0.0 or 1.0 -- cf. the
 * vfldz/vfld1 cases in gen_Const() below. */
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
167 * returns true if constant can be created with a simple float command
/* NOTE(review): fragment is missing lines (opening brace, parts of the
 * #ifdef arms, return statements).  Visible logic: 0.0 is always cheap
 * (cf. the xZero case in gen_Const); with CONSTRUCT_SSE_CONST defined a
 * mode_D constant whose lower 32 bit are zero also qualifies. */
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
/* assemble the low 32 bit of the double from the tarval's byte view */
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
199 * Transforms a Const.
/* NOTE(review): many lines of this function are missing from the fragment
 * (braces, else branches, declarations of load/res/floatent/cnst/val,
 * return statements); code kept byte-identical, comments only.
 *
 * Visible structure: float constants are materialised either via SSE
 * (xZero / bit-twiddling under CONSTRUCT_SSE_CONST / movd for mode_F /
 * constant-pool xLoad) or via x87 (vfldz / vfld1 / constant-pool vfld);
 * integer constants become an ia32_Const with the tarval's value. */
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
210 if (mode_is_float(mode)) {
/* --- SSE2 path --- */
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
217 if (tarval_is_null(tv)) {
/* 0.0: xorps-style zero, no memory access needed */
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
222 } else if (tarval_is_one(tv)) {
/* 1.0: all-ones, then shift left/right to form the exponent pattern;
 * shift counts differ for single (26) vs. double (55) precision */
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
/* shift the upper half into place to recreate the full 64bit pattern */
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the constant from a pool entity */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
/* constant loads can always be repeated, so mark rematerializable */
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(block, load, mode_xmm, pn_ia32_xLoad_res);
/* --- x87 path --- */
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
292 floatent = create_float_const_entity(node);
293 /* create_float_const_ent is smart and sometimes creates
295 ls_mode = get_type_mode(get_entity_type(floatent));
297 load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
299 set_ia32_op_type(load, ia32_AddrModeS);
300 set_ia32_am_sc(load, floatent);
301 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
302 res = new_r_Proj(block, load, mode_vfp, pn_ia32_vfld_res);
305 #ifdef CONSTRUCT_SSE_CONST
307 #endif /* CONSTRUCT_SSE_CONST */
308 SET_IA32_ORIG_NODE(load, node);
310 be_dep_on_frame(load);
312 } else { /* non-float mode */
314 tarval *tv = get_Const_tarval(node);
/* normalise to 32bit unsigned before extracting the value */
317 tv = tarval_convert_to(tv, mode_Iu);
319 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
321 panic("couldn't convert constant tarval (%+F)", node);
323 val = get_tarval_long(tv);
325 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
326 SET_IA32_ORIG_NODE(cnst, node);
328 be_dep_on_frame(cnst);
334 * Transforms a SymConst.
/* NOTE(review): fragment is missing lines (braces, declarations of
 * cnst/entity, else keyword, return statement); code kept byte-identical.
 * Float SymConsts become a load of the entity (SSE xLoad or x87 vfld);
 * address SymConsts become an ia32_Const carrying the entity. */
336 static ir_node *gen_SymConst(ir_node *node)
338 ir_node *old_block = get_nodes_block(node);
339 ir_node *block = be_transform_node(old_block);
340 dbg_info *dbgi = get_irn_dbg_info(node);
341 ir_mode *mode = get_irn_mode(node);
344 if (mode_is_float(mode)) {
345 if (ia32_cg_config.use_sse2)
346 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E)
;
348 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
/* only entity addresses are supported as SymConst kind here */
354 if (get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
361 SET_IA32_ORIG_NODE(cnst, node);
363 be_dep_on_frame(cnst);
368 * Create a float type for the given mode and cache it.
370 * @param mode the mode for the float type (might be integer mode for SSE2 types)
371 * @param align alignment
373 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
379 if (mode == mode_Iu) {
380 static ir_type *int_Iu[16] = {NULL, };
382 if (int_Iu[align] == NULL) {
383 snprintf(buf, sizeof(buf), "int_Iu_%u", align);
384 int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
385 /* set the specified alignment */
386 set_type_alignment_bytes(tp, align);
388 return int_Iu[align];
389 } else if (mode == mode_Lu) {
390 static ir_type *int_Lu[16] = {NULL, };
392 if (int_Lu[align] == NULL) {
393 snprintf(buf, sizeof(buf), "int_Lu_%u", align);
394 int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
395 /* set the specified alignment */
396 set_type_alignment_bytes(tp, align);
398 return int_Lu[align];
399 } else if (mode == mode_F) {
400 static ir_type *float_F[16] = {NULL, };
402 if (float_F[align] == NULL) {
403 snprintf(buf, sizeof(buf), "float_F_%u", align);
404 float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
405 /* set the specified alignment */
406 set_type_alignment_bytes(tp, align);
408 return float_F[align];
409 } else if (mode == mode_D) {
410 static ir_type *float_D[16] = {NULL, };
412 if (float_D[align] == NULL) {
413 snprintf(buf, sizeof(buf), "float_D_%u", align);
414 float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return float_D[align];
420 static ir_type *float_E[16] = {NULL, };
422 if (float_E[align] == NULL) {
423 snprintf(buf, sizeof(buf), "float_E_%u", align);
424 float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
425 /* set the specified alignment */
426 set_type_alignment_bytes(tp, align);
428 return float_E[align];
433 * Create a float[2] array type for the given atomic type.
435 * @param tp the atomic type
437 static ir_type *ia32_create_float_array(ir_type *tp) {
439 ir_mode *mode = get_type_mode(tp);
440 unsigned align = get_type_alignment_bytes(tp);
445 if (mode == mode_F) {
446 static ir_type *float_F[16] = {NULL, };
448 if (float_F[align] != NULL)
449 return float_F[align];
450 snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
451 arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
452 } else if (mode == mode_D) {
453 static ir_type *float_D[16] = {NULL, };
455 if (float_D[align] != NULL)
456 return float_D[align];
457 snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
458 arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
460 static ir_type *float_E[16] = {NULL, };
462 if (float_E[align] != NULL)
463 return float_E[align];
464 snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
465 arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
467 set_type_alignment_bytes(arr, align);
468 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
469 set_type_state(arr, layout_fixed);
473 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* NOTE(review): fragment is missing lines (struct field declarations for
 * mode/align, closing braces, declarations of tv/mode/tp/ent); code kept
 * byte-identical.  Lazily builds and caches one global read-only entity per
 * known constant (sign masks, abs masks, ULL bias). */
474 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
476 static const struct {
477 const char *ent_name;
478 const char *cnst_str;
/* NOTE(review): two more fields (an integer mode selector and the alignment)
 * are initialised below but their declarations are missing here */
481 } names [ia32_known_const_max] = {
482 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
483 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
484 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
485 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
486 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
488 static ir_entity *ent_cache[ia32_known_const_max];
490 const char *ent_name, *cnst_str;
496 ent_name = names[kct].ent_name;
497 if (! ent_cache[kct]) {
498 cnst_str = names[kct].cnst_str;
/* pick the tarval mode matching the constant's bit width */
500 switch (names[kct].mode) {
501 case 0: mode = mode_Iu; break;
502 case 1: mode = mode_Lu; break;
503 default: mode = mode_F; break;
505 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
506 tp = ia32_create_float_type(mode, names[kct].align);
/* the ULL bias is a two-element array: { 0, bias } */
508 if (kct == ia32_ULLBIAS)
509 tp = ia32_create_float_array(tp);
510 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
512 set_entity_ld_ident(ent, get_entity_ident(ent));
513 set_entity_visibility(ent, visibility_local);
514 set_entity_variability(ent, variability_constant);
515 set_entity_allocation(ent, allocation_static);
517 if (kct == ia32_ULLBIAS) {
518 ir_initializer_t *initializer = create_initializer_compound(2);
520 set_initializer_compound_value(initializer, 0,
521 create_initializer_tarval(get_tarval_null(mode)));
522 set_initializer_compound_value(initializer, 1,
523 create_initializer_tarval(tv));
525 set_entity_initializer(ent, initializer);
527 set_entity_initializer(ent, create_initializer_tarval(tv));
530 /* cache the entry */
531 ent_cache[kct] = ent;
534 return ent_cache[kct];
538 * return true if the node is a Proj(Load) and could be used in source address
539 * mode for another node. Will return only true if the @p other node is not
540 * dependent on the memory of the Load (for binary operations use the other
541 * input here, for unary operations use NULL).
/* NOTE(review): fragment is missing lines (declarations of load/pn, the
 * return statements of each early-out); code kept byte-identical. */
543 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
544 ir_node *other, ir_node *other2, match_flags_t flags)
549 /* float constants are always available */
550 if (is_Const(node)) {
551 ir_mode *mode = get_irn_mode(node);
552 if (mode_is_float(mode)) {
553 if (ia32_cg_config.use_sse2) {
554 if (is_simple_sse_Const(node))
557 if (is_simple_x87_Const(node))
560 if (get_irn_n_edges(node) > 1)
/* from here on the candidate must be a Proj(Load, pn_Load_res) in our block */
568 load = get_Proj_pred(node);
569 pn = get_Proj_proj(node);
570 if (!is_Load(load) || pn != pn_Load_res)
572 if (get_nodes_block(load) != block)
574 /* we only use address mode if we're the only user of the load */
575 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
577 /* in some edge cases with address mode we might reach the load normally
578 * and through some AM sequence, if it is already materialized then we
579 * can't create an AM node from it */
580 if (be_is_transformed(node))
583 /* don't do AM if other node inputs depend on the load (via mem-proj) */
584 if (other != NULL && prevents_AM(block, load, other))
587 if (other2 != NULL && prevents_AM(block, load, other2))
593 typedef struct ia32_address_mode_t ia32_address_mode_t;
/* Collected matching state for an operation that may use ia32 address modes.
 * NOTE(review): several fields are missing from this fragment (at least
 * addr, ls_mode, mem_proj, new_op1/new_op2 and pinned, judging by their use
 * in match_arguments()/set_am_attributes()); code kept byte-identical. */
594 struct ia32_address_mode_t {
599 ia32_op_type_t op_type;
603 unsigned commutative : 1;
604 unsigned ins_permuted : 1;
607 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
609 /* construct load address */
610 memset(addr, 0, sizeof(addr[0]));
611 ia32_create_address_mode(addr, ptr, 0);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
615 addr->mem = be_transform_node(mem);
/* Fill an ia32_address_mode_t from either a float Const (constant-pool
 * entity) or a Proj(Load) candidate.  NOTE(review): fragment is missing
 * lines (braces, declarations of load/ptr/mem/new_mem, the early return,
 * addr->mem assignment); code kept byte-identical. */
618 static void build_address(ia32_address_mode_t *am, ir_node *node,
619 ia32_create_am_flags_t flags)
621 ia32_address_t *addr = &am->addr;
/* float constants are materialised through a pool entity */
627 if (is_Const(node)) {
628 ir_entity *entity = create_float_const_entity(node);
629 addr->base = noreg_GP;
630 addr->index = noreg_GP;
632 addr->symconst_ent = entity;
634 am->ls_mode = get_type_mode(get_entity_type(entity));
635 am->pinned = op_pin_state_floats;
/* otherwise the node is a Proj(Load): take over the Load's address inputs */
639 load = get_Proj_pred(node);
640 ptr = get_Load_ptr(load);
641 mem = get_Load_mem(load);
642 new_mem = be_transform_node(mem);
643 am->pinned = get_irn_pinned(load);
644 am->ls_mode = get_Load_mode(load);
645 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
648 /* construct load address */
649 ia32_create_address_mode(addr, ptr, flags);
651 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
652 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy scale/symconst/offset/frame information from an ia32_address_t onto a
 * constructed ia32 node.  NOTE(review): fragment is missing lines (braces and
 * presumably the condition guarding the use_frame/frame_ent pair); code kept
 * byte-identical. */
656 static void set_address(ir_node *node, const ia32_address_t *addr)
658 set_ia32_am_scale(node, addr->scale);
659 set_ia32_am_sc(node, addr->symconst_ent);
660 set_ia32_am_offs_int(node, addr->offset);
661 if (addr->symconst_sign)
662 set_ia32_am_sc_sign(node);
664 set_ia32_use_frame(node);
665 set_ia32_frame_ent(node, addr->frame_entity);
669 * Apply attributes of a given address mode to a node.
/* NOTE(review): fragment is missing lines (braces and presumably the
 * condition guarding set_ia32_commutative, cf. am->commutative); code kept
 * byte-identical. */
671 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
673 set_address(node, &am->addr);
675 set_ia32_op_type(node, am->op_type);
676 set_ia32_ls_mode(node, am->ls_mode);
677 if (am->pinned == op_pin_state_pinned) {
678 /* beware: some nodes are already pinned and did not allow to change the state */
679 if (get_irn_pinned(node) != op_pin_state_pinned)
680 set_irn_pinned(node, op_pin_state_pinned);
683 set_ia32_commutative(node);
687 * Check, if a given node is a Down-Conv, ie. an integer Conv
688 * from a mode with more bits to a mode with lesser bits.
689 * Moreover, we return only true if the node has not more than 1 user.
691 * @param node the node
692 * @return non-zero if node is a Down-Conv
/* NOTE(review): fragment is missing lines (braces, the is_Conv check, the
 * early returns and the start of the final conjunction return); code kept
 * byte-identical. */
694 static int is_downconv(const ir_node *node)
702 /* we only want to skip the conv when we're the only user
703 * (not optimal but for now...)
705 if (get_irn_n_edges(node) > 1)
708 src_mode = get_irn_mode(get_Conv_op(node));
709 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the target must not be wider */
711 ia32_mode_needs_gp_reg(src_mode) &&
712 ia32_mode_needs_gp_reg(dest_mode) &&
713 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
716 /* Skip all Down-Conv's on a given node and return the resulting node. */
717 ir_node *ia32_skip_downconv(ir_node *node)
719 while (is_downconv(node))
720 node = get_Conv_op(node);
/* Widen a value to 32 bit via an I2I Conv, choosing a signed or unsigned
 * target mode depending on the source mode's signedness.
 * NOTE(review): fragment is missing lines (braces, declarations of
 * tgt_mode/block/dbgi and the tgt_mode assignments); code kept
 * byte-identical. */
725 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
727 ir_mode *mode = get_irn_mode(node);
732 if (mode_is_signed(mode)) {
737 block = get_nodes_block(node);
738 dbgi = get_irn_dbg_info(node);
740 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
744 * matches operands of a node into ia32 addressing/operand modes. This covers
745 * usage of source address mode, immediates, operations with non 32-bit modes,
747 * The resulting data is filled into the @p am struct. block is the block
748 * of the node whose arguments are matched. op1, op2 are the first and second
749 * input that are matched (op1 may be NULL). other_op is another unrelated
750 * input that is not matched! but which is needed sometimes to check if AM
751 * for op1/op2 is legal.
752 * @p flags describes the supported modes of the operation in detail.
/* NOTE(review): this function is heavily fragmented here -- braces,
 * declarations (use_am, use_immediate, noreg, new_mode, ...), several else
 * branches and assignments are missing; code kept byte-identical, comments
 * only. */
754 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
755 ir_node *op1, ir_node *op2, ir_node *other_op,
758 ia32_address_t *addr = &am->addr;
759 ir_mode *mode = get_irn_mode(op2);
760 int mode_bits = get_mode_size_bits(mode);
761 ir_node *new_op1, *new_op2;
763 unsigned commutative;
764 int use_am_and_immediates;
767 memset(am, 0, sizeof(am[0]));
/* decode the supported matching modes from the flags */
769 commutative = (flags & match_commutative) != 0;
770 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
771 use_am = (flags & match_am) != 0;
772 use_immediate = (flags & match_immediate) != 0;
773 assert(!use_am_and_immediates || use_immediate);
776 assert(!commutative || op1 != NULL);
777 assert(use_am || !(flags & match_8bit_am));
778 assert(use_am || !(flags & match_16bit_am));
/* 8/16 bit AM is only usable when explicitly allowed */
780 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
781 (mode_bits == 16 && !(flags & match_16bit_am))) {
785 /* we can simply skip downconvs for mode neutral nodes: the upper bits
786 * can be random for these operations */
787 if (flags & match_mode_neutral) {
788 op2 = ia32_skip_downconv(op2);
790 op1 = ia32_skip_downconv(op1);
794 /* match immediates. firm nodes are normalized: constants are always on the
797 if (!(flags & match_try_am) && use_immediate) {
798 new_op2 = try_create_Immediate(op2, 0);
/* try source address mode on op2 first */
801 if (new_op2 == NULL &&
802 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
803 build_address(am, op2, 0);
804 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
805 if (mode_is_float(mode)) {
806 new_op2 = ia32_new_NoReg_vfp(env_cg);
810 am->op_type = ia32_AddrModeS;
/* commutative: try swapping so op1 becomes the memory operand */
811 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
813 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
815 build_address(am, op1, 0);
817 if (mode_is_float(mode)) {
818 noreg = ia32_new_NoReg_vfp(env_cg);
823 if (new_op2 != NULL) {
826 new_op1 = be_transform_node(op2);
/* operands were swapped, remember so selectors can be inverted */
828 am->ins_permuted = 1;
830 am->op_type = ia32_AddrModeS;
/* no AM possible: plain register/immediate operands */
832 am->op_type = ia32_Normal;
834 if (flags & match_try_am) {
840 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
842 new_op2 = be_transform_node(op2);
844 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
846 if (addr->base == NULL)
847 addr->base = noreg_GP;
848 if (addr->index == NULL)
849 addr->index = noreg_GP;
850 if (addr->mem == NULL)
853 am->new_op1 = new_op1;
854 am->new_op2 = new_op2;
855 am->commutative = commutative;
859 * "Fixes" a node that uses address mode by turning it into mode_T
860 * and returning a pn_ia32_res Proj.
862 * @param node the node
863 * @param am its address mode
865 * @return a Proj(pn_ia32_res) if a memory address mode is used,
/* NOTE(review): fragment is missing lines (braces, declarations of mode/load,
 * the early return and final return); code kept byte-identical. */
868 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
873 if (am->mem_proj == NULL)
876 /* we have to create a mode_T so the old MemProj can attach to us */
877 mode = get_irn_mode(node);
878 load = get_Proj_pred(am->mem_proj);
/* redirect everything that referenced the old Load to the new AM node */
880 be_set_transformed_node(load, node);
882 if (mode != mode_T) {
883 set_irn_mode(node, mode_T);
884 return new_rd_Proj(NULL, get_nodes_block(node), node, mode, pn_ia32_res);
891 * Construct a standard binary operation, set AM and immediate if required.
893 * @param node The original node for which the binop is created
894 * @param op1 The first operand
895 * @param op2 The second operand
896 * @param func The node constructor function
897 * @return The constructed ia32 node.
/* NOTE(review): fragment is missing lines (braces, dbgi declaration, final
 * return); code kept byte-identical. */
899 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
900 construct_binop_func *func, match_flags_t flags)
903 ir_node *block, *new_block, *new_node;
904 ia32_address_mode_t am;
905 ia32_address_t *addr = &am.addr;
907 block = get_nodes_block(node);
908 match_arguments(&am, block, op1, op2, NULL, flags);
910 dbgi = get_irn_dbg_info(node);
911 new_block = be_transform_node(block);
912 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
913 am.new_op1, am.new_op2);
914 set_am_attributes(new_node, &am);
915 /* we can't use source address mode anymore when using immediates */
916 if (!(flags & match_am_and_immediates) &&
917 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
918 set_ia32_am_support(new_node, ia32_am_none);
919 SET_IA32_ORIG_NODE(new_node, node);
921 new_node = fix_mem_proj(new_node, &am);
927 * Generic names for the inputs of an ia32 binary op.
/* NOTE(review): the "enum {" opener and closing "};" are not visible in this
 * fragment.  The COMPILETIME_ASSERTs below pin the generic input numbers to
 * the generated Adc/Sbb node input numbers so gen_binop_flags() can fetch
 * operands position-independently. */
930 n_ia32_l_binop_left, /**< ia32 left input */
931 n_ia32_l_binop_right, /**< ia32 right input */
932 n_ia32_l_binop_eflags /**< ia32 eflags input */
934 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
935 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
936 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
937 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
938 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
939 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
942 * Construct a binary operation which also consumes the eflags.
944 * @param node The node to transform
945 * @param func The node constructor function
946 * @param flags The match flags
947 * @return The constructor ia32 node
/* NOTE(review): fragment is missing lines (braces, dbgi declaration, the
 * flags parameter on the signature's continuation line, final return); code
 * kept byte-identical. */
949 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
952 ir_node *src_block = get_nodes_block(node);
953 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
954 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
955 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
957 ir_node *block, *new_node, *new_eflags;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op so AM is refused if it depends on the load */
961 match_arguments(&am, src_block, op1, op2, eflags, flags);
963 dbgi = get_irn_dbg_info(node);
964 block = be_transform_node(src_block);
965 new_eflags = be_transform_node(eflags);
966 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2, new_eflags);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
/* Return the (cached) transformed node for the initial x87 FPU control word.
 * NOTE(review): fragment is missing lines (braces, fpcw declaration, the two
 * return statements); code kept byte-identical. */
980 static ir_node *get_fpcw(void)
983 if (initial_fpcw != NULL)
986 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
987 &ia32_fp_cw_regs[REG_FPCW]);
988 initial_fpcw = be_transform_node(fpcw);
994 * Construct a standard binary operation, set AM and immediate if required.
996 * @param op1 The first operand
997 * @param op2 The second operand
998 * @param func The node constructor function
999 * @return The constructed ia32 node.
/* x87 variant of gen_binop: wires in the FPU control word and records operand
 * permutation.  NOTE(review): fragment is missing lines (braces, dbgi
 * declaration, the flags |= for AM, final return); code kept byte-identical. */
1001 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1002 construct_binop_float_func *func)
1004 ir_mode *mode = get_irn_mode(node);
1006 ir_node *block, *new_block, *new_node;
1007 ia32_address_mode_t am;
1008 ia32_address_t *addr = &am.addr;
1009 ia32_x87_attr_t *attr;
1010 /* All operations are considered commutative, because there are reverse
1012 match_flags_t flags = match_commutative;
1014 /* happens for div nodes... */
1016 mode = get_divop_resmod(node);
1018 /* cannot use address mode with long double on x87 */
1019 if (get_mode_size_bits(mode) <= 64)
1022 block = get_nodes_block(node);
1023 match_arguments(&am, block, op1, op2, NULL, flags);
1025 dbgi = get_irn_dbg_info(node);
1026 new_block = be_transform_node(block);
1027 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1028 am.new_op1, am.new_op2, get_fpcw());
1029 set_am_attributes(new_node, &am);
/* remember whether the matcher swapped the operands (reverse x87 variants) */
1031 attr = get_ia32_x87_attr(new_node);
1032 attr->attr.data.ins_permuted = am.ins_permuted;
1034 SET_IA32_ORIG_NODE(new_node, node);
1036 new_node = fix_mem_proj(new_node, &am);
1042 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1044 * @param op1 The first operand
1045 * @param op2 The second operand
1046 * @param func The node constructor function
1047 * @return The constructed ia32 node.
/* NOTE(review): fragment is missing lines (braces, dbgi declaration, else
 * keywords, the break and op2 reassignment inside the Conv-skipping loop,
 * final return); code kept byte-identical. */
1049 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1050 construct_shift_func *func,
1051 match_flags_t flags)
1054 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1056 assert(! mode_is_float(get_irn_mode(node)));
1057 assert(flags & match_immediate);
1058 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* shifted value: either mode-neutral (skip downconvs) or widened to 32 bit */
1060 if (flags & match_mode_neutral) {
1061 op1 = ia32_skip_downconv(op1);
1062 new_op1 = be_transform_node(op1);
1063 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1064 new_op1 = create_upconv(op1, node);
1066 new_op1 = be_transform_node(op1);
1069 /* the shift amount can be any mode that is bigger than 5 bits, since all
1070 * other bits are ignored anyway */
1071 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1072 ir_node *const op = get_Conv_op(op2);
1073 if (mode_is_float(get_irn_mode(op)))
1076 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1078 new_op2 = create_immediate_or_transform(op2, 0);
1080 dbgi = get_irn_dbg_info(node);
1081 block = get_nodes_block(node);
1082 new_block = be_transform_node(block);
1083 new_node = func(dbgi, new_block, new_op1, new_op2);
1084 SET_IA32_ORIG_NODE(new_node, node);
1086 /* lowered shift instruction may have a dependency operand, handle it here */
1087 if (get_irn_arity(node) == 3) {
1088 /* we have a dependency */
1089 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1090 add_irn_dep(new_node, new_dep);
1098 * Construct a standard unary operation, set AM and immediate if required.
1100 * @param op The operand
1101 * @param func The node constructor function
1102 * @return The constructed ia32 node.
/* NOTE(review): fragment is missing lines (braces, dbgi declaration, final
 * return); code kept byte-identical. */
1104 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1105 match_flags_t flags)
1108 ir_node *block, *new_block, *new_op, *new_node;
/* unary ops here support at most mode-neutral matching */
1110 assert(flags == 0 || flags == match_mode_neutral)
;
1111 if (flags & match_mode_neutral) {
1112 op = ia32_skip_downconv(op);
1115 new_op = be_transform_node(op);
1116 dbgi = get_irn_dbg_info(node);
1117 block = get_nodes_block(node);
1118 new_block = be_transform_node(block);
1119 new_node = func(dbgi, new_block, new_op);
1121 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from a prepared address (base/index substituted with
 * noreg_GP when absent).  NOTE(review): fragment is missing lines (braces,
 * the base = addr->base assignment and NULL checks, the noreg assignments,
 * final return); code kept byte-identical. */
1126 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1127 ia32_address_t *addr)
1129 ir_node *base, *index, *res;
1135 base = be_transform_node(base);
1138 index = addr->index;
1139 if (index == NULL) {
1142 index = be_transform_node(index);
1145 res = new_bd_ia32_Lea(dbgi, block, base, index);
1146 set_address(res, addr);
1152 * Returns non-zero if a given address mode has a symbolic or
1153 * numerical offset != 0.
1155 static int am_has_immediates(const ia32_address_t *addr)
1157 return addr->offset != 0 || addr->symconst_ent != NULL
1158 || addr->frame_entity || addr->use_frame;
1162 * Creates an ia32 Add.
1164 * @return the created ia32 Add node
/* NOTE(review): fragment is missing lines (braces, dbgi declaration, else
 * keywords, several returns, the second argument of the ir_fprintf call);
 * code kept byte-identical, comments only. */
1166 static ir_node *gen_Add(ir_node *node)
1168 ir_mode *mode = get_irn_mode(node);
1169 ir_node *op1 = get_Add_left(node);
1170 ir_node *op2 = get_Add_right(node);
1172 ir_node *block, *new_block, *new_node, *add_immediate_op;
1173 ia32_address_t addr;
1174 ia32_address_mode_t am;
/* float adds go through the generic binop helpers (SSE or x87) */
1176 if (mode_is_float(mode)) {
1177 if (ia32_cg_config.use_sse2)
1178 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1179 match_commutative | match_am);
1181 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1184 ia32_mark_non_am(node);
1186 op2 = ia32_skip_downconv(op2);
1187 op1 = ia32_skip_downconv(op1);
/* integer add selection strategy: */
1191 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1192 * 1. Add with immediate -> Lea
1193 * 2. Add with possible source address mode -> Add
1194 * 3. Otherwise -> Lea
1196 memset(&addr, 0, sizeof(addr));
1197 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1198 add_immediate_op = NULL;
1200 dbgi = get_irn_dbg_info(node);
1201 block = get_nodes_block(node);
1202 new_block = be_transform_node(block);
/* the whole Add folded into pure immediates -> emit a Const */
1205 if (addr.base == NULL && addr.index == NULL) {
1206 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1207 addr.symconst_sign, 0, addr.offset);
1208 be_dep_on_frame(new_node);
1209 SET_IA32_ORIG_NODE(new_node, node);
1212 /* add with immediate? */
1213 if (addr.index == NULL) {
1214 add_immediate_op = addr.base;
1215 } else if (addr.base == NULL && addr.scale == 0) {
1216 add_immediate_op = addr.index;
1219 if (add_immediate_op != NULL) {
/* Add x, 0: just forward the operand */
1220 if (!am_has_immediates(&addr)) {
1221 #ifdef DEBUG_libfirm
1222 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1225 return be_transform_node(add_immediate_op);
1228 new_node = create_lea_from_address(dbgi, new_block, &addr);
1229 SET_IA32_ORIG_NODE(new_node, node);
1233 /* test if we can use source address mode */
1234 match_arguments(&am, block, op1, op2, NULL, match_commutative
1235 | match_mode_neutral | match_am | match_immediate | match_try_am);
1237 /* construct an Add with source address mode */
1238 if (am.op_type == ia32_AddrModeS) {
1239 ia32_address_t *am_addr = &am.addr;
1240 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1241 am_addr->index, am_addr->mem, am.new_op1,
1243 set_am_attributes(new_node, &am);
1244 SET_IA32_ORIG_NODE(new_node, node);
1246 new_node = fix_mem_proj(new_node, &am);
1251 /* otherwise construct a lea */
1252 new_node = create_lea_from_address(dbgi, new_block, &addr);
1253 SET_IA32_ORIG_NODE(new_node, node);
1258 * Creates an ia32 Mul.
1260 * @return the created ia32 Mul node
1262 static ir_node *gen_Mul(ir_node *node)
1264 ir_node *op1 = get_Mul_left(node);
1265 ir_node *op2 = get_Mul_right(node);
1266 ir_mode *mode = get_irn_mode(node);
1268 if (mode_is_float(mode)) {
1269 if (ia32_cg_config.use_sse2)
1270 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1271 match_commutative | match_am);
1273 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1275 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1276 match_commutative | match_am | match_mode_neutral |
1277 match_immediate | match_am_and_immediates);
1281 * Creates an ia32 Mulh.
1282 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1283 * this result while Mul returns the lower 32 bit.
1285 * @return the created ia32 Mulh node
1287 static ir_node *gen_Mulh(ir_node *node)
1289 ir_node *block = get_nodes_block(node);
1290 ir_node *new_block = be_transform_node(block);
1291 dbg_info *dbgi = get_irn_dbg_info(node);
1292 ir_node *op1 = get_Mulh_left(node);
1293 ir_node *op2 = get_Mulh_right(node);
1294 ir_mode *mode = get_irn_mode(node);
1296 ir_node *proj_res_high;
1298 if (mode_is_signed(mode)) {
1299 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1300 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1302 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1303 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_Mul_res_high);
1305 return proj_res_high;
/* Transforms a firm And. And with 0xFF/0xFFFF is recognised as a zero
 * extension and turned into a Conv; everything else becomes an ia32 And. */
1309 * Creates an ia32 And.
1311 * @return The created ia32 And node
1313 static ir_node *gen_And(ir_node *node)
1315 ir_node *op1 = get_And_left(node);
1316 ir_node *op2 = get_And_right(node);
1317 assert(! mode_is_float(get_irn_mode(node)));
1319 /* is it a zero extension? */
1320 if (is_Const(op2)) {
1321 tarval *tv = get_Const_tarval(op2);
1322 long v = get_tarval_long(tv);
/* 0xFF -> zero-extend from 8 bit, 0xFFFF -> zero-extend from 16 bit;
 * the elided lines presumably select src_mode accordingly — TODO confirm */
1324 if (v == 0xFF || v == 0xFFFF) {
1325 dbg_info *dbgi = get_irn_dbg_info(node);
1326 ir_node *block = get_nodes_block(node);
1333 assert(v == 0xFFFF);
1336 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And with AM/immediate matching */
1341 return gen_binop(node, op1, op2, new_bd_ia32_And,
1342 match_commutative | match_mode_neutral | match_am | match_immediate);
1348 * Creates an ia32 Or.
1350 * @return The created ia32 Or node
1352 static ir_node *gen_Or(ir_node *node)
1354 ir_node *op1 = get_Or_left(node);
1355 ir_node *op2 = get_Or_right(node);
1357 assert (! mode_is_float(get_irn_mode(node)));
1358 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1359 | match_mode_neutral | match_am | match_immediate);
1365 * Creates an ia32 Eor.
1367 * @return The created ia32 Eor node
1369 static ir_node *gen_Eor(ir_node *node)
1371 ir_node *op1 = get_Eor_left(node);
1372 ir_node *op2 = get_Eor_right(node);
1374 assert(! mode_is_float(get_irn_mode(node)));
1375 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1376 | match_mode_neutral | match_am | match_immediate);
/* Transforms a firm Sub. Float Subs go to SSE2 xSub or x87 vfsub; integer
 * Subs become an ia32 Sub (non-commutative, so no match_commutative). */
1381 * Creates an ia32 Sub.
1383 * @return The created ia32 Sub node
1385 static ir_node *gen_Sub(ir_node *node)
1387 ir_node *op1 = get_Sub_left(node);
1388 ir_node *op2 = get_Sub_right(node);
1389 ir_mode *mode = get_irn_mode(node);
1391 if (mode_is_float(mode)) {
1392 if (ia32_cg_config.use_sse2)
1393 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1395 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,Const should have been normalised to Add x,-Const by the
 * middleend; warn so the missing normalisation gets noticed */
1398 if (is_Const(op2)) {
1399 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1403 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1404 | match_am | match_immediate);
/* Combines the memory input of the original node (src_mem) with the memory
 * of a consumed address-mode operand (am_mem), avoiding memory self-loops:
 * projections of the folded load must not feed the new node's memory. */
1407 static ir_node *transform_AM_mem(ir_node *const block,
1408 ir_node *const src_val,
1409 ir_node *const src_mem,
1410 ir_node *const am_mem)
/* no AM memory: just use the transformed original memory */
1412 if (is_NoMem(am_mem)) {
1413 return be_transform_node(src_mem);
1414 } else if (is_Proj(src_val) &&
1416 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1417 /* avoid memory loop */
/* src_mem is a Sync: rebuild it without predecessors that come from
 * the folded load, then append am_mem */
1419 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1420 ir_node *const ptr_pred = get_Proj_pred(src_val);
1421 int const arity = get_Sync_n_preds(src_mem);
1426 NEW_ARR_A(ir_node*, ins, arity + 1);
1428 /* NOTE: This sometimes produces dead-code because the old sync in
1429 * src_mem might not be used anymore, we should detect this case
1430 * and kill the sync... */
1431 for (i = arity - 1; i >= 0; --i) {
1432 ir_node *const pred = get_Sync_pred(src_mem, i);
1434 /* avoid memory loop */
1435 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1438 ins[n++] = be_transform_node(pred);
1443 return new_r_Sync(block, n, ins);
/* default: Sync the transformed original memory with am_mem */
1447 ins[0] = be_transform_node(src_mem);
1449 return new_r_Sync(block, 2, ins);
1454 * Create a 32bit to 64bit signed extension.
1456 * @param dbgi debug info
1457 * @param block the block where node nodes should be placed
1458 * @param val the value to extend
1459 * @param orig the original node
1461 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1462 ir_node *val, const ir_node *orig)
1467 if (ia32_cg_config.use_short_sex_eax) {
1468 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1469 be_dep_on_frame(pval);
1470 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1472 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1473 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1475 SET_IA32_ORIG_NODE(res, orig);
/* Common transformation for Div/Mod/DivMod: selects the operands per opcode,
 * folds the divisor as a source-AM operand if possible, widens the dividend
 * to 64 bit (sign extension for signed, zero for unsigned) and emits
 * IDiv/Div. The caller projects the wanted result(s) from the new node. */
1480 * Generates an ia32 DivMod with additional infrastructure for the
1481 * register allocator if needed.
1483 static ir_node *create_Div(ir_node *node)
1485 dbg_info *dbgi = get_irn_dbg_info(node);
1486 ir_node *block = get_nodes_block(node);
1487 ir_node *new_block = be_transform_node(block);
1494 ir_node *sign_extension;
1495 ia32_address_mode_t am;
1496 ia32_address_t *addr = &am.addr;
1498 /* the upper bits have random contents for smaller modes */
1499 switch (get_irn_opcode(node)) {
1501 op1 = get_Div_left(node);
1502 op2 = get_Div_right(node);
1503 mem = get_Div_mem(node);
1504 mode = get_Div_resmode(node);
1507 op1 = get_Mod_left(node);
1508 op2 = get_Mod_right(node);
1509 mem = get_Mod_mem(node);
1510 mode = get_Mod_resmode(node);
1513 op1 = get_DivMod_left(node);
1514 op2 = get_DivMod_right(node);
1515 mem = get_DivMod_mem(node);
1516 mode = get_DivMod_resmode(node);
1519 panic("invalid divmod node %+F", node);
1522 match_arguments(&am, block, op1, op2, NULL, match_am);
1524 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1525 is the memory of the consumed address. We can have only the second op as address
1526 in Div nodes, so check only op2. */
1527 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed: sign-extend the dividend into edx via create_sex_32_64 */
1529 if (mode_is_signed(mode)) {
1530 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1531 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1532 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned: upper half of the dividend is simply zero */
1534 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1535 be_dep_on_frame(sign_extension);
1537 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1538 addr->index, new_mem, am.new_op2,
1539 am.new_op1, sign_extension);
/* keep the original pinned state — division may trap */
1542 set_irn_pinned(new_node, get_irn_pinned(node));
1544 set_am_attributes(new_node, &am);
1545 SET_IA32_ORIG_NODE(new_node, node);
1547 new_node = fix_mem_proj(new_node, &am);
1553 * Generates an ia32 Mod.
1555 static ir_node *gen_Mod(ir_node *node)
1557 return create_Div(node);
1561 * Generates an ia32 Div.
1563 static ir_node *gen_Div(ir_node *node)
1565 return create_Div(node);
1569 * Generates an ia32 DivMod.
1571 static ir_node *gen_DivMod(ir_node *node)
1573 return create_Div(node);
1579 * Creates an ia32 floating Div.
1581 * @return The created ia32 xDiv node
1583 static ir_node *gen_Quot(ir_node *node)
1585 ir_node *op1 = get_Quot_left(node);
1586 ir_node *op2 = get_Quot_right(node);
1588 if (ia32_cg_config.use_sse2) {
1589 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1591 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1597 * Creates an ia32 Shl.
1599 * @return The created ia32 Shl node
1601 static ir_node *gen_Shl(ir_node *node)
1603 ir_node *left = get_Shl_left(node);
1604 ir_node *right = get_Shl_right(node);
1606 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1607 match_mode_neutral | match_immediate);
1611 * Creates an ia32 Shr.
1613 * @return The created ia32 Shr node
1615 static ir_node *gen_Shr(ir_node *node)
1617 ir_node *left = get_Shr_left(node);
1618 ir_node *right = get_Shr_right(node);
1620 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
/* Transforms a firm Shrs (arithmetic shift right). Recognises two special
 * patterns before falling back to a plain Sar:
 *  - Shrs(x, 31) is the 32->64 sign extension idiom,
 *  - Shrs(Shl(x, c), c) for c in {16, 24} is an 8/16 bit sign extension. */
1626 * Creates an ia32 Sar.
1628 * @return The created ia32 Shrs node
1630 static ir_node *gen_Shrs(ir_node *node)
1632 ir_node *left = get_Shrs_left(node);
1633 ir_node *right = get_Shrs_right(node);
/* shift by the elided constant (presumably 31 — TODO confirm) is a
 * sign-bit broadcast, i.e. a sign extension */
1635 if (is_Const(right)) {
1636 tarval *tv = get_Const_tarval(right);
1637 long val = get_tarval_long(tv);
1639 /* this is a sign extension */
1640 dbg_info *dbgi = get_irn_dbg_info(node);
1641 ir_node *block = be_transform_node(get_nodes_block(node));
1642 ir_node *new_op = be_transform_node(left);
1644 return create_sex_32_64(dbgi, block, new_op, node);
1648 /* 8 or 16 bit sign extension? */
1649 if (is_Const(right) && is_Shl(left)) {
1650 ir_node *shl_left = get_Shl_left(left);
1651 ir_node *shl_right = get_Shl_right(left);
1652 if (is_Const(shl_right)) {
1653 tarval *tv1 = get_Const_tarval(right);
1654 tarval *tv2 = get_Const_tarval(shl_right);
/* Shl then Shrs by the same amount: 24 -> extend from 8 bit,
 * 16 -> extend from 16 bit */
1655 if (tv1 == tv2 && tarval_is_long(tv1)) {
1656 long val = get_tarval_long(tv1);
1657 if (val == 16 || val == 24) {
1658 dbg_info *dbgi = get_irn_dbg_info(node);
1659 ir_node *block = get_nodes_block(node);
1669 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain Sar */
1678 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1684 * Creates an ia32 Rol.
1686 * @param op1 The first operator
1687 * @param op2 The second operator
1688 * @return The created ia32 RotL node
1690 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1692 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1698 * Creates an ia32 Ror.
1699 * NOTE: There is no RotR with immediate because this would always be a RotL
1700 * "imm-mode_size_bits" which can be pre-calculated.
1702 * @param op1 The first operator
1703 * @param op2 The second operator
1704 * @return The created ia32 RotR node
1706 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1708 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
/* Transforms a firm Rotl. If the rotate amount has the shape
 * "mode_size_bits - e" (already normalised to Add(Minus(e), bits)),
 * a RotR by e is emitted instead; otherwise a plain RotL. */
1714 * Creates an ia32 RotR or RotL (depending on the found pattern).
1716 * @return The created ia32 RotL or RotR node
1718 static ir_node *gen_Rotl(ir_node *node)
1720 ir_node *rotate = NULL;
1721 ir_node *op1 = get_Rotl_left(node);
1722 ir_node *op2 = get_Rotl_right(node);
1724 /* Firm has only RotL, so we are looking for a right (op2)
1725 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1726 that means we can create a RotR instead of an Add and a RotL */
/* the elided guard presumably checks is_Add(op2) — TODO confirm */
1730 ir_node *left = get_Add_left(add);
1731 ir_node *right = get_Add_right(add);
1732 if (is_Const(right)) {
1733 tarval *tv = get_Const_tarval(right);
1734 ir_mode *mode = get_irn_mode(node);
1735 long bits = get_mode_size_bits(mode);
/* pattern matched: Add(Minus(e), bits) with bits == mode size */
1737 if (is_Minus(left) &&
1738 tarval_is_long(tv) &&
1739 get_tarval_long(tv) == bits &&
1742 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1743 rotate = gen_Ror(node, op1, get_Minus_op(left));
1748 if (rotate == NULL) {
1749 rotate = gen_Rol(node, op1, op2);
/* Transforms a firm Minus. Float negation flips the sign bit: SSE2 xors
 * with a sign-mask constant from memory, x87 uses fchs; integer negation
 * becomes an ia32 Neg. */
1758 * Transforms a Minus node.
1760 * @return The created ia32 Minus node
1762 static ir_node *gen_Minus(ir_node *node)
1764 ir_node *op = get_Minus_op(node);
1765 ir_node *block = be_transform_node(get_nodes_block(node));
1766 dbg_info *dbgi = get_irn_dbg_info(node);
1767 ir_mode *mode = get_irn_mode(node);
1772 if (mode_is_float(mode)) {
1773 ir_node *new_op = be_transform_node(op);
1774 if (ia32_cg_config.use_sse2) {
1775 /* TODO: non-optimal... if we have many xXors, then we should
1776 * rather create a load for the const and use that instead of
1777 * several AM nodes... */
1778 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1780 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1781 nomem, new_op, noreg_xmm);
/* pick the single or double precision sign-bit mask constant */
1783 size = get_mode_size_bits(mode);
1784 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1786 set_ia32_am_sc(new_node, ent);
1787 set_ia32_op_type(new_node, ia32_AddrModeS);
1788 set_ia32_ls_mode(new_node, mode);
/* x87: change-sign instruction */
1790 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negation */
1793 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1796 SET_IA32_ORIG_NODE(new_node, node);
1802 * Transforms a Not node.
1804 * @return The created ia32 Not node
1806 static ir_node *gen_Not(ir_node *node)
1808 ir_node *op = get_Not_op(node);
1810 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1811 assert (! mode_is_float(get_irn_mode(node)));
1813 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Transforms a firm Abs. Float abs clears the sign bit (SSE2: And with an
 * abs-mask constant; x87: fabs). Integer abs uses the classic branch-free
 * sequence: s = x >> 31; result = (x ^ s) - s. */
1819 * Transforms an Abs node.
1821 * @return The created ia32 Abs node
1823 static ir_node *gen_Abs(ir_node *node)
1825 ir_node *block = get_nodes_block(node);
1826 ir_node *new_block = be_transform_node(block);
1827 ir_node *op = get_Abs_op(node);
1828 dbg_info *dbgi = get_irn_dbg_info(node);
1829 ir_mode *mode = get_irn_mode(node);
1835 if (mode_is_float(mode)) {
1836 new_op = be_transform_node(op);
1838 if (ia32_cg_config.use_sse2) {
1839 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1840 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1841 nomem, new_op, noreg_fp);
/* select the single or double precision abs-mask constant */
1843 size = get_mode_size_bits(mode);
1844 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1846 set_ia32_am_sc(new_node, ent);
1848 SET_IA32_ORIG_NODE(new_node, node);
1850 set_ia32_op_type(new_node, ia32_AddrModeS);
1851 set_ia32_ls_mode(new_node, mode);
/* x87 path */
1853 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1854 SET_IA32_ORIG_NODE(new_node, node);
/* integer path: (x ^ (x >> 31)) - (x >> 31) */
1857 ir_node *xor, *sign_extension;
1859 if (get_mode_size_bits(mode) == 32) {
1860 new_op = be_transform_node(op);
/* widen sub-word operands to 32 bit first */
1862 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1865 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1867 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1868 nomem, new_op, sign_extension);
1869 SET_IA32_ORIG_NODE(xor, node);
1871 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1872 nomem, xor, sign_extension);
1873 SET_IA32_ORIG_NODE(new_node, node);
1880 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1882 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1884 dbg_info *dbgi = get_irn_dbg_info(cmp);
1885 ir_node *block = get_nodes_block(cmp);
1886 ir_node *new_block = be_transform_node(block);
1887 ir_node *op1 = be_transform_node(x);
1888 ir_node *op2 = be_transform_node(n);
1890 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Produces a flags-producing node for a mode_b value. Cmp inputs may be
 * strength-reduced to a Bt when testing a single bit; other mode_b values
 * are compared against 0 with Test. The condition to branch on is returned
 * through *pnc_out. */
1894 * Transform a node returning a "flag" result.
1896 * @param node the node to transform
1897 * @param pnc_out the compare mode to use
1899 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1906 /* we have a Cmp as input */
1907 if (is_Proj(node)) {
1908 ir_node *pred = get_Proj_pred(node);
1910 pn_Cmp pnc = get_Proj_proj(node);
/* try to use bt for single-bit tests: x & (1 << n) ==/!= 0 */
1911 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1912 ir_node *l = get_Cmp_left(pred);
1913 ir_node *r = get_Cmp_right(pred);
1915 ir_node *la = get_And_left(l);
1916 ir_node *ra = get_And_right(l);
/* shifted-one on the left side of the And */
1918 ir_node *c = get_Shl_left(la);
1919 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1920 /* (1 << n) & ra) */
1921 ir_node *n = get_Shl_right(la);
1922 flags = gen_bt(pred, ra, n);
1923 /* we must generate a Jc/Jnc jump */
1924 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1927 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* shifted-one on the right side of the And */
1932 ir_node *c = get_Shl_left(ra);
1933 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1934 /* la & (1 << n)) */
1935 ir_node *n = get_Shl_right(ra);
1936 flags = gen_bt(pred, la, n);
1937 /* we must generate a Jc/Jnc jump */
1938 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1941 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* plain Cmp: transform it; projection number is the condition */
1947 flags = be_transform_node(pred);
1953 /* a mode_b value, we have to compare it against 0 */
1954 dbgi = get_irn_dbg_info(node);
1955 new_block = be_transform_node(get_nodes_block(node));
1956 new_op = be_transform_node(node);
1957 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1958 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1959 *pnc_out = pn_Cmp_Lg;
/* Transforms a firm Load into xLoad (SSE2 float), vfld (x87 float),
 * Conv_I2I with AM (sub-word integer, to get defined upper bits) or a
 * plain ia32 Load, computing the address mode from the pointer tree. */
1964 * Transforms a Load.
1966 * @return the created ia32 Load node
1968 static ir_node *gen_Load(ir_node *node)
1970 ir_node *old_block = get_nodes_block(node);
1971 ir_node *block = be_transform_node(old_block);
1972 ir_node *ptr = get_Load_ptr(node);
1973 ir_node *mem = get_Load_mem(node);
1974 ir_node *new_mem = be_transform_node(mem);
1977 dbg_info *dbgi = get_irn_dbg_info(node);
1978 ir_mode *mode = get_Load_mode(node);
1981 ia32_address_t addr;
1983 /* construct load address */
1984 memset(&addr, 0, sizeof(addr));
1985 ia32_create_address_mode(&addr, ptr, 0);
/* NULL base/index are presumably replaced by noreg_GP in the elided
 * branches — TODO confirm against full source */
1992 base = be_transform_node(base);
1995 if (index == NULL) {
1998 index = be_transform_node(index);
2001 if (mode_is_float(mode)) {
2002 if (ia32_cg_config.use_sse2) {
2003 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2005 res_mode = mode_xmm;
2007 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2009 res_mode = mode_vfp;
2012 assert(mode != mode_b);
2014 /* create a conv node with address mode for smaller modes */
2015 if (get_mode_size_bits(mode) < 32) {
2016 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2017 new_mem, noreg_GP, mode);
2019 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2024 set_irn_pinned(new_node, get_irn_pinned(node));
2025 set_ia32_op_type(new_node, ia32_AddrModeS);
2026 set_ia32_ls_mode(new_node, mode);
2027 set_address(new_node, &addr);
/* floating (unpinned) loads can be rematerialised by the spiller */
2029 if (get_irn_pinned(node) == op_pin_state_floats) {
2030 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2031 && pn_ia32_vfld_res == pn_ia32_Load_res
2032 && pn_ia32_Load_res == pn_ia32_res);
2033 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2036 SET_IA32_ORIG_NODE(new_node, node);
2038 be_dep_on_frame(new_node);
/* Decides whether a Load (node, a Proj of it) feeding a Store at the same
 * pointer can be folded into a destination-address-mode instruction.
 * Returns non-zero if folding is safe. */
2042 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2043 ir_node *ptr, ir_node *other)
2050 /* we only use address mode if we're the only user of the load */
2051 if (get_irn_n_edges(node) > 1)
2054 load = get_Proj_pred(node);
/* load must reside in the store's block */
2057 if (get_nodes_block(load) != block)
2060 /* store should have the same pointer as the load */
2061 if (get_Load_ptr(load) != ptr)
2064 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2065 if (other != NULL &&
2066 get_nodes_block(other) == block &&
2067 heights_reachable_in_block(heights, other, load)) {
2071 if (prevents_AM(block, load, mem))
2073 /* Store should be attached to the load via mem */
2074 assert(heights_reachable_in_block(heights, mem, load));
/* Builds a destination-address-mode binop (e.g. AddMem): the memory operand
 * is read, combined with the other (register/immediate) operand and written
 * back in one instruction. Returns NULL if no dest-AM form is possible. */
2079 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2080 ir_node *mem, ir_node *ptr, ir_mode *mode,
2081 construct_binop_dest_func *func,
2082 construct_binop_dest_func *func8bit,
2083 match_flags_t flags)
2085 ir_node *src_block = get_nodes_block(node);
2093 ia32_address_mode_t am;
2094 ia32_address_t *addr = &am.addr;
2095 memset(&am, 0, sizeof(am));
2097 assert(flags & match_immediate); /* there is no destam node without... */
2098 commutative = (flags & match_commutative) != 0;
/* one operand becomes the memory side, the other the register side;
 * commutative ops may swap to make either one fit */
2100 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2101 build_address(&am, op1, ia32_create_am_double_use);
2102 new_op = create_immediate_or_transform(op2, 0);
2103 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2104 build_address(&am, op2, ia32_create_am_double_use);
2105 new_op = create_immediate_or_transform(op1, 0);
/* normalise missing address parts to noreg / nomem */
2110 if (addr->base == NULL)
2111 addr->base = noreg_GP;
2112 if (addr->index == NULL)
2113 addr->index = noreg_GP;
2114 if (addr->mem == NULL)
2117 dbgi = get_irn_dbg_info(node);
2118 block = be_transform_node(src_block);
2119 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8 bit operations need the dedicated 8bit constructor */
2121 if (get_mode_size_bits(mode) == 8) {
2122 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2124 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2126 set_address(new_node, addr);
2127 set_ia32_op_type(new_node, ia32_AddrModeD);
2128 set_ia32_ls_mode(new_node, mode);
2129 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the folded load's memory users to the new node */
2131 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2132 mem_proj = be_transform_node(am.mem_proj);
2133 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Builds a destination-address-mode unop (e.g. NotMem/NegMem): read-modify-
 * write on the memory operand in one instruction. Returns NULL if no
 * dest-AM form is possible. */
2138 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2139 ir_node *ptr, ir_mode *mode,
2140 construct_unop_dest_func *func)
2142 ir_node *src_block = get_nodes_block(node);
2148 ia32_address_mode_t am;
2149 ia32_address_t *addr = &am.addr;
2151 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2154 memset(&am, 0, sizeof(am));
2155 build_address(&am, op, ia32_create_am_double_use);
2157 dbgi = get_irn_dbg_info(node);
2158 block = be_transform_node(src_block);
2159 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2160 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2161 set_address(new_node, addr);
2162 set_ia32_op_type(new_node, ia32_AddrModeD);
2163 set_ia32_ls_mode(new_node, mode);
2164 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the folded load's memory users to the new node */
2166 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2167 mem_proj = be_transform_node(am.mem_proj);
2168 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Tries to turn Store(Mux(sel, 1, 0)) (or the negated variant) on an 8 bit
 * value into a single SetMem (setcc to memory). Returns NULL if the Mux
 * does not match the 0/1 pattern or the mode is not 8 bit. */
2173 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2175 ir_mode *mode = get_irn_mode(node);
2176 ir_node *mux_true = get_Mux_true(node);
2177 ir_node *mux_false = get_Mux_false(node);
2187 ia32_address_t addr;
/* setcc writes a single byte only */
2189 if (get_mode_size_bits(mode) != 8)
2192 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
/* Mux(sel, 0, 1): same pattern with the condition negated */
2194 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2200 build_address_ptr(&addr, ptr, mem);
2202 dbgi = get_irn_dbg_info(node);
2203 block = get_nodes_block(node);
2204 new_block = be_transform_node(block);
2205 cond = get_Mux_sel(node);
2206 flags = get_flags_node(cond, &pnc);
2207 new_mem = be_transform_node(mem);
2208 new_node = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2209 addr.index, addr.mem, flags, pnc, negated);
2210 set_address(new_node, &addr);
2211 set_ia32_op_type(new_node, ia32_AddrModeD);
2212 set_ia32_ls_mode(new_node, mode);
2213 SET_IA32_ORIG_NODE(new_node, node);
/* Tries to express Store(op(Load(p), x), p) as a single read-modify-write
 * instruction (destination address mode). Dispatches on the stored value's
 * opcode to the matching *Mem constructor. Returns NULL when no dest-AM
 * form applies; the caller then emits a normal store. */
2218 static ir_node *try_create_dest_am(ir_node *node)
2220 ir_node *val = get_Store_value(node);
2221 ir_node *mem = get_Store_mem(node);
2222 ir_node *ptr = get_Store_ptr(node);
2223 ir_mode *mode = get_irn_mode(val);
2224 unsigned bits = get_mode_size_bits(mode);
2229 /* handle only GP modes for now... */
2230 if (!ia32_mode_needs_gp_reg(mode))
2234 /* store must be the only user of the val node */
2235 if (get_irn_n_edges(val) > 1)
2237 /* skip pointless convs */
2239 ir_node *conv_op = get_Conv_op(val);
2240 ir_mode *pred_mode = get_irn_mode(conv_op);
2241 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a conv that does not lose bits before the store is a no-op here */
2243 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2251 /* value must be in the same block */
2252 if (get_nodes_block(node) != get_nodes_block(val))
2255 switch (get_irn_opcode(val)) {
/* Add: prefer inc/dec for +1/-1 when the target likes them */
2257 op1 = get_Add_left(val);
2258 op2 = get_Add_right(val);
2259 if (ia32_cg_config.use_incdec) {
2260 if (is_Const_1(op2)) {
2261 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2263 } else if (is_Const_Minus_1(op2)) {
2264 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2268 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2269 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2270 match_commutative | match_immediate);
2273 op1 = get_Sub_left(val);
2274 op2 = get_Sub_right(val);
2275 if (is_Const(op2)) {
2276 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2278 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2279 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2283 op1 = get_And_left(val);
2284 op2 = get_And_right(val);
2285 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2286 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2287 match_commutative | match_immediate);
2290 op1 = get_Or_left(val);
2291 op2 = get_Or_right(val);
2292 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2293 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2294 match_commutative | match_immediate);
2297 op1 = get_Eor_left(val);
2298 op2 = get_Eor_right(val);
2299 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2300 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2301 match_commutative | match_immediate);
/* shifts: no separate 8bit constructor, same func passed twice */
2304 op1 = get_Shl_left(val);
2305 op2 = get_Shl_right(val);
2306 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2307 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2311 op1 = get_Shr_left(val);
2312 op2 = get_Shr_right(val);
2313 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2314 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2318 op1 = get_Shrs_left(val);
2319 op2 = get_Shrs_right(val);
2320 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2321 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2325 op1 = get_Rotl_left(val);
2326 op2 = get_Rotl_right(val);
2327 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2328 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2331 /* TODO: match ROR patterns... */
2333 new_node = try_create_SetMem(val, ptr, mem);
2336 op1 = get_Minus_op(val);
2337 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2340 /* should be lowered already */
2341 assert(mode != mode_b);
2342 op1 = get_Not_op(val);
2343 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* preserve pinned state of the original store */
2349 if (new_node != NULL) {
2350 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2351 get_irn_pinned(node) == op_pin_state_pinned) {
2352 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns true if an integer mode can be the result of a float->int store
 * conversion (fist): signed and exactly 16 or 32 bit wide. */
2359 static bool possible_int_mode_for_fp(ir_mode *mode)
2363 if (!mode_is_signed(mode))
2365 size = get_mode_size_bits(mode);
2366 if (size != 16 && size != 32)
/* Returns non-zero if node is a Conv from a float mode to an integer mode
 * that a fist instruction can produce directly (see
 * possible_int_mode_for_fp). */
2371 static int is_float_to_int_conv(const ir_node *node)
2373 ir_mode *mode = get_irn_mode(node);
2377 if (!possible_int_mode_for_fp(mode))
2382 conv_op = get_Conv_op(node);
2383 conv_mode = get_irn_mode(conv_op);
2385 if (!mode_is_float(conv_mode))
/* Transforms Store(floatConst) into one integer immediate store per 32bit
 * chunk of the constant's bit pattern; multiple stores are joined by a
 * Sync. Avoids materialising the float constant in a register. */
2392 * Transform a Store(floatConst) into a sequence of
2395 * @return the created ia32 Store node
2397 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2399 ir_mode *mode = get_irn_mode(cns);
2400 unsigned size = get_mode_size_bytes(mode);
2401 tarval *tv = get_Const_tarval(cns);
2402 ir_node *block = get_nodes_block(node);
2403 ir_node *new_block = be_transform_node(block);
2404 ir_node *ptr = get_Store_ptr(node);
2405 ir_node *mem = get_Store_mem(node);
2406 dbg_info *dbgi = get_irn_dbg_info(node);
2410 ia32_address_t addr;
/* only whole 32bit words are emitted */
2412 assert(size % 4 == 0);
2415 build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32bit word from the tarval's bytes */
2419 get_tarval_sub_bits(tv, ofs) |
2420 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2421 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2422 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2423 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2425 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2426 addr.index, addr.mem, imm);
2428 set_irn_pinned(new_node, get_irn_pinned(node));
2429 set_ia32_op_type(new_node, ia32_AddrModeD);
2430 set_ia32_ls_mode(new_node, mode_Iu);
2431 set_address(new_node, &addr);
2432 SET_IA32_ORIG_NODE(new_node, node);
/* collect the partial stores; the elided lines presumably advance the
 * address offset by 4 per iteration — TODO confirm */
2435 ins[i++] = new_node;
2440 } while (size != 0);
/* combine the memory results of all partial stores */
2443 return new_rd_Sync(dbgi, new_block, i, ins);
/* Emits a float->int store: vfisttp (SSE3, truncating, always pops the x87
 * tos) if available, otherwise vfist with an explicit truncating fpu mode.
 * *fist receives the store node itself. */
2450 * Generate a vfist or vfisttp instruction.
2452 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2453 ir_node *mem, ir_node *val, ir_node **fist)
2457 if (ia32_cg_config.use_fisttp) {
2458 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2459 if other users exists */
2460 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2461 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2462 ir_node *value = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
/* Keep the popped value alive for potential other users */
2463 be_new_Keep(reg_class, block, 1, &value);
2465 new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* classic fist: needs the fpu switched to truncation mode */
2468 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2471 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
/* Transforms a general Store: first tries destination address mode, then
 * dispatches on the value mode — SSE2 xStore / x87 vfst for floats,
 * fist for float->int conversions, Store/Store8Bit for integers. */
2477 * Transforms a general (no special case) Store.
2479 * @return the created ia32 Store node
2481 static ir_node *gen_general_Store(ir_node *node)
2483 ir_node *val = get_Store_value(node);
2484 ir_mode *mode = get_irn_mode(val);
2485 ir_node *block = get_nodes_block(node);
2486 ir_node *new_block = be_transform_node(block);
2487 ir_node *ptr = get_Store_ptr(node);
2488 ir_node *mem = get_Store_mem(node);
2489 dbg_info *dbgi = get_irn_dbg_info(node);
2490 ir_node *new_val, *new_node, *store;
2491 ia32_address_t addr;
2493 /* check for destination address mode */
2494 new_node = try_create_dest_am(node);
2495 if (new_node != NULL)
2498 /* construct store address */
2499 memset(&addr, 0, sizeof(addr));
2500 ia32_create_address_mode(&addr, ptr, 0);
2502 if (addr.base == NULL) {
2503 addr.base = noreg_GP;
2505 addr.base = be_transform_node(addr.base);
2508 if (addr.index == NULL) {
2509 addr.index = noreg_GP;
2511 addr.index = be_transform_node(addr.index);
2513 addr.mem = be_transform_node(mem);
2515 if (mode_is_float(mode)) {
2516 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2518 while (is_Conv(val) && mode == get_irn_mode(val)) {
2519 ir_node *op = get_Conv_op(val);
2520 if (!mode_is_float(get_irn_mode(op)))
2524 new_val = be_transform_node(val);
2525 if (ia32_cg_config.use_sse2) {
2526 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2527 addr.index, addr.mem, new_val);
2529 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2530 addr.index, addr.mem, new_val, mode);
/* float->int conv feeding a store: use fist directly */
2533 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2534 val = get_Conv_op(val);
2536 /* TODO: is this optimisation still necessary at all (middleend)? */
2537 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2538 while (is_Conv(val)) {
2539 ir_node *op = get_Conv_op(val);
2540 if (!mode_is_float(get_irn_mode(op)))
2542 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2546 new_val = be_transform_node(val);
2547 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store; 8 bit needs the dedicated node for register constraints */
2549 new_val = create_immediate_or_transform(val, 0);
2550 assert(mode != mode_b);
2552 if (get_mode_size_bits(mode) == 8) {
2553 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2554 addr.index, addr.mem, new_val);
2556 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2557 addr.index, addr.mem, new_val);
2562 set_irn_pinned(store, get_irn_pinned(node));
2563 set_ia32_op_type(store, ia32_AddrModeD);
2564 set_ia32_ls_mode(store, mode);
2566 set_address(store, &addr);
2567 SET_IA32_ORIG_NODE(store, node);
2573 * Transforms a Store.
2575 * @return the created ia32 Store node
/**
 * Transform a Store: dispatch float-constant stores to
 * gen_float_const_Store, everything else to gen_general_Store.
 */
2577 static ir_node *gen_Store(ir_node *node)
2579 ir_node *val = get_Store_value(node);
2580 ir_mode *mode = get_irn_mode(val);
2582 if (mode_is_float(mode) && is_Const(val)) {
2583 /* We can transform every floating const store
2584 into a sequence of integer stores.
2585 If the constant is already in a register,
2586 it would be better to use it, but we don't
2587 have this information here. */
2588 return gen_float_const_Store(node, val);
2590 return gen_general_Store(node);
2594 * Transforms a Switch.
2596 * @return the created ia32 SwitchJmp node
/**
 * Transform a switch-Cond into an ia32 SwitchJmp (jump table).
 * Scans the Proj edges for the min/max case value, rejects tables
 * spanning more than 128000 entries, and rebases the selector with a
 * Lea when the smallest case is not 0.
 */
2598 static ir_node *create_Switch(ir_node *node)
2600 dbg_info *dbgi = get_irn_dbg_info(node);
2601 ir_node *block = be_transform_node(get_nodes_block(node));
2602 ir_node *sel = get_Cond_selector(node);
2603 ir_node *new_sel = be_transform_node(sel);
2604 long switch_min = LONG_MAX;
2605 long switch_max = LONG_MIN;
2606 long default_pn = get_Cond_default_proj(node);
2608 const ir_edge_t *edge;
2610 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2612 /* determine the smallest switch case value */
2613 foreach_out_edge(node, edge) {
2614 ir_node *proj = get_edge_src_irn(edge);
2615 long pn = get_Proj_proj(proj);
2616 if (pn == default_pn)
2619 if (pn < switch_min)
2621 if (pn > switch_max)
2625 if ((unsigned long) (switch_max - switch_min) > 128000) {
2626 panic("Size of switch %+F bigger than 128000", node);
2629 if (switch_min != 0) {
2630 /* if smallest switch case is not 0 we need an additional sub */
2631 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2632 add_ia32_am_offs_int(new_sel, -switch_min);
2633 set_ia32_op_type(new_sel, ia32_AddrModeS);
2635 SET_IA32_ORIG_NODE(new_sel, node);
2638 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2639 SET_IA32_ORIG_NODE(new_node, node);
2645 * Transform a Cond node.
/**
 * Transform a Cond node: switch-Conds (selector not mode_b) become a
 * SwitchJmp, boolean Conds become a Jcc fed by the flags-producing node.
 */
2647 static ir_node *gen_Cond(ir_node *node)
2649 ir_node *block = get_nodes_block(node);
2650 ir_node *new_block = be_transform_node(block);
2651 dbg_info *dbgi = get_irn_dbg_info(node);
2652 ir_node *sel = get_Cond_selector(node);
2653 ir_mode *sel_mode = get_irn_mode(sel);
2654 ir_node *flags = NULL;
2658 if (sel_mode != mode_b) {
2659 return create_Switch(node);
2662 /* we get flags from a Cmp */
2663 flags = get_flags_node(sel, &pnc);
2665 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2666 SET_IA32_ORIG_NODE(new_node, node);
2672 * Transform a be_Copy.
/**
 * Transform a be_Copy: duplicate the node and normalize GP-register
 * copies to mode_Iu (all integer operations run on 32bit registers).
 */
2674 static ir_node *gen_be_Copy(ir_node *node)
2676 ir_node *new_node = be_duplicate_node(node);
2677 ir_mode *mode = get_irn_mode(new_node);
2679 if (ia32_mode_needs_gp_reg(mode)) {
2680 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare for a Cmp node.
 * Uses vFucomi when available; otherwise vFtstFnstsw (compare against 0)
 * or vFucomFnstsw, followed by a Sahf to move FPU status into eflags.
 */
2686 static ir_node *create_Fucom(ir_node *node)
2688 dbg_info *dbgi = get_irn_dbg_info(node);
2689 ir_node *block = get_nodes_block(node);
2690 ir_node *new_block = be_transform_node(block);
2691 ir_node *left = get_Cmp_left(node);
2692 ir_node *new_left = be_transform_node(left);
2693 ir_node *right = get_Cmp_right(node);
2697 if (ia32_cg_config.use_fucomi) {
2698 new_right = be_transform_node(right);
2699 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2701 set_ia32_commutative(new_node);
2702 SET_IA32_ORIG_NODE(new_node, node);
2704 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2705 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2707 new_right = be_transform_node(right);
2708 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2711 set_ia32_commutative(new_node);
2713 SET_IA32_ORIG_NODE(new_node, node);
/* transfer the fnstsw result into the eflags register */
2715 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2716 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an SSE Ucomi compare for a Cmp node, allowing a memory operand
 * via source address mode (match_am) and operand swapping
 * (match_commutative).
 */
2722 static ir_node *create_Ucomi(ir_node *node)
2724 dbg_info *dbgi = get_irn_dbg_info(node);
2725 ir_node *src_block = get_nodes_block(node);
2726 ir_node *new_block = be_transform_node(src_block);
2727 ir_node *left = get_Cmp_left(node);
2728 ir_node *right = get_Cmp_right(node);
2730 ia32_address_mode_t am;
2731 ia32_address_t *addr = &am.addr;
2733 match_arguments(&am, src_block, left, right, NULL,
2734 match_commutative | match_am);
2736 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2737 addr->mem, am.new_op1, am.new_op2,
2739 set_am_attributes(new_node, &am);
2741 SET_IA32_ORIG_NODE(new_node, node);
2743 new_node = fix_mem_proj(new_node, &am);
2749 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2750 * to fold an and into a test node
/**
 * Check whether every Proj hanging off the Cmp is Eq or Lg;
 * only then may an And feeding the Cmp be folded into a Test node
 * (Test only yields valid results for (in)equality).
 */
2752 static bool can_fold_test_and(ir_node *node)
2754 const ir_edge_t *edge;
2756 /* we can only have eq and lg projs */
2757 foreach_out_edge(node, edge) {
2758 ir_node *proj = get_edge_src_irn(edge);
2759 pn_Cmp pnc = get_Proj_proj(proj);
2760 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2768 * Returns true if it is assured that the upper bits of a node are "clean",
2769 * which means for a 16 or 8 bit value that the upper bits in the register
2770 * are 0 for unsigned and a copy of the most significant bit for signed
/**
 * Recursively decide whether the bits above get_mode_size_bits(mode) of
 * an already-transformed node are guaranteed clean (zero for unsigned
 * modes, sign-copies for signed modes), so a cheaper 32bit compare can
 * be used instead of an 8/16bit one.
 */
2773 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2775 assert(ia32_mode_needs_gp_reg(mode));
2776 if (get_mode_size_bits(mode) >= 32)
2779 if (is_Proj(transformed_node))
2780 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2782 switch (get_ia32_irn_opcode(transformed_node)) {
2783 case iro_ia32_Conv_I2I:
2784 case iro_ia32_Conv_I2I8Bit: {
2785 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conv with different signedness or from a wider mode proves nothing */
2786 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2788 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* logical shift right: a large enough immediate count zeroes the upper bits */
2795 if (mode_is_signed(mode)) {
2796 return false; /* TODO handle signed modes */
2798 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2799 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2800 const ia32_immediate_attr_t *attr
2801 = get_ia32_immediate_attr_const(right);
2802 if (attr->symconst == 0 &&
2803 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2807 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2811 /* TODO too conservative if shift amount is constant */
2812 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand already zeroes the result's
 * upper bits; otherwise both operands must be clean */
2815 if (!mode_is_signed(mode)) {
2817 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2818 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2820 /* TODO if one is known to be zero extended, then || is sufficient */
2825 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2826 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2828 case iro_ia32_Const:
2829 case iro_ia32_Immediate: {
2830 const ia32_immediate_attr_t *attr =
2831 get_ia32_immediate_attr_const(transformed_node);
2832 if (mode_is_signed(mode)) {
/* signed: all bits above the sign bit must equal the sign bit */
2833 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2834 return shifted == 0 || shifted == -1;
2836 unsigned long shifted = (unsigned long)attr->offset;
2837 shifted >>= get_mode_size_bits(mode);
2838 return shifted == 0;
2848 * Generate code for a Cmp.
/**
 * Transform a Cmp node.
 * Floats go to Ucomi (SSE) or Fucom (x87). Integer compares against 0
 * of a single-user And are folded into Test/Test8Bit; everything else
 * becomes Cmp/Cmp8Bit. Widens to a 32bit compare when the upper bits of
 * both operands are known clean (smaller opcode).
 */
2850 static ir_node *gen_Cmp(ir_node *node)
2852 dbg_info *dbgi = get_irn_dbg_info(node);
2853 ir_node *block = get_nodes_block(node);
2854 ir_node *new_block = be_transform_node(block);
2855 ir_node *left = get_Cmp_left(node);
2856 ir_node *right = get_Cmp_right(node);
2857 ir_mode *cmp_mode = get_irn_mode(left);
2859 ia32_address_mode_t am;
2860 ia32_address_t *addr = &am.addr;
2863 if (mode_is_float(cmp_mode)) {
2864 if (ia32_cg_config.use_sse2) {
2865 return create_Ucomi(node);
2867 return create_Fucom(node);
2871 assert(ia32_mode_needs_gp_reg(cmp_mode));
2873 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2874 cmp_unsigned = !mode_is_signed(cmp_mode);
2875 if (is_Const_0(right) &&
2877 get_irn_n_edges(left) == 1 &&
2878 can_fold_test_and(node)) {
2879 /* Test(and_left, and_right) */
2880 ir_node *and_left = get_And_left(left);
2881 ir_node *and_right = get_And_right(left);
2883 /* matze: code here used mode instead of cmp_mode, I think it is always
2884 * the same as cmp_mode, but I leave this here to see if this is really
2887 assert(get_irn_mode(and_left) == cmp_mode);
2889 match_arguments(&am, block, and_left, and_right, NULL,
2891 match_am | match_8bit_am | match_16bit_am |
2892 match_am_and_immediates | match_immediate);
2894 /* use 32bit compare mode if possible since the opcode is smaller */
2895 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2896 upper_bits_clean(am.new_op2, cmp_mode)) {
2897 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2900 if (get_mode_size_bits(cmp_mode) == 8) {
2901 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2902 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2905 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2906 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2909 /* Cmp(left, right) */
2910 match_arguments(&am, block, left, right, NULL,
2911 match_commutative | match_am | match_8bit_am |
2912 match_16bit_am | match_am_and_immediates |
2914 /* use 32bit compare mode if possible since the opcode is smaller */
2915 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2916 upper_bits_clean(am.new_op2, cmp_mode)) {
2917 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2920 if (get_mode_size_bits(cmp_mode) == 8) {
2921 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2922 addr->index, addr->mem, am.new_op1,
2923 am.new_op2, am.ins_permuted,
2926 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2927 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2930 set_am_attributes(new_node, &am);
2931 set_ia32_ls_mode(new_node, cmp_mode);
2933 SET_IA32_ORIG_NODE(new_node, node);
2935 new_node = fix_mem_proj(new_node, &am);
/**
 * Create a CMov for a Mux node whose values need GP registers.
 * Requires the cmov feature (asserted); one operand may come from
 * memory via address mode.
 */
2940 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2943 dbg_info *dbgi = get_irn_dbg_info(node);
2944 ir_node *block = get_nodes_block(node);
2945 ir_node *new_block = be_transform_node(block);
2946 ir_node *val_true = get_Mux_true(node);
2947 ir_node *val_false = get_Mux_false(node);
2949 ia32_address_mode_t am;
2950 ia32_address_t *addr;
2952 assert(ia32_cg_config.use_cmov);
2953 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2957 match_arguments(&am, block, val_false, val_true, flags,
2958 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2960 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2961 addr->mem, am.new_op1, am.new_op2, new_flags,
2962 am.ins_permuted, pnc);
2963 set_am_attributes(new_node, &am);
2965 SET_IA32_ORIG_NODE(new_node, node);
2967 new_node = fix_mem_proj(new_node, &am);
2973 * Creates an ia32 Setcc instruction.
/**
 * Create a Set (setcc) node producing a 0/1 byte from the flags, then
 * zero-extend it with a Conv when the requested result mode is wider
 * than 8 bits.
 */
2975 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2976 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2979 ir_mode *mode = get_irn_mode(orig_node);
2982 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2983 SET_IA32_ORIG_NODE(new_node, orig_node);
2985 /* we might need to conv the result up */
2986 if (get_mode_size_bits(mode) > 8) {
2987 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
2988 nomem, new_node, mode_Bu);
2989 SET_IA32_ORIG_NODE(new_node, orig_node);
2996 * Create instruction for an unsigned Difference or Zero.
/**
 * Create code for an unsigned Difference-or-Zero: max(a - b, 0) for
 * unsigned operands, built as sub; sbb 0 (borrow mask); and.
 */
2998 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
3000 ir_mode *mode = get_irn_mode(psi);
3001 ir_node *new_node, *sub, *sbb, *eflags, *block;
3005 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3006 match_mode_neutral | match_am | match_immediate | match_two_users);
3008 block = get_nodes_block(new_node);
3010 if (is_Proj(new_node)) {
3011 sub = get_Proj_pred(new_node);
3012 assert(is_ia32_Sub(sub));
/* turn the Sub into a multi-result node so we can grab its flags */
3015 set_irn_mode(sub, mode_T);
3016 new_node = new_rd_Proj(NULL, block, sub, mode, pn_ia32_res);
3018 eflags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3020 dbgi = get_irn_dbg_info(psi);
/* sbb 0: all-ones if the sub borrowed, all-zeros otherwise */
3021 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3023 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3024 set_ia32_commutative(new_node);
3029 * Create a const array of two float consts.
3031 * @param c0 the first constant
3032 * @param c1 the second constant
3033 * @param new_mode IN/OUT for the mode of the constants, if NULL
3034 * smallest possible mode will be used
/**
 * Create a static, constant two-element float array entity holding the
 * tarvals of c0 and c1, narrowing to the smallest lossless common mode
 * when *new_mode is NULL.
 */
3036 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3038 ir_mode *mode = *new_mode;
3040 ir_initializer_t *initializer;
3041 tarval *tv0 = get_Const_tarval(c0);
3042 tarval *tv1 = get_Const_tarval(c1);
3045 /* detect the best mode for the constants */
3046 mode = get_tarval_mode(tv0);
3048 if (mode != mode_F) {
/* try single precision first, then double precision */
3049 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3050 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3052 tv0 = tarval_convert_to(tv0, mode);
3053 tv1 = tarval_convert_to(tv1, mode);
3054 } else if (mode != mode_D) {
3055 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3056 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3058 tv0 = tarval_convert_to(tv0, mode);
3059 tv1 = tarval_convert_to(tv1, mode);
3066 tp = ia32_create_float_type(mode, 4);
3067 tp = ia32_create_float_array(tp);
3069 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3071 set_entity_ld_ident(ent, get_entity_ident(ent));
3072 set_entity_visibility(ent, visibility_local);
3073 set_entity_variability(ent, variability_constant);
3074 set_entity_allocation(ent, allocation_static);
3076 initializer = create_initializer_compound(2);
3078 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3079 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3081 set_entity_initializer(ent, initializer);
3088 * Transforms a Mux node into some code sequence.
3090 * @return The transformed node.
/**
 * Transform a Mux node.
 * Float Mux: SSE min/max patterns, or a Set-indexed load from a
 * two-element constant array for const/const Muxes. Integer Mux:
 * unsigned Doz pattern, Set for 0/1 constants, CMov otherwise.
 */
3092 static ir_node *gen_Mux(ir_node *node)
3094 dbg_info *dbgi = get_irn_dbg_info(node);
3095 ir_node *block = get_nodes_block(node);
3096 ir_node *new_block = be_transform_node(block);
3097 ir_node *mux_true = get_Mux_true(node);
3098 ir_node *mux_false = get_Mux_false(node);
3099 ir_node *cond = get_Mux_sel(node);
3100 ir_mode *mode = get_irn_mode(node);
3105 assert(get_irn_mode(cond) == mode_b);
3107 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3108 if (mode_is_float(mode)) {
3109 ir_node *cmp = get_Proj_pred(cond);
3110 ir_node *cmp_left = get_Cmp_left(cmp);
3111 ir_node *cmp_right = get_Cmp_right(cmp);
3112 pn_Cmp pnc = get_Proj_proj(cond);
3114 if (ia32_cg_config.use_sse2) {
3115 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3116 if (cmp_left == mux_true && cmp_right == mux_false) {
3117 /* Mux(a <= b, a, b) => MIN */
3118 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3119 match_commutative | match_am | match_two_users);
3120 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3121 /* Mux(a <= b, b, a) => MAX */
3122 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3123 match_commutative | match_am | match_two_users);
3125 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3126 if (cmp_left == mux_true && cmp_right == mux_false) {
3127 /* Mux(a >= b, a, b) => MAX */
3128 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3129 match_commutative | match_am | match_two_users);
3130 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3131 /* Mux(a >= b, b, a) => MIN */
3132 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3133 match_commutative | match_am | match_two_users);
3137 if (is_Const(mux_true) && is_Const(mux_false)) {
3138 ia32_address_mode_t am;
/* materialize the two constants in memory and load the selected one:
 * the Set result (0/1) scaled by the element size picks the array slot */
3143 flags = get_flags_node(cond, &pnc);
3144 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*ins_permuted=*/0);
3146 if (ia32_cg_config.use_sse2) {
3147 /* cannot load from different mode on SSE */
3150 /* x87 can load any mode */
3154 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3156 switch (get_mode_size_bytes(new_mode)) {
3166 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3167 set_ia32_am_scale(new_node, 2);
3172 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3173 set_ia32_am_scale(new_node, 1);
3176 /* arg, shift 16 NOT supported */
3178 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3181 panic("Unsupported constant size");
3184 am.ls_mode = new_mode;
3185 am.addr.base = noreg_GP;
3186 am.addr.index = new_node;
3187 am.addr.mem = nomem;
3189 am.addr.scale = scale;
3190 am.addr.use_frame = 0;
3191 am.addr.frame_entity = NULL;
3192 am.addr.symconst_sign = 0;
3193 am.mem_proj = am.addr.mem;
3194 am.op_type = ia32_AddrModeS;
3197 am.pinned = op_pin_state_floats;
3199 am.ins_permuted = 0;
3201 if (ia32_cg_config.use_sse2)
3202 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3204 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3205 set_am_attributes(load, &am);
3207 return new_rd_Proj(NULL, block, load, mode_vfp, pn_ia32_res);
3209 panic("cannot transform floating point Mux");
3212 assert(ia32_mode_needs_gp_reg(mode));
3214 if (is_Proj(cond)) {
3215 ir_node *cmp = get_Proj_pred(cond);
3217 ir_node *cmp_left = get_Cmp_left(cmp);
3218 ir_node *cmp_right = get_Cmp_right(cmp);
3219 pn_Cmp pnc = get_Proj_proj(cond);
3221 /* check for unsigned Doz first */
3222 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3223 is_Const_0(mux_false) && is_Sub(mux_true) &&
3224 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3225 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3226 return create_Doz(node, cmp_left, cmp_right);
3227 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3228 is_Const_0(mux_true) && is_Sub(mux_false) &&
3229 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3230 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3231 return create_Doz(node, cmp_left, cmp_right);
3236 flags = get_flags_node(cond, &pnc);
3238 if (is_Const(mux_true) && is_Const(mux_false)) {
3239 /* both are const, good */
3240 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3241 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*ins_permuted=*/0);
3242 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3243 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*ins_permuted=*/1);
3245 /* Not that simple. */
3250 new_node = create_CMov(node, cond, flags, pnc);
3258 * Create a conversion from x87 state register to general purpose.
/**
 * Create a conversion from x87 register to general purpose register:
 * fist(p) to a frame slot, then an integer Load of the lower 32 bits.
 * Unsigned 32bit values are stored as 64bit signed and the low half
 * is loaded.
 */
3260 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3262 ir_node *block = be_transform_node(get_nodes_block(node));
3263 ir_node *op = get_Conv_op(node);
3264 ir_node *new_op = be_transform_node(op);
3265 ir_graph *irg = current_ir_graph;
3266 dbg_info *dbgi = get_irn_dbg_info(node);
3267 ir_mode *mode = get_irn_mode(node);
3268 ir_node *fist, *load, *mem;
3270 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3271 set_irn_pinned(fist, op_pin_state_floats);
3272 set_ia32_use_frame(fist);
3273 set_ia32_op_type(fist, ia32_AddrModeD);
3275 assert(get_mode_size_bits(mode) <= 32);
3276 /* exception we can only store signed 32 bit integers, so for unsigned
3277 we store a 64bit (signed) integer and load the lower bits */
3278 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3279 set_ia32_ls_mode(fist, mode_Ls);
3281 set_ia32_ls_mode(fist, mode_Is);
3283 SET_IA32_ORIG_NODE(fist, node);
3286 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3288 set_irn_pinned(load, op_pin_state_floats);
3289 set_ia32_use_frame(load);
3290 set_ia32_op_type(load, ia32_AddrModeS);
3291 set_ia32_ls_mode(load, mode_Is);
/* request a frame entity wide enough for what the fist stored */
3292 if (get_ia32_ls_mode(fist) == mode_Ls) {
3293 ia32_attr_t *attr = get_ia32_attr(load);
3294 attr->data.need_64bit_stackent = 1;
3296 ia32_attr_t *attr = get_ia32_attr(load);
3297 attr->data.need_32bit_stackent = 1;
3299 SET_IA32_ORIG_NODE(load, node);
3301 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
3305 * Creates a x87 strict Conv by placing a Store and a Load
/**
 * Create a strict x87 Conv by forcing a round-trip through memory:
 * a vfst in tgt_mode followed by a vfld (the store truncates to the
 * target precision).
 */
3307 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3309 ir_node *block = get_nodes_block(node);
3310 ir_graph *irg = get_Block_irg(block);
3311 dbg_info *dbgi = get_irn_dbg_info(node);
3312 ir_node *frame = get_irg_frame(irg);
3313 ir_node *store, *load;
3316 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3317 set_ia32_use_frame(store);
3318 set_ia32_op_type(store, ia32_AddrModeD);
3319 SET_IA32_ORIG_NODE(store, node);
3321 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3322 set_ia32_use_frame(load);
3323 set_ia32_op_type(load, ia32_AddrModeS);
3324 SET_IA32_ORIG_NODE(load, node);
3326 new_node = new_r_Proj(block, load, mode_E, pn_ia32_vfld_res);
/**
 * Create an integer-to-integer Conv node, picking the 8bit variant
 * when the load/store mode is 8 bits wide.
 */
3330 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3331 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3333 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3335 func = get_mode_size_bits(mode) == 8 ?
3336 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3337 return func(dbgi, block, base, index, mem, val, mode);
3341 * Create a conversion from general purpose to x87 register
/**
 * Create a conversion from general purpose to x87 register: a fild,
 * either directly from memory via source address mode, or from a
 * frame slot the value is first spilled to. Unsigned 32bit values get
 * a zero high word appended so they can be loaded as 64bit signed.
 */
3343 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3345 ir_node *src_block = get_nodes_block(node);
3346 ir_node *block = be_transform_node(src_block);
3347 ir_graph *irg = get_Block_irg(block);
3348 dbg_info *dbgi = get_irn_dbg_info(node);
3349 ir_node *op = get_Conv_op(node);
3350 ir_node *new_op = NULL;
3352 ir_mode *store_mode;
3357 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3358 if (possible_int_mode_for_fp(src_mode)) {
3359 ia32_address_mode_t am;
3361 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3362 if (am.op_type == ia32_AddrModeS) {
3363 ia32_address_t *addr = &am.addr;
3365 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3366 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3368 set_am_attributes(fild, &am);
3369 SET_IA32_ORIG_NODE(fild, node);
3371 fix_mem_proj(fild, &am);
3376 if (new_op == NULL) {
3377 new_op = be_transform_node(op);
3380 mode = get_irn_mode(op);
3382 /* first convert to 32 bit signed if necessary */
3383 if (get_mode_size_bits(src_mode) < 32) {
3384 if (!upper_bits_clean(new_op, src_mode)) {
3385 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3386 SET_IA32_ORIG_NODE(new_op, node);
3391 assert(get_mode_size_bits(mode) == 32);
/* spill the value to the frame so fild can load it */
3394 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3396 set_ia32_use_frame(store);
3397 set_ia32_op_type(store, ia32_AddrModeD);
3398 set_ia32_ls_mode(store, mode_Iu);
3400 /* exception for 32bit unsigned, do a 64bit spill+load */
3401 if (!mode_is_signed(mode)) {
3404 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* store a zero high word at offset 4, then fild the 64bit value */
3406 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3407 noreg_GP, nomem, zero_const);
3409 set_ia32_use_frame(zero_store);
3410 set_ia32_op_type(zero_store, ia32_AddrModeD);
3411 add_ia32_am_offs_int(zero_store, 4);
3412 set_ia32_ls_mode(zero_store, mode_Iu);
3417 store = new_rd_Sync(dbgi, block, 2, in);
3418 store_mode = mode_Ls;
3420 store_mode = mode_Is;
3424 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3426 set_ia32_use_frame(fild);
3427 set_ia32_op_type(fild, ia32_AddrModeS);
3428 set_ia32_ls_mode(fild, store_mode);
3430 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3436 * Create a conversion from one integer mode into another one
/**
 * Create a conversion between two integer modes. The conv is performed
 * in the smaller of the two modes; if the operand's upper bits are
 * already clean the conv is skipped entirely.
 */
3438 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3439 dbg_info *dbgi, ir_node *block, ir_node *op,
3442 ir_node *new_block = be_transform_node(block);
3444 ir_mode *smaller_mode;
3445 ia32_address_mode_t am;
3446 ia32_address_t *addr = &am.addr;
3449 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3450 smaller_mode = src_mode;
3452 smaller_mode = tgt_mode;
3455 #ifdef DEBUG_libfirm
3457 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3462 match_arguments(&am, block, NULL, op, NULL,
3463 match_am | match_8bit_am | match_16bit_am);
3465 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3466 /* unnecessary conv. in theory it shouldn't have been AM */
3467 assert(is_ia32_NoReg_GP(addr->base));
3468 assert(is_ia32_NoReg_GP(addr->index));
3469 assert(is_NoMem(addr->mem));
3470 assert(am.addr.offset == 0);
3471 assert(am.addr.symconst_ent == NULL);
3475 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3476 addr->mem, am.new_op2, smaller_mode);
3477 set_am_attributes(new_node, &am);
3478 /* match_arguments assume that out-mode = in-mode, this isn't true here
3480 set_ia32_ls_mode(new_node, smaller_mode);
3481 SET_IA32_ORIG_NODE(new_node, node);
3482 new_node = fix_mem_proj(new_node, &am);
3487 * Transforms a Conv node.
3489 * @return The created ia32 Conv node
/**
 * Transform a Conv node, dispatching on the source/target mode
 * combination: float<->float, float->int, int->float, and int->int,
 * for both SSE and x87 code paths. No-op and mode_b conversions are
 * eliminated.
 */
3491 static ir_node *gen_Conv(ir_node *node)
3493 ir_node *block = get_nodes_block(node);
3494 ir_node *new_block = be_transform_node(block);
3495 ir_node *op = get_Conv_op(node);
3496 ir_node *new_op = NULL;
3497 dbg_info *dbgi = get_irn_dbg_info(node);
3498 ir_mode *src_mode = get_irn_mode(op);
3499 ir_mode *tgt_mode = get_irn_mode(node);
3500 int src_bits = get_mode_size_bits(src_mode);
3501 int tgt_bits = get_mode_size_bits(tgt_mode);
3502 ir_node *res = NULL;
3504 assert(!mode_is_int(src_mode) || src_bits <= 32);
3505 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3507 if (src_mode == mode_b) {
3508 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3509 /* nothing to do, we already model bools as 0/1 ints */
3510 return be_transform_node(op);
3513 if (src_mode == tgt_mode) {
3514 if (get_Conv_strict(node)) {
3515 if (ia32_cg_config.use_sse2) {
3516 /* when we are in SSE mode, we can kill all strict no-op conversion */
3517 return be_transform_node(op);
3520 /* this should be optimized already, but who knows... */
3521 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3522 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3523 return be_transform_node(op);
3527 if (mode_is_float(src_mode)) {
3528 new_op = be_transform_node(op);
3529 /* we convert from float ... */
3530 if (mode_is_float(tgt_mode)) {
3532 /* Matze: I'm a bit unsure what the following is for? seems wrong
3534 if (src_mode == mode_E && tgt_mode == mode_D
3535 && !get_Conv_strict(node)) {
3536 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3542 if (ia32_cg_config.use_sse2) {
3543 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3544 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3546 set_ia32_ls_mode(res, tgt_mode);
3548 if (get_Conv_strict(node)) {
3549 /* if fp_no_float_fold is not set then we assume that we
3550 * don't have any float operations in a non
3551 * mode_float_arithmetic mode and can skip strict upconvs */
3552 if (src_bits < tgt_bits
3553 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3554 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3557 res = gen_x87_strict_conv(tgt_mode, new_op);
3558 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3562 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3567 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3568 if (ia32_cg_config.use_sse2) {
3569 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3571 set_ia32_ls_mode(res, src_mode);
3573 return gen_x87_fp_to_gp(node);
3577 /* we convert from int ... */
3578 if (mode_is_float(tgt_mode)) {
3580 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3581 if (ia32_cg_config.use_sse2) {
3582 new_op = be_transform_node(op);
3583 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3585 set_ia32_ls_mode(res, tgt_mode);
/* x87: a strict conv is needed when the int has more significant
 * bits than the float mantissa can hold */
3587 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3588 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3589 res = gen_x87_gp_to_fp(node, src_mode);
3591 /* we need a strict-Conv, if the int mode has more bits than the
3593 if (float_mantissa < int_mantissa) {
3594 res = gen_x87_strict_conv(tgt_mode, res);
3595 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3599 } else if (tgt_mode == mode_b) {
3600 /* mode_b lowering already took care that we only have 0/1 values */
3601 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3602 src_mode, tgt_mode));
3603 return be_transform_node(op);
3606 if (src_bits == tgt_bits) {
3607 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3608 src_mode, tgt_mode));
3609 return be_transform_node(op);
3612 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Try to encode the node as an ia32 Immediate (respecting the given
 * asm-style constraint character); fall back to a normal transform.
 */
3620 static ir_node *create_immediate_or_transform(ir_node *node,
3621 char immediate_constraint_type)
3623 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3624 if (new_node == NULL) {
3625 new_node = be_transform_node(node);
3631 * Transforms a FrameAddr into an ia32 Add.
/**
 * Transform a be_FrameAddr into an ia32 Lea on the frame pointer with
 * the frame entity attached (offset resolved later).
 */
3633 static ir_node *gen_be_FrameAddr(ir_node *node)
3635 ir_node *block = be_transform_node(get_nodes_block(node));
3636 ir_node *op = be_get_FrameAddr_frame(node);
3637 ir_node *new_op = be_transform_node(op);
3638 dbg_info *dbgi = get_irn_dbg_info(node);
3641 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3642 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3643 set_ia32_use_frame(new_node);
3645 SET_IA32_ORIG_NODE(new_node, node);
3651 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/**
 * Transform a be_Return. When SSE2 is used and a float is returned,
 * the value must travel from XMM0 through memory to the x87 TOS (the
 * calling convention returns floats in st(0)); the Barrier feeding the
 * Return is rebuilt with the reloaded value and memory. All other
 * Returns are duplicated unchanged.
 */
3653 static ir_node *gen_be_Return(ir_node *node)
3655 ir_graph *irg = current_ir_graph;
3656 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3657 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3658 ir_entity *ent = get_irg_entity(irg);
3659 ir_type *tp = get_entity_type(ent);
3664 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3665 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3667 int pn_ret_val, pn_ret_mem, arity, i;
3669 assert(ret_val != NULL);
3670 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3671 return be_duplicate_node(node);
3674 res_type = get_method_res_type(tp, 0);
3676 if (! is_Primitive_type(res_type)) {
3677 return be_duplicate_node(node);
3680 mode = get_type_mode(res_type);
3681 if (! mode_is_float(mode)) {
3682 return be_duplicate_node(node);
3685 assert(get_method_n_ress(tp) == 1);
3687 pn_ret_val = get_Proj_proj(ret_val);
3688 pn_ret_mem = get_Proj_proj(ret_mem);
3690 /* get the Barrier */
3691 barrier = get_Proj_pred(ret_val);
3693 /* get result input of the Barrier */
3694 ret_val = get_irn_n(barrier, pn_ret_val);
3695 new_ret_val = be_transform_node(ret_val);
3697 /* get memory input of the Barrier */
3698 ret_mem = get_irn_n(barrier, pn_ret_mem);
3699 new_ret_mem = be_transform_node(ret_mem);
3701 frame = get_irg_frame(irg);
3703 dbgi = get_irn_dbg_info(barrier);
3704 block = be_transform_node(get_nodes_block(barrier));
3706 /* store xmm0 onto stack */
3707 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3708 new_ret_mem, new_ret_val);
3709 set_ia32_ls_mode(sse_store, mode);
3710 set_ia32_op_type(sse_store, ia32_AddrModeD);
3711 set_ia32_use_frame(sse_store);
3713 /* load into x87 register */
3714 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3715 set_ia32_op_type(fld, ia32_AddrModeS);
3716 set_ia32_use_frame(fld);
3718 mproj = new_r_Proj(block, fld, mode_M, pn_ia32_vfld_M);
3719 fld = new_r_Proj(block, fld, mode_vfp, pn_ia32_vfld_res);
3721 /* create a new barrier */
3722 arity = get_irn_arity(barrier);
3723 in = ALLOCAN(ir_node*, arity);
3724 for (i = 0; i < arity; ++i) {
/* substitute the reloaded value and its memory at the old positions */
3727 if (i == pn_ret_val) {
3729 } else if (i == pn_ret_mem) {
3732 ir_node *in = get_irn_n(barrier, i);
3733 new_in = be_transform_node(in);
3738 new_barrier = new_ir_node(dbgi, irg, block,
3739 get_irn_op(barrier), get_irn_mode(barrier),
3741 copy_node_attr(barrier, new_barrier);
3742 be_duplicate_deps(barrier, new_barrier);
3743 be_set_transformed_node(barrier, new_barrier);
3745 /* transform normally */
3746 return be_duplicate_node(node);
3750  * Transform a be_AddSP into an ia32_SubSP.
/* The ia32 stack grows downwards, so allocating stack space (be_AddSP)
 * is implemented by subtracting from %esp. */
3752 static ir_node *gen_be_AddSP(ir_node *node)
3754 	ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3755 	ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3757 	return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3758 	                 match_am | match_immediate);
3762  * Transform a be_SubSP into an ia32_AddSP
/* Mirror of gen_be_AddSP: freeing stack space means adding to %esp. */
3764 static ir_node *gen_be_SubSP(ir_node *node)
3766 	ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3767 	ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3769 	return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3770 	                 match_am | match_immediate);
3774  * Change some phi modes
/* Rewrites the Phi's mode for the backend (gp register mode for integers,
 * xmm/vfp for floats depending on SSE2) while keeping the UNtransformed
 * predecessors; the preds are fixed up later via be_enqueue_preds. */
3776 static ir_node *gen_Phi(ir_node *node)
3778 	ir_node *block = be_transform_node(get_nodes_block(node));
3779 	ir_graph *irg = current_ir_graph;
3780 	dbg_info *dbgi = get_irn_dbg_info(node);
3781 	ir_mode *mode = get_irn_mode(node);
3784 	if (ia32_mode_needs_gp_reg(mode)) {
3785 		/* we shouldn't have any 64bit stuff around anymore */
3786 		assert(get_mode_size_bits(mode) <= 32);
3787 		/* all integer operations are on 32bit registers now */
3789 	} else if (mode_is_float(mode)) {
3790 		if (ia32_cg_config.use_sse2) {
3797 	/* phi nodes allow loops, so we use the old arguments for now
3798 	 * and fix this later */
3799 	phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3800 	                  get_irn_in(node) + 1);
3801 	copy_node_attr(node, phi);
3802 	be_duplicate_deps(node, phi);
/* Queue the predecessors so the scheduler transforms them afterwards. */
3804 	be_enqueue_preds(node);
/* Transform an indirect jump (IJmp) into an ia32 IJmp, allowing the jump
 * target to be folded into an address mode or an immediate. */
3812 static ir_node *gen_IJmp(ir_node *node)
3814 	ir_node *block = get_nodes_block(node);
3815 	ir_node *new_block = be_transform_node(block);
3816 	dbg_info *dbgi = get_irn_dbg_info(node);
3817 	ir_node *op = get_IJmp_target(node);
3819 	ia32_address_mode_t am;
3820 	ia32_address_t *addr = &am.addr;
/* Jump targets are pointers. */
3822 	assert(get_irn_mode(op) == mode_P);
3824 	match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3826 	new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3827 	                            addr->mem, am.new_op2);
3828 	set_am_attributes(new_node, &am);
3829 	SET_IA32_ORIG_NODE(new_node, node);
/* If a load was folded into the address mode, reroute its memory Proj. */
3831 	new_node = fix_mem_proj(new_node, &am);
3837  * Transform a Bound node.
/* Only the lower-bound == 0 case (typical for Java array checks) is
 * supported: it becomes an unsigned compare "index < upper" via Sub flags
 * plus a Jcc.  A general Bound panics. */
3839 static ir_node *gen_Bound(ir_node *node)
3842 	ir_node *lower = get_Bound_lower(node);
3843 	dbg_info *dbgi = get_irn_dbg_info(node);
3845 	if (is_Const_0(lower)) {
3846 		/* typical case for Java */
3847 		ir_node *sub, *res, *flags, *block;
3849 		res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3850 			new_bd_ia32_Sub, match_mode_neutral  | match_am | match_immediate);
3852 		block = get_nodes_block(res);
3853 		if (! is_Proj(res)) {
3855 			set_irn_mode(sub, mode_T);
3856 			res = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_res);
3858 			sub = get_Proj_pred(res);
/* Unsigned "lower-than" also catches negative indices in one compare. */
3860 		flags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3861 		new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3862 		SET_IA32_ORIG_NODE(new_node, node);
3864 		panic("generic Bound not supported in ia32 Backend");
/* Lowered shift nodes (l_ShlDep/l_ShrDep/l_SarDep) map 1:1 onto the ia32
 * Shl/Shr/Sar instructions; the count may become an immediate. */
3870 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3872 	ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3873 	ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3875 	return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3876 	                       match_immediate | match_mode_neutral);
3879 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3881 	ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3882 	ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3883 	return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
3887 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3889 	ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3890 	ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3891 	return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Lowered 64bit add, low word: an ordinary Add whose mode is forced to
 * mode_T so the carry flag Proj can feed the matching l_Adc. */
3895 static ir_node *gen_ia32_l_Add(ir_node *node)
3897 	ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3898 	ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3899 	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3900 			match_commutative | match_am | match_immediate |
3901 			match_mode_neutral);
/* gen_binop may hand back a Proj; unwrap to reach the Add itself. */
3903 	if (is_Proj(lowered)) {
3904 		lowered = get_Proj_pred(lowered);
3906 		assert(is_ia32_Add(lowered));
3907 		set_irn_mode(lowered, mode_T);
/* Lowered 64bit add, high word: add-with-carry consuming the flags. */
3913 static ir_node *gen_ia32_l_Adc(ir_node *node)
3915 	return gen_binop_flags(node, new_bd_ia32_Adc,
3916 			match_commutative | match_am | match_immediate |
3917 			match_mode_neutral);
3921  * Transforms a l_MulS into a "real" MulS node.
3923  * @return the created ia32 Mul node
/* Unsigned widening multiply (EDX:EAX result) for 64bit lowering. */
3925 static ir_node *gen_ia32_l_Mul(ir_node *node)
3927 	ir_node *left = get_binop_left(node);
3928 	ir_node *right = get_binop_right(node);
3930 	return gen_binop(node, left, right, new_bd_ia32_Mul,
3931 	                 match_commutative | match_am | match_mode_neutral);
3935  * Transforms a l_IMulS into a "real" IMul1OPS node.
3937  * @return the created ia32 IMul1OP node
/* Signed one-operand multiply, likewise producing EDX:EAX. */
3939 static ir_node *gen_ia32_l_IMul(ir_node *node)
3941 	ir_node *left = get_binop_left(node);
3942 	ir_node *right = get_binop_right(node);
3944 	return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3945 	                 match_commutative | match_am | match_mode_neutral);
/* Lowered 64bit sub, low word: like gen_ia32_l_Add but with Sub; forced to
 * mode_T so the borrow flag can feed the matching l_Sbb. */
3948 static ir_node *gen_ia32_l_Sub(ir_node *node)
3950 	ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3951 	ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3952 	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3953 			match_am | match_immediate | match_mode_neutral);
3955 	if (is_Proj(lowered)) {
3956 		lowered = get_Proj_pred(lowered);
3958 		assert(is_ia32_Sub(lowered));
3959 		set_irn_mode(lowered, mode_T);
/* Lowered 64bit sub, high word: subtract-with-borrow. */
3965 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3967 	return gen_binop_flags(node, new_bd_ia32_Sbb,
3968 			match_am | match_immediate | match_mode_neutral);
3972  * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3973  * op1 - target to be shifted
3974  * op2 - contains bits to be shifted into target
3976  * Only op3 can be an immediate.
3978 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3979                                          ir_node *low, ir_node *count)
3981 	ir_node *block = get_nodes_block(node);
3982 	ir_node *new_block = be_transform_node(block);
3983 	dbg_info *dbgi = get_irn_dbg_info(node);
3984 	ir_node *new_high = be_transform_node(high);
3985 	ir_node *new_low = be_transform_node(low);
3989 	/* the shift amount can be any mode that is bigger than 5 bits, since all
3990 	 * other bits are ignored anyway */
/* Skip single-user integer Convs on the count: SHLD/SHRD only look at the
 * low 5 bits of the count register. */
3991 	while (is_Conv(count)          &&
3992 	       get_irn_n_edges(count) == 1 &&
3993 	       mode_is_int(get_irn_mode(count))) {
3994 		assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3995 		count = get_Conv_op(count);
3997 	new_count = create_immediate_or_transform(count, 0);
3999 	if (is_ia32_l_ShlD(node)) {
4000 		new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4003 		new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4006 	SET_IA32_ORIG_NODE(new_node, node);
/* Thin wrappers extracting the three operands for the helper above. */
4011 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4013 	ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4014 	ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4015 	ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4016 	return gen_lowered_64bit_shifts(node, high, low, count);
4019 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4021 	ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4022 	ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4023 	ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4024 	return gen_lowered_64bit_shifts(node, high, low, count);
/* Convert a 64bit integer (given as low/high 32bit words) to a float:
 * spill both words to the frame, fild the 64bit slot, and — for unsigned
 * sources with the sign bit set — add a 2^64 bias constant to correct the
 * signed interpretation of fild. */
4027 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4029 	ir_node *src_block = get_nodes_block(node);
4030 	ir_node *block = be_transform_node(src_block);
4031 	ir_graph *irg = current_ir_graph;
4032 	dbg_info *dbgi = get_irn_dbg_info(node);
4033 	ir_node *frame = get_irg_frame(irg);
4034 	ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4035 	ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4036 	ir_node *new_val_low = be_transform_node(val_low);
4037 	ir_node *new_val_high = be_transform_node(val_high);
4039 	ir_node *sync, *fild, *res;
4040 	ir_node *store_low, *store_high;
4042 	if (ia32_cg_config.use_sse2) {
4043 		panic("ia32_l_LLtoFloat not implemented for SSE2");
/* Store both halves into one 64bit frame slot (high word at offset 4). */
4047 	store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4049 	store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4051 	SET_IA32_ORIG_NODE(store_low,  node);
4052 	SET_IA32_ORIG_NODE(store_high, node);
4054 	set_ia32_use_frame(store_low);
4055 	set_ia32_use_frame(store_high);
4056 	set_ia32_op_type(store_low, ia32_AddrModeD);
4057 	set_ia32_op_type(store_high, ia32_AddrModeD);
4058 	set_ia32_ls_mode(store_low, mode_Iu);
4059 	set_ia32_ls_mode(store_high, mode_Is);
4060 	add_ia32_am_offs_int(store_high, 4);
/* Both stores must complete before the fild reads the slot. */
4064 	sync = new_rd_Sync(dbgi, block, 2, in);
4067 	fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4069 	set_ia32_use_frame(fild);
4070 	set_ia32_op_type(fild, ia32_AddrModeS);
4071 	set_ia32_ls_mode(fild, mode_Ls);
4073 	SET_IA32_ORIG_NODE(fild, node);
4075 	res = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
4077 	if (! mode_is_signed(get_irn_mode(val_high))) {
4078 		ia32_address_mode_t am;
/* Shr by 31 isolates the sign bit, which indexes a 2-entry constant table
 * ({0, 2^64}) selected via the address mode below. */
4080 		ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4083 		am.addr.base = noreg_GP;
4084 		am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4085 		am.addr.mem = nomem;
4088 		am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4089 		am.addr.use_frame = 0;
4090 		am.addr.frame_entity = NULL;
4091 		am.addr.symconst_sign = 0;
4092 		am.ls_mode = mode_F;
4093 		am.mem_proj = nomem;
4094 		am.op_type = ia32_AddrModeS;
4096 		am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4097 		am.pinned = op_pin_state_floats;
4099 		am.ins_permuted = 0;
4101 		fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4102 			am.new_op1, am.new_op2, get_fpcw());
4103 		set_am_attributes(fadd, &am);
4105 		set_irn_mode(fadd, mode_T);
4106 		res = new_rd_Proj(NULL, block, fadd, mode_vfp, pn_ia32_res);
/* Convert a float to a 64bit integer: fist the x87 value into a 64bit
 * frame slot; the two 32bit halves are read back by gen_Proj_l_FloattoLL. */
4111 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4113 	ir_node *src_block = get_nodes_block(node);
4114 	ir_node *block = be_transform_node(src_block);
4115 	ir_graph *irg = get_Block_irg(block);
4116 	dbg_info *dbgi = get_irn_dbg_info(node);
4117 	ir_node *frame = get_irg_frame(irg);
4118 	ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4119 	ir_node *new_val = be_transform_node(val);
4120 	ir_node *fist, *mem;
4122 	mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4123 	SET_IA32_ORIG_NODE(fist, node);
4124 	set_ia32_use_frame(fist);
4125 	set_ia32_op_type(fist, ia32_AddrModeD);
/* 64bit store mode -> needs a 64bit stack entity. */
4126 	set_ia32_ls_mode(fist, mode_Ls);
4132  * the BAD transformer.
/* Catch-all registered for node types that must never reach this phase. */
4134 static ir_node *bad_transform(ir_node *node)
4136 	panic("No transform function for %+F available.", node);
/* Read one 32bit half of a l_FloattoLL result back from the 64bit frame
 * slot written by the fist (high half at offset 4). */
4140 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4142 	ir_node *block = be_transform_node(get_nodes_block(node));
4143 	ir_graph *irg = get_Block_irg(block);
4144 	ir_node *pred = get_Proj_pred(node);
4145 	ir_node *new_pred = be_transform_node(pred);
4146 	ir_node *frame = get_irg_frame(irg);
4147 	dbg_info *dbgi = get_irn_dbg_info(node);
4148 	long pn = get_Proj_proj(node);
4153 	load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4154 	SET_IA32_ORIG_NODE(load, node);
4155 	set_ia32_use_frame(load);
4156 	set_ia32_op_type(load, ia32_AddrModeS);
4157 	set_ia32_ls_mode(load, mode_Iu);
4158 	/* we need a 64bit stackslot (fist stores 64bit) even though we only load
4159 	 * 32 bit from it with this particular load */
4160 	attr = get_ia32_attr(load);
4161 	attr->data.need_64bit_stackent = 1;
4163 	if (pn == pn_ia32_l_FloattoLL_res_high) {
4164 		add_ia32_am_offs_int(load, 4);
4166 		assert(pn == pn_ia32_l_FloattoLL_res_low);
4169 	proj = new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4175  * Transform the Projs of an AddSP.
/* be_AddSP became an ia32 SubSP (stack grows down), so the proj numbers
 * are renumbered onto the SubSP outputs; the sp result is pinned to %esp. */
4177 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4179 	ir_node *block = be_transform_node(get_nodes_block(node));
4180 	ir_node *pred = get_Proj_pred(node);
4181 	ir_node *new_pred = be_transform_node(pred);
4182 	dbg_info *dbgi = get_irn_dbg_info(node);
4183 	long proj = get_Proj_proj(node);
4185 	if (proj == pn_be_AddSP_sp) {
4186 		ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4187 		                           pn_ia32_SubSP_stack);
4188 		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4190 	} else if (proj == pn_be_AddSP_res) {
4191 		return new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4192 		                   pn_ia32_SubSP_addr);
4193 	} else if (proj == pn_be_AddSP_M) {
4194 		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_SubSP_M);
4197 	panic("No idea how to transform proj->AddSP");
4201  * Transform the Projs of a SubSP.
/* Mirror of gen_Proj_be_AddSP: be_SubSP became an ia32 AddSP. */
4203 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4205 	ir_node *block = be_transform_node(get_nodes_block(node));
4206 	ir_node *pred = get_Proj_pred(node);
4207 	ir_node *new_pred = be_transform_node(pred);
4208 	dbg_info *dbgi = get_irn_dbg_info(node);
4209 	long proj = get_Proj_proj(node);
4211 	if (proj == pn_be_SubSP_sp) {
4212 		ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4213 		                           pn_ia32_AddSP_stack);
4214 		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4216 	} else if (proj == pn_be_SubSP_M) {
4217 		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_AddSP_M);
4220 	panic("No idea how to transform proj->SubSP");
4224  * Transform and renumber the Projs from a Load.
/* Renumbers Load projs onto the concrete ia32 load that the Load was
 * transformed into (Load, Conv-with-folded-load, xLoad or vfld), marking
 * exception-raising loads.  Memory projs of multi-user loads are delayed
 * because the load may still be folded into another node's address mode. */
4226 static ir_node *gen_Proj_Load(ir_node *node)
4229 	ir_node *block = be_transform_node(get_nodes_block(node));
4230 	ir_node *pred = get_Proj_pred(node);
4231 	dbg_info *dbgi = get_irn_dbg_info(node);
4232 	long proj = get_Proj_proj(node);
4234 	/* loads might be part of source address mode matches, so we don't
4235 	 * transform the ProjMs yet (with the exception of loads whose result is
4238 	if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4240 		ir_node *old_block = get_nodes_block(node);
4242 		/* this is needed, because sometimes we have loops that are only
4243 		   reachable through the ProjM */
4244 		be_enqueue_preds(node);
4245 		/* do it in 2 steps, to silence firm verifier */
4246 		res = new_rd_Proj(dbgi, old_block, pred, mode_M, pn_Load_M);
4247 		set_Proj_proj(res, pn_ia32_mem);
4251 	/* renumber the proj */
4252 	new_pred = be_transform_node(pred);
4253 	if (is_ia32_Load(new_pred)) {
4256 			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Load_res);
4258 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Load_M);
4259 		case pn_Load_X_regular:
4260 			return new_rd_Jmp(dbgi, block);
4261 		case pn_Load_X_except:
4262 			/* This Load might raise an exception. Mark it. */
4263 			set_ia32_exc_label(new_pred, 1);
4264 			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* The load was folded into a Conv: take projs from the Conv instead. */
4268 	} else if (is_ia32_Conv_I2I(new_pred) ||
4269 	           is_ia32_Conv_I2I8Bit(new_pred)) {
4270 		set_irn_mode(new_pred, mode_T);
4271 		if (proj == pn_Load_res) {
4272 			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_res);
4273 		} else if (proj == pn_Load_M) {
4274 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_mem);
4276 	} else if (is_ia32_xLoad(new_pred)) {
4279 			return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4281 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xLoad_M);
4282 		case pn_Load_X_regular:
4283 			return new_rd_Jmp(dbgi, block);
4284 		case pn_Load_X_except:
4285 			/* This Load might raise an exception. Mark it. */
4286 			set_ia32_exc_label(new_pred, 1);
4287 			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4291 	} else if (is_ia32_vfld(new_pred)) {
4294 			return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4296 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfld_M);
4297 		case pn_Load_X_regular:
4298 			return new_rd_Jmp(dbgi, block);
4299 		case pn_Load_X_except:
4300 			/* This Load might raise an exception. Mark it. */
4301 			set_ia32_exc_label(new_pred, 1);
4302 			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4307 		/* can happen for ProJMs when source address mode happened for the
4310 		/* however it should not be the result proj, as that would mean the
4311 		   load had multiple users and should not have been used for
4313 		if (proj != pn_Load_M) {
4314 			panic("internal error: transformed node not a Load");
4316 		return new_rd_Proj(dbgi, block, new_pred, mode_M, 1);
4319 	panic("No idea how to transform proj");
4323  * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod all map onto one ia32 Div/IDiv node that yields both
 * quotient (div_res) and remainder (mod_res); projs are renumbered onto it. */
4325 static ir_node *gen_Proj_DivMod(ir_node *node)
4327 	ir_node *block = be_transform_node(get_nodes_block(node));
4328 	ir_node *pred = get_Proj_pred(node);
4329 	ir_node *new_pred = be_transform_node(pred);
4330 	dbg_info *dbgi = get_irn_dbg_info(node);
4331 	long proj = get_Proj_proj(node);
4333 	assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4335 	switch (get_irn_opcode(pred)) {
4339 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4341 			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4342 		case pn_Div_X_regular:
4343 			return new_rd_Jmp(dbgi, block);
4344 		case pn_Div_X_except:
4345 			set_ia32_exc_label(new_pred, 1);
4346 			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4354 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4356 			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4357 		case pn_Mod_X_except:
4358 			set_ia32_exc_label(new_pred, 1);
4359 			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4367 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4368 		case pn_DivMod_res_div:
4369 			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4370 		case pn_DivMod_res_mod:
4371 			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4372 		case pn_DivMod_X_regular:
4373 			return new_rd_Jmp(dbgi, block);
4374 		case pn_DivMod_X_except:
4375 			set_ia32_exc_label(new_pred, 1);
4376 			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4385 	panic("No idea how to transform proj->DivMod");
4389  * Transform and renumber the Projs from a CopyB.
/* A CopyB is either a CopyB_i (known size, rep movs with immediate count)
 * or a generic CopyB; pick the matching memory proj number. */
4391 static ir_node *gen_Proj_CopyB(ir_node *node)
4393 	ir_node *block = be_transform_node(get_nodes_block(node));
4394 	ir_node *pred = get_Proj_pred(node);
4395 	ir_node *new_pred = be_transform_node(pred);
4396 	dbg_info *dbgi = get_irn_dbg_info(node);
4397 	long proj = get_Proj_proj(node);
4400 	case pn_CopyB_M_regular:
4401 		if (is_ia32_CopyB_i(new_pred)) {
4402 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4403 		} else if (is_ia32_CopyB(new_pred)) {
4404 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_M);
4411 	panic("No idea how to transform proj->CopyB");
4415  * Transform and renumber the Projs of a Quot.
/* Float division became either an SSE xDiv or an x87 vfdiv; renumber the
 * memory and result projs accordingly. */
4417 static ir_node *gen_Proj_Quot(ir_node *node)
4419 	ir_node *block = be_transform_node(get_nodes_block(node));
4420 	ir_node *pred = get_Proj_pred(node);
4421 	ir_node *new_pred = be_transform_node(pred);
4422 	dbg_info *dbgi = get_irn_dbg_info(node);
4423 	long proj = get_Proj_proj(node);
4427 		if (is_ia32_xDiv(new_pred)) {
4428 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xDiv_M);
4429 		} else if (is_ia32_vfdiv(new_pred)) {
4430 			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4434 		if (is_ia32_xDiv(new_pred)) {
4435 			return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4436 		} else if (is_ia32_vfdiv(new_pred)) {
4437 			return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4440 	case pn_Quot_X_regular:
4441 	case pn_Quot_X_except:
4446 	panic("No idea how to transform proj->Quot");
/* Transform an indirect be_Call into an ia32 Call: the call target may be
 * folded into an address mode/immediate, register parameters are routed to
 * EAX/ECX/EDX, and SSE2 calls are recorded for x87 post-processing. */
4449 static ir_node *gen_be_Call(ir_node *node)
4451 	dbg_info *const dbgi = get_irn_dbg_info(node);
4452 	ir_node *const src_block = get_nodes_block(node);
4453 	ir_node *const block = be_transform_node(src_block);
4454 	ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4455 	ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4456 	ir_node *const sp = be_transform_node(src_sp);
4457 	ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4458 	ia32_address_mode_t am;
4459 	ia32_address_t *const addr = &am.addr;
4464 	ir_node * eax = noreg_GP;
4465 	ir_node * ecx = noreg_GP;
4466 	ir_node * edx = noreg_GP;
4467 	unsigned const pop = be_Call_get_pop(node);
4468 	ir_type *const call_tp = be_Call_get_type(node);
4469 	int old_no_pic_adjust;
4471 	/* Run the x87 simulator if the call returns a float value */
4472 	if (get_method_n_ress(call_tp) > 0) {
4473 		ir_type *const res_type = get_method_res_type(call_tp, 0);
4474 		ir_mode *const res_mode = get_type_mode(res_type);
4476 		if (res_mode != NULL && mode_is_float(res_mode)) {
4477 			env_cg->do_x87_sim = 1;
4481 	/* We do not want be_Call direct calls */
4482 	assert(be_Call_get_entity(node) == NULL);
4484 	/* special case for PIC trampoline calls */
/* Temporarily disable PIC adjustment while matching the call target. */
4485 	old_no_pic_adjust = no_pic_adjust;
4486 	no_pic_adjust = env_cg->birg->main_env->options->pic;
4488 	match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4489 	                match_am | match_immediate);
4491 	no_pic_adjust = old_no_pic_adjust;
/* The last input is the fpcw; walk the remaining inputs backwards and
 * assign each limited GP register parameter to its slot. */
4493 	i = get_irn_arity(node) - 1;
4494 	fpcw = be_transform_node(get_irn_n(node, i--));
4495 	for (; i >= be_pos_Call_first_arg; --i) {
4496 		arch_register_req_t const *const req = arch_get_register_req(node, i);
4497 		ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4499 		assert(req->type == arch_register_req_type_limited);
4500 		assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4502 		switch (*req->limited) {
4503 			case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4504 			case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4505 			case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4506 			default: panic("Invalid GP register for register parameter");
4510 	mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4511 	call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4512 	                        am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4513 	set_am_attributes(call, &am);
4514 	call = fix_mem_proj(call, &am);
4516 	if (get_irn_pinned(node) == op_pin_state_pinned)
4517 		set_irn_pinned(call, op_pin_state_pinned);
4519 	SET_IA32_ORIG_NODE(call, node);
4521 	if (ia32_cg_config.use_sse2) {
4522 		/* remember this call for post-processing */
4523 		ARR_APP1(ir_node *, call_list, call);
4524 		ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4531  * Transform Builtin trap
/* __builtin_trap() -> UD2 (guaranteed invalid-opcode exception). */
4533 static ir_node *gen_trap(ir_node *node) {
4534 	dbg_info *dbgi = get_irn_dbg_info(node);
4535 	ir_node *block = be_transform_node(get_nodes_block(node));
4536 	ir_node *mem = be_transform_node(get_Builtin_mem(node));
4538 	return new_bd_ia32_UD2(dbgi, block, mem);
4542  * Transform Builtin debugbreak
/* __builtin_debugbreak() -> int3 breakpoint instruction. */
4544 static ir_node *gen_debugbreak(ir_node *node) {
4545 	dbg_info *dbgi = get_irn_dbg_info(node);
4546 	ir_node *block = be_transform_node(get_nodes_block(node));
4547 	ir_node *mem = be_transform_node(get_Builtin_mem(node));
4549 	return new_bd_ia32_Breakpoint(dbgi, block, mem);
4553  * Transform Builtin return_address
/* __builtin_return_address(level): for level > 0 the visible code climbs
 * `value` frames via ClimbFrame, then loads the return address slot of
 * that frame.  Param 0 must be a Const (frame level). */
4555 static ir_node *gen_return_address(ir_node *node) {
4556 	ir_node *param = get_Builtin_param(node, 0);
4557 	ir_node *frame = get_Builtin_param(node, 1);
4558 	dbg_info *dbgi = get_irn_dbg_info(node);
4559 	tarval *tv = get_Const_tarval(param);
4560 	unsigned long value = get_tarval_long(tv);
4562 	ir_node *block = be_transform_node(get_nodes_block(node));
4563 	ir_node *ptr = be_transform_node(frame);
4567 		ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4568 		ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4569 		ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4572 	/* load the return address from this frame */
4573 	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4575 	set_irn_pinned(load, get_irn_pinned(node));
4576 	set_ia32_op_type(load, ia32_AddrModeS);
4577 	set_ia32_ls_mode(load, mode_Iu);
4579 	set_ia32_am_offs_int(load, 0);
4580 	set_ia32_use_frame(load);
4581 	set_ia32_frame_ent(load, ia32_get_return_address_entity());
4583 	if (get_irn_pinned(node) == op_pin_state_floats) {
/* All result proj numbers must coincide for rematerialization to be safe. */
4584 		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4585 				&& pn_ia32_vfld_res == pn_ia32_Load_res
4586 				&& pn_ia32_Load_res == pn_ia32_res);
4587 		arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4590 	SET_IA32_ORIG_NODE(load, node);
4591 	return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4595  * Transform Builtin frame_address
/* __builtin_frame_address(level): analogous to gen_return_address but
 * loading the frame-address entity of the target frame. */
4597 static ir_node *gen_frame_address(ir_node *node) {
4598 	ir_node *param = get_Builtin_param(node, 0);
4599 	ir_node *frame = get_Builtin_param(node, 1);
4600 	dbg_info *dbgi = get_irn_dbg_info(node);
4601 	tarval *tv = get_Const_tarval(param);
4602 	unsigned long value = get_tarval_long(tv);
4604 	ir_node *block = be_transform_node(get_nodes_block(node));
4605 	ir_node *ptr = be_transform_node(frame);
4610 		ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4611 		ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4612 		ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4615 	/* load the frame address from this frame */
4616 	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4618 	set_irn_pinned(load, get_irn_pinned(node));
4619 	set_ia32_op_type(load, ia32_AddrModeS);
4620 	set_ia32_ls_mode(load, mode_Iu);
4622 	ent = ia32_get_frame_address_entity();
4624 	set_ia32_am_offs_int(load, 0);
4625 	set_ia32_use_frame(load);
4626 	set_ia32_frame_ent(load, ent);
/* NOTE(review): set_ia32_am_offs_int(load, 0) appears twice in the visible
 * code (here and above); lines between them are elided, so whether the
 * second call is redundant cannot be judged from this view — verify against
 * the full source before touching it. */
4628 	/* will fail anyway, but gcc does this: */
4629 	set_ia32_am_offs_int(load, 0);
4632 	if (get_irn_pinned(node) == op_pin_state_floats) {
4633 		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4634 				&& pn_ia32_vfld_res == pn_ia32_Load_res
4635 				&& pn_ia32_Load_res == pn_ia32_res);
4636 		arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4639 	SET_IA32_ORIG_NODE(load, node);
4640 	return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/* (Comment in original says "frame_address"; the visible code transforms the
 * prefetch builtin.) */
4644  * Transform Builtin frame_address
/* Emits an SSE prefetch (NTA/T2/T1/T0 by locality) or a 3DNow! prefetch /
 * prefetchw (for writes), or routes memory untouched when neither family of
 * prefetch instructions is available.  Params: 0 = address, 1 = rw flag,
 * 2 = locality hint. */
4646 static ir_node *gen_prefetch(ir_node *node) {
4648 	ir_node *ptr, *block, *mem, *base, *index;
4649 	ir_node *param, *new_node;
4652 	ia32_address_t addr;
4654 	if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4655 		/* no prefetch at all, route memory */
4656 		return be_transform_node(get_Builtin_mem(node));
4659 	param = get_Builtin_param(node, 1);
4660 	tv = get_Const_tarval(param);
4661 	rw = get_tarval_long(tv);
4663 	/* construct load address */
4664 	memset(&addr, 0, sizeof(addr));
4665 	ptr = get_Builtin_param(node, 0);
4666 	ia32_create_address_mode(&addr, ptr, 0);
4673 		base = be_transform_node(base);
4676 	if (index == NULL) {
4679 		index = be_transform_node(index);
4682 	dbgi = get_irn_dbg_info(node);
4683 	block = be_transform_node(get_nodes_block(node));
4684 	mem = be_transform_node(get_Builtin_mem(node));
4686 	if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4687 		/* we have 3DNow!, this was already checked above */
4688 		new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4689 	} else if (ia32_cg_config.use_sse_prefetch) {
4690 		/* note: rw == 1 is IGNORED in that case */
4691 		param = get_Builtin_param(node, 2);
4692 		tv = get_Const_tarval(param);
4693 		locality = get_tarval_long(tv);
4695 		/* SSE style prefetch */
4698 			new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4701 			new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4704 			new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4707 			new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4711 		assert(ia32_cg_config.use_3dnow_prefetch);
4712 		/* 3DNow! style prefetch */
4713 		new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4716 	set_irn_pinned(new_node, get_irn_pinned(node));
4717 	set_ia32_op_type(new_node, ia32_AddrModeS);
4718 	set_ia32_ls_mode(new_node, mode_Bu);
4719 	set_address(new_node, &addr);
4721 	SET_IA32_ORIG_NODE(new_node, node);
4723 	be_dep_on_frame(new_node);
4724 	return new_r_Proj(block, new_node, mode_M, pn_ia32_Prefetch_M);
4728  * Transform bsf like node
/* Shared helper for bsf/bsr/popcnt-style builtins: matches the single
 * parameter against an address mode and builds the node via `func`. */
4730 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4732 	ir_node *param = get_Builtin_param(node, 0);
4733 	dbg_info *dbgi = get_irn_dbg_info(node);
4735 	ir_node *block = get_nodes_block(node);
4736 	ir_node *new_block = be_transform_node(block);
4738 	ia32_address_mode_t am;
4739 	ia32_address_t *addr = &am.addr;
4742 	match_arguments(&am, block, NULL, param, NULL, match_am);
4744 	cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4745 	set_am_attributes(cnt, &am);
4746 	set_ia32_ls_mode(cnt, get_irn_mode(param));
4748 	SET_IA32_ORIG_NODE(cnt, node);
4749 	return fix_mem_proj(cnt, &am);
4753  * Transform builtin ffs.
/* ffs(x): bsf gives the lowest set bit but is undefined for x == 0, so the
 * visible code builds: set ZF -> byte -> negate to a 0/-1 mask, OR it into
 * the bsf result (forcing -1 for x == 0), then add 1. */
4755 static ir_node *gen_ffs(ir_node *node)
4757 	ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4758 	ir_node *real = skip_Proj(bsf);
4759 	dbg_info *dbgi = get_irn_dbg_info(real);
4760 	ir_node *block = get_nodes_block(real);
4761 	ir_node *flag, *set, *conv, *neg, *or;
/* Ensure the Bsf is mode_T so both the result and flags projs exist. */
4764 	if (get_irn_mode(real) != mode_T) {
4765 		set_irn_mode(real, mode_T);
4766 		bsf = new_r_Proj(block, real, mode_Iu, pn_ia32_res);
4769 	flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);
4772 	set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
4773 	SET_IA32_ORIG_NODE(set, node);
4776 	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4777 	SET_IA32_ORIG_NODE(conv, node);
4780 	neg = new_bd_ia32_Neg(dbgi, block, conv);
4783 	or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4784 	set_ia32_commutative(or);
/* result = (bsf | mask) + 1 : yields 0 for x == 0, bit index + 1 otherwise */
4787 	return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4791  * Transform builtin clz.
/* clz(x) = 31 - bsr(x); computed as bsr(x) XOR 31 (equivalent for the
 * 0..31 range of bsr results). */
4793 static ir_node *gen_clz(ir_node *node)
4795 	ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4796 	ir_node *real = skip_Proj(bsr);
4797 	dbg_info *dbgi = get_irn_dbg_info(real);
4798 	ir_node *block = get_nodes_block(real);
4799 	ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4801 	return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
4805  * Transform builtin ctz.
/* ctz(x) is exactly bsf(x). */
4807 static ir_node *gen_ctz(ir_node *node)
4809 	return gen_unop_AM(node, new_bd_ia32_Bsf);
4813  * Transform builtin parity.
/* parity(x): compare x against 0 to set the CPU flags, then materialize the
 * parity flag with a Set and zero-extend the byte.
 * NOTE(review): the x86 parity flag reflects only the low byte of the
 * result; how the full 32bit parity is obtained depends on elided lines —
 * verify against the full source. */
4815 static ir_node *gen_parity(ir_node *node)
4817 	ir_node *param = get_Builtin_param(node, 0);
4818 	dbg_info *dbgi = get_irn_dbg_info(node);
4820 	ir_node *block = get_nodes_block(node);
4822 	ir_node *new_block = be_transform_node(block);
4823 	ir_node *imm, *cmp, *new_node;
4825 	ia32_address_mode_t am;
4826 	ia32_address_t *addr = &am.addr;
4830 	match_arguments(&am, block, NULL, param, NULL, match_am);
4831 	imm = ia32_create_Immediate(NULL, 0, 0);
4832 	cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4833 	                      addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4834 	set_am_attributes(cmp, &am);
4835 	set_ia32_ls_mode(cmp, mode_Iu);
4837 	SET_IA32_ORIG_NODE(cmp, node);
4839 	cmp = fix_mem_proj(cmp, &am);
4842 	new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
4843 	SET_IA32_ORIG_NODE(new_node, node);
/* Zero-extend the Set byte to the full register. */
4846 	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4847 	                                    nomem, new_node, mode_Bu);
4848 	SET_IA32_ORIG_NODE(new_node, node);
4853 * Transform builtin popcount
/*
 * Transform builtin popcount.
 *
 * If the CPU supports it (SSE4.2/SSE4a), a single Popcnt instruction is
 * emitted, with the operand possibly folded as an address-mode operand.
 * Otherwise the classic divide-and-conquer bit count is built: at each
 * step the value is split into fields, the upper half of each field is
 * shifted RIGHT onto the lower half, both halves are masked and added
 * (additions are expressed as Lea nodes, which do not clobber flags).
 *
 * FIX(review): the shift nodes must be right shifts (Shr); the listing
 * used new_bd_ia32_Shl, which contradicts the algorithm and the
 * existing comment "s5 = m12 >> 16" and yields wrong counts.
 */
4855 static ir_node *gen_popcount(ir_node *node) {
4856 ir_node *param = get_Builtin_param(node, 0);
4857 dbg_info *dbgi = get_irn_dbg_info(node);
4859 ir_node *block = get_nodes_block(node);
4860 ir_node *new_block = be_transform_node(block);
4863 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4865 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4866 if (ia32_cg_config.use_popcnt) {
4867 ia32_address_mode_t am;
4868 ia32_address_t *addr = &am.addr;
4871 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4873 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4874 set_am_attributes(cnt, &am);
4875 set_ia32_ls_mode(cnt, get_irn_mode(param));
4877 SET_IA32_ORIG_NODE(cnt, node);
4878 return fix_mem_proj(cnt, &am);
4881 new_param = be_transform_node(param);
4883 /* do the standard popcount algo */
4885 /* m1 = x & 0x55555555 */
4886 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4887 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
4890 simm = ia32_create_Immediate(NULL, 0, 1);
4891 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
4893 /* m2 = s1 & 0x55555555 */
4894 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (per-2-bit counts) */
4897 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4899 /* m4 = m3 & 0x33333333 */
4900 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4901 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
4904 simm = ia32_create_Immediate(NULL, 0, 2);
4905 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
4907 /* m5 = s2 & 0x33333333 */
4908 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 (per-4-bit counts) */
4911 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4913 /* m7 = m6 & 0x0F0F0F0F */
4914 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4915 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
4918 simm = ia32_create_Immediate(NULL, 0, 4);
4919 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
4921 /* m8 = s3 & 0x0F0F0F0F */
4922 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 (per-byte counts) */
4925 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4927 /* m10 = m9 & 0x00FF00FF */
4928 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4929 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
4932 simm = ia32_create_Immediate(NULL, 0, 8);
4933 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
4935 /* m11 = s4 & 0x00FF00FF */
4936 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4938 /* m12 = m10 + m11 */
4939 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4941 /* m13 = m12 & 0x0000FFFF */
4942 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4943 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4945 /* s5 = m12 >> 16 */
4946 simm = ia32_create_Immediate(NULL, 0, 16);
4947 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
4949 /* res = m13 + s5 */
4950 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4954 * Transform builtin byte swap.
/*
 * Transform builtin byte swap.
 *
 * For 32-bit values: on i486+ a single Bswap instruction is emitted;
 * otherwise the swap is composed from shifts, masks and Lea-adds:
 *   result = (x << 24) | ((x << 8) & 0xFF0000)
 *          | ((x >> 8) & 0xFF00)  | (x >> 24)
 * 16-bit values always use Bswap16; any other size panics.
 *
 * FIX(review): the masks for the (x << 8) and (x >> 8) terms were
 * swapped (0xFF00 / 0xFF0000), which produced a wrong byte order on
 * pre-i486 targets. (x << 8) moves byte1 into byte2, so it must be
 * masked with 0xFF0000; (x >> 8) moves byte2 into byte1, mask 0xFF00.
 */
4956 static ir_node *gen_bswap(ir_node *node) {
4957 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4958 dbg_info *dbgi = get_irn_dbg_info(node);
4960 ir_node *block = get_nodes_block(node);
4961 ir_node *new_block = be_transform_node(block);
4962 ir_mode *mode = get_irn_mode(param);
4963 unsigned size = get_mode_size_bits(mode);
4964 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
4968 if (ia32_cg_config.use_i486) {
4969 /* swap available */
4970 return new_bd_ia32_Bswap(dbgi, new_block, param);
4972 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4973 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4975 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF0000));
4976 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
4978 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4980 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF00));
4981 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
4983 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4984 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
4987 /* swap16 always available */
4988 return new_bd_ia32_Bswap16(dbgi, new_block, param);
4991 panic("Invalid bswap size (%d)", size);
4996 * Transform builtin outport.
/*
 * Transform builtin outport: build an Outport node writing <value> to
 * I/O port <port> (the port is an immediate where possible) and tag it
 * with the value's mode as load/store mode.
 */
4998 static ir_node *gen_outport(ir_node *node) {
4999 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5000 ir_node *oldv = get_Builtin_param(node, 1);
5001 ir_mode *mode = get_irn_mode(oldv);
5002 ir_node *value = be_transform_node(oldv);
5003 ir_node *block = be_transform_node(get_nodes_block(node));
5004 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5005 dbg_info *dbgi = get_irn_dbg_info(node);
5007 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* the written mode determines the access size of the out instruction */
5008 set_ia32_ls_mode(res, mode);
5013 * Transform builtin inport.
/*
 * Transform builtin inport: build an Inport node reading from I/O port
 * <port>; the result mode is taken from the builtin's method type so
 * the access size of the in instruction matches the declared result.
 */
5015 static ir_node *gen_inport(ir_node *node) {
5016 ir_type *tp = get_Builtin_type(node);
5017 ir_type *rstp = get_method_res_type(tp, 0);
5018 ir_mode *mode = get_type_mode(rstp);
5019 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5020 ir_node *block = be_transform_node(get_nodes_block(node));
5021 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5022 dbg_info *dbgi = get_irn_dbg_info(node);
5024 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5025 set_ia32_ls_mode(res, mode);
5027 /* check for missing Result Proj */
5032 * Transform a builtin inner trampoline
/*
 * Transform a builtin inner trampoline.
 *
 * Writes a small code stub to memory at <ptr>:
 *   B9 <env>        mov ecx, <env>      (pass static chain in ecx)
 *   E9 <rel>        jmp rel32 <callee>
 * where <rel> is the callee address relative to the end of the
 * trampoline (hence the -10 byte adjustment: the stub is 10 bytes).
 * Each byte/word is emitted as a pinned ia32 Store through the address
 * computed from <ptr>; the result is a (mem, ptr) tuple.
 */
5034 static ir_node *gen_inner_trampoline(ir_node *node) {
5035 ir_node *ptr = get_Builtin_param(node, 0);
5036 ir_node *callee = get_Builtin_param(node, 1);
5037 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5038 ir_node *mem = get_Builtin_mem(node);
5039 ir_node *block = get_nodes_block(node);
5040 ir_node *new_block = be_transform_node(block);
5044 ir_node *trampoline;
5046 dbg_info *dbgi = get_irn_dbg_info(node);
5047 ia32_address_t addr;
5049 /* construct store address */
5050 memset(&addr, 0, sizeof(addr));
5051 ia32_create_address_mode(&addr, ptr, 0);
5053 if (addr.base == NULL) {
5054 addr.base = noreg_GP;
5056 addr.base = be_transform_node(addr.base);
5059 if (addr.index == NULL) {
5060 addr.index = noreg_GP;
5062 addr.index = be_transform_node(addr.index);
5064 addr.mem = be_transform_node(mem);
5066 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5067 val = ia32_create_Immediate(NULL, 0, 0xB9);
5068 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5069 addr.index, addr.mem, val);
5070 set_irn_pinned(store, get_irn_pinned(node));
5071 set_ia32_op_type(store, ia32_AddrModeD);
5072 set_ia32_ls_mode(store, mode_Bu);
5073 set_address(store, &addr);
/* store the 32-bit environment pointer (mov's immediate operand) */
5077 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5078 addr.index, addr.mem, env);
5079 set_irn_pinned(store, get_irn_pinned(node));
5080 set_ia32_op_type(store, ia32_AddrModeD);
5081 set_ia32_ls_mode(store, mode_Iu);
5082 set_address(store, &addr);
5086 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5087 val = ia32_create_Immediate(NULL, 0, 0xE9);
5088 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5089 addr.index, addr.mem, val);
5090 set_irn_pinned(store, get_irn_pinned(node));
5091 set_ia32_op_type(store, ia32_AddrModeD);
5092 set_ia32_ls_mode(store, mode_Bu);
5093 set_address(store, &addr);
5097 trampoline = be_transform_node(ptr);
5099 /* the callee is typically an immediate */
5100 if (is_SymConst(callee)) {
/* callee - 10: relative displacement measured from the stub's end */
5101 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5103 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
/* rel = (callee - 10) - trampoline, the jmp displacement */
5105 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5107 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5108 addr.index, addr.mem, rel);
5109 set_irn_pinned(store, get_irn_pinned(node));
5110 set_ia32_op_type(store, ia32_AddrModeD);
5111 set_ia32_ls_mode(store, mode_Iu);
5112 set_address(store, &addr);
/* result tuple: [0] = memory, [1] = trampoline address */
5117 return new_r_Tuple(new_block, 2, in);
5121 * Transform Builtin node.
/*
 * Transform a Builtin node: dispatch on the builtin kind to the
 * dedicated gen_* transformer; panics for kinds not implemented on IA32.
 */
5123 static ir_node *gen_Builtin(ir_node *node) {
5124 ir_builtin_kind kind = get_Builtin_kind(node);
5128 return gen_trap(node);
5129 case ir_bk_debugbreak:
5130 return gen_debugbreak(node);
5131 case ir_bk_return_address:
5132 return gen_return_address(node);
5133 case ir_bk_frame_address:
5134 return gen_frame_address(node);
5135 case ir_bk_prefetch:
5136 return gen_prefetch(node);
5138 return gen_ffs(node);
5140 return gen_clz(node);
5142 return gen_ctz(node);
5144 return gen_parity(node);
5145 case ir_bk_popcount:
5146 return gen_popcount(node);
5148 return gen_bswap(node);
5150 return gen_outport(node);
5152 return gen_inport(node);
5153 case ir_bk_inner_trampoline:
5154 return gen_inner_trampoline(node);
5156 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5160 * Transform Proj(Builtin) node.
/*
 * Transform a Proj(Builtin) node.
 *
 * Most builtins produce a single value, so the Proj can simply be
 * replaced by the transformed builtin itself. Inport needs real Projs
 * (result and memory) on the new node; inner_trampoline returns a
 * Tuple whose predecessors are picked by Proj number.
 */
5162 static ir_node *gen_Proj_Builtin(ir_node *proj) {
5163 ir_node *node = get_Proj_pred(proj);
5164 ir_node *new_node = be_transform_node(node);
5165 ir_builtin_kind kind = get_Builtin_kind(node);
5168 case ir_bk_return_address:
5169 case ir_bk_frame_address:
5174 case ir_bk_popcount:
/* single-result builtins: the transformed node is the result */
5176 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5179 case ir_bk_debugbreak:
5180 case ir_bk_prefetch:
/* memory-only builtins */
5182 assert(get_Proj_proj(proj) == pn_Builtin_M);
5185 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5186 return new_r_Proj(get_nodes_block(new_node),
5187 new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5189 assert(get_Proj_proj(proj) == pn_Builtin_M);
5190 return new_r_Proj(get_nodes_block(new_node),
5191 new_node, mode_M, pn_ia32_Inport_M);
5193 case ir_bk_inner_trampoline:
/* Tuple layout from gen_inner_trampoline: [0] = mem, [1] = result */
5194 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5195 return get_Tuple_pred(new_node, 1);
5197 assert(get_Proj_proj(proj) == pn_Builtin_M);
5198 return get_Tuple_pred(new_node, 0);
5201 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/*
 * Transform be_IncSP: duplicate the node and mark it as modifying the
 * flags register (stack pointer adjustment is done with add/sub, which
 * clobber the x86 flags).
 */
5204 static ir_node *gen_be_IncSP(ir_node *node)
5206 ir_node *res = be_duplicate_node(node);
5207 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5213 * Transform the Projs from a be_Call.
/*
 * Transform the Projs from a be_Call.
 *
 * Maps be_Call proj numbers (memory, stack pointer, result registers)
 * to the corresponding ia32_Call proj numbers. Data results are matched
 * by scanning the ia32_Call outputs for one whose (limited) register
 * requirement equals the requirement of the original Proj. Finally the
 * architecture register is set on the well-known outputs (ESP, FPCW).
 */
5215 static ir_node *gen_Proj_be_Call(ir_node *node)
5217 ir_node *block = be_transform_node(get_nodes_block(node));
5218 ir_node *call = get_Proj_pred(node);
5219 ir_node *new_call = be_transform_node(call);
5220 dbg_info *dbgi = get_irn_dbg_info(node);
5221 long proj = get_Proj_proj(node);
5222 ir_mode *mode = get_irn_mode(node);
5225 if (proj == pn_be_Call_M_regular) {
5226 return new_rd_Proj(dbgi, block, new_call, mode_M, n_ia32_Call_mem);
5228 /* transform call modes */
5229 if (mode_is_data(mode)) {
5230 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5234 /* Map from be_Call to ia32_Call proj number */
5235 if (proj == pn_be_Call_sp) {
5236 proj = pn_ia32_Call_stack;
5237 } else if (proj == pn_be_Call_M_regular) {
5238 proj = pn_ia32_Call_M;
/* a register result: find the ia32_Call output with the same
 * limited register requirement as the original Proj */
5240 arch_register_req_t const *const req = arch_get_register_req_out(node);
5241 int const n_outs = arch_irn_get_n_outs(new_call);
5244 assert(proj >= pn_be_Call_first_res);
5245 assert(req->type & arch_register_req_type_limited);
5247 for (i = 0; i < n_outs; ++i) {
5248 arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);
5250 if (!(new_req->type & arch_register_req_type_limited) ||
5251 new_req->cls != req->cls ||
5252 *new_req->limited != *req->limited)
5261 res = new_rd_Proj(dbgi, block, new_call, mode, proj);
5263 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5265 case pn_ia32_Call_stack:
5266 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5269 case pn_ia32_Call_fpcw:
5270 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5278 * Transform the Projs from a Cmp.
/*
 * Transform the Projs from a Cmp.
 *
 * Reaching this transformer means a mode_b Proj(Cmp) survived until the
 * backend; mode_b values must have been lowered earlier, so this is a
 * hard error.
 */
5280 static ir_node *gen_Proj_Cmp(ir_node *node)
5282 /* this probably means not all mode_b nodes were lowered... */
5283 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5288 * Transform the Projs from a Bound.
/*
 * Transform the Projs from a Bound node.
 *
 * The Bound itself is transformed into a Jcc; its control-flow Projs
 * map onto the Jcc true/false outputs, the memory Proj is the original
 * memory and the result Proj is the (unchanged) checked index.
 */
5290 static ir_node *gen_Proj_Bound(ir_node *node)
5292 ir_node *new_node, *block;
5293 ir_node *pred = get_Proj_pred(node);
5295 switch (get_Proj_proj(node)) {
/* Bound performs no memory operation: pass the memory through */
5297 return be_transform_node(get_Bound_mem(pred));
5298 case pn_Bound_X_regular:
5299 new_node = be_transform_node(pred);
5300 block = get_nodes_block(new_node);
5301 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_true);
5302 case pn_Bound_X_except:
5303 new_node = be_transform_node(pred);
5304 block = get_nodes_block(new_node);
5305 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_false);
/* the result of Bound is the index itself */
5307 return be_transform_node(get_Bound_index(pred));
5309 panic("unsupported Proj from Bound");
/*
 * Transform a Proj from an ASM node.
 *
 * The memory output is mapped to the last output of the transformed ASM
 * node; integer/reference and float Projs keep their position. Any
 * other mode is rejected.
 */
5313 static ir_node *gen_Proj_ASM(ir_node *node)
5315 ir_mode *mode = get_irn_mode(node);
5316 ir_node *pred = get_Proj_pred(node);
5317 ir_node *new_pred = be_transform_node(pred);
5318 ir_node *block = get_nodes_block(new_pred);
5319 long pos = get_Proj_proj(node);
5321 if (mode == mode_M) {
/* the memory output is always the last output of the new ASM node */
5322 pos = arch_irn_get_n_outs(new_pred)-1;
5323 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5325 } else if (mode_is_float(mode)) {
5328 panic("unexpected proj mode at ASM");
5331 return new_r_Proj(block, new_pred, mode, pos);
5335 * Transform and potentially renumber Proj nodes.
/*
 * Transform and potentially renumber Proj nodes.
 *
 * Dispatches on the opcode of the Proj's predecessor to the dedicated
 * Proj transformer. Special cases: Proj(Store) collapses to the
 * transformed Store, Proj_X_initial_exec of Start becomes a Jmp, and
 * Projs of untransformed (non-ia32) predecessors are renumbered into
 * mode_Iu where a GP register is needed.
 */
5337 static ir_node *gen_Proj(ir_node *node)
5339 ir_node *pred = get_Proj_pred(node);
5342 switch (get_irn_opcode(pred)) {
5344 proj = get_Proj_proj(node);
5345 if (proj == pn_Store_M) {
/* an ia32 Store IS its own memory result */
5346 return be_transform_node(pred);
5348 panic("No idea how to transform proj->Store");
5351 return gen_Proj_Load(node);
5353 return gen_Proj_ASM(node);
5355 return gen_Proj_Builtin(node);
5359 return gen_Proj_DivMod(node);
5361 return gen_Proj_CopyB(node);
5363 return gen_Proj_Quot(node);
5365 return gen_Proj_be_SubSP(node);
5367 return gen_Proj_be_AddSP(node);
5369 return gen_Proj_be_Call(node);
5371 return gen_Proj_Cmp(node);
5373 return gen_Proj_Bound(node);
5375 proj = get_Proj_proj(node);
5377 case pn_Start_X_initial_exec: {
5378 ir_node *block = get_nodes_block(pred);
5379 ir_node *new_block = be_transform_node(block);
5380 dbg_info *dbgi = get_irn_dbg_info(node);
5381 /* we exchange the ProjX with a jump */
5382 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5387 case pn_Start_P_tls:
5388 return gen_Proj_tls(node);
5393 if (is_ia32_l_FloattoLL(pred)) {
5394 return gen_Proj_l_FloattoLL(node);
5396 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5400 ir_mode *mode = get_irn_mode(node);
5401 if (ia32_mode_needs_gp_reg(mode)) {
5402 ir_node *new_pred = be_transform_node(pred);
5403 ir_node *block = be_transform_node(get_nodes_block(node));
5404 ir_node *new_proj = new_r_Proj(block, new_pred,
5405 mode_Iu, get_Proj_proj(node));
/* keep the original node number for debugging purposes */
5406 new_proj->node_nr = node->node_nr;
5411 return be_duplicate_node(node);
5415 * Enters all transform functions into the generic pointer
/*
 * Enters all transform functions into the generic op pointers, so that
 * be_transform_graph() can dispatch per opcode: GEN(x) installs gen_x as
 * transformer for op x, BAD(x) installs bad_transform for ops that must
 * not appear at this stage.
 */
5417 static void register_transformers(void)
5419 /* first clear the generic function pointer for all ops */
5420 clear_irp_opcodes_generic_func();
5422 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5423 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5462 /* transform ops from intrinsic lowering */
5474 GEN(ia32_l_LLtoFloat);
5475 GEN(ia32_l_FloattoLL);
5481 /* we should never see these nodes */
5496 /* handle builtins */
5499 /* handle generic backend nodes */
5513 * Pre-transform all unknown and noreg nodes.
/*
 * Pre-transform all unknown and noreg nodes and cache the transformed
 * NoMem/NoReg nodes in the file-level nomem/noreg_GP variables used by
 * the transformers above.
 */
5515 static void ia32_pretransform_node(void)
5517 ia32_code_gen_t *cg = env_cg;
5519 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5520 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5521 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5522 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5523 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5524 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5526 nomem = get_irg_no_mem(current_ir_graph);
5527 noreg_GP = ia32_new_NoReg_gp(cg);
5533 * Walker, checks if all ia32 nodes producing more than one result have their
5534 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
/*
 * Walker: checks that every ia32 node producing more than one result has
 * Projs for all its outputs; for each missing Proj a new one is created
 * and attached to a be_Keep node so the register allocator does not lose
 * the output. Flag-class outputs are skipped, as is SwitchJmp.
 */
5536 static void add_missing_keep_walker(ir_node *node, void *data)
5539 unsigned found_projs = 0;
5540 const ir_edge_t *edge;
5541 ir_mode *mode = get_irn_mode(node);
5546 if (!is_ia32_irn(node))
5549 n_outs = arch_irn_get_n_outs(node);
5552 if (is_ia32_SwitchJmp(node))
/* found_projs is a bitset, so all out positions must fit in it */
5555 assert(n_outs < (int) sizeof(unsigned) * 8);
/* collect which outputs already have Projs */
5556 foreach_out_edge(node, edge) {
5557 ir_node *proj = get_edge_src_irn(edge);
5560 /* The node could be kept */
5564 if (get_irn_mode(proj) == mode_M)
5567 pn = get_Proj_proj(proj);
5568 assert(pn < n_outs);
5569 found_projs |= 1 << pn;
5573 /* are keeps missing? */
5575 for (i = 0; i < n_outs; ++i) {
5578 const arch_register_req_t *req;
5579 const arch_register_class_t *cls;
5581 if (found_projs & (1 << i)) {
5585 req = get_ia32_out_req(node, i);
/* flag outputs need no keep */
5590 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
5594 block = get_nodes_block(node);
5595 in[0] = new_r_Proj(block, node, arch_register_class_mode(cls), i);
/* reuse one Keep per node when several outputs are missing */
5596 if (last_keep != NULL) {
5597 be_Keep_add_node(last_keep, cls, in[0]);
5599 last_keep = be_new_Keep(cls, block, 1, in);
5600 if (sched_is_scheduled(node)) {
5601 sched_add_after(node, last_keep);
5608 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/*
 * Adds missing keeps to nodes: walks the whole graph with
 * add_missing_keep_walker, creating Projs + be_Keep nodes for unused
 * outputs of multi-output ia32 nodes.
 */
5611 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5613 ir_graph *irg = be_get_birg_irg(cg->birg);
5614 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5618 * Post-process all calls if we are in SSE mode.
5619 * The ABI requires that the results are in st0, copy them
5620 * to a xmm register.
/*
 * Post-process all calls if we are in SSE mode.
 *
 * The x86 ABI returns floating point results in st0 (the x87 stack),
 * but SSE code wants them in xmm registers. For every recorded call
 * (call_list/call_types, filled during transformation) and every float
 * result, the users of the vf0 result Proj are patched:
 *  - an xStore user can be rewritten directly into a vfst (st0 store);
 *  - all other users get the value rerouted through a stack slot:
 *    a vfst spills st0 to the frame, an xLoad reloads it into an xmm
 *    register, and the call's memory Proj is rerouted through the load.
 */
5622 static void postprocess_fp_call_results(void) {
5625 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5626 ir_node *call = call_list[i];
5627 ir_type *mtp = call_types[i];
5630 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5631 ir_type *res_tp = get_method_res_type(mtp, j);
5632 ir_node *res, *new_res;
5633 const ir_edge_t *edge, *next;
5636 if (! is_atomic_type(res_tp)) {
5637 /* no floating point return */
5640 mode = get_type_mode(res_tp);
5641 if (! mode_is_float(mode)) {
5642 /* no floating point return */
5646 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5649 /* now patch the users */
5650 foreach_out_edge_safe(res, edge, next) {
5651 ir_node *succ = get_edge_src_irn(edge);
/* Keeps need no rewiring */
5654 if (be_is_Keep(succ))
5657 if (is_ia32_xStore(succ)) {
5658 /* an xStore can be patched into an vfst */
5659 dbg_info *db = get_irn_dbg_info(succ);
5660 ir_node *block = get_nodes_block(succ);
5661 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5662 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5663 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5664 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5665 ir_mode *mode = get_ia32_ls_mode(succ);
5667 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
/* copy over all address-mode attributes of the xStore */
5668 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5669 if (is_ia32_use_frame(succ))
5670 set_ia32_use_frame(st);
5671 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5672 set_irn_pinned(st, get_irn_pinned(succ));
5673 set_ia32_op_type(st, ia32_AddrModeD);
/* build the store/reload sequence only once per result */
5677 if (new_res == NULL) {
5678 dbg_info *db = get_irn_dbg_info(call);
5679 ir_node *block = get_nodes_block(call);
5680 ir_node *frame = get_irg_frame(current_ir_graph);
5681 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5682 ir_node *call_mem = new_r_Proj(block, call, mode_M, pn_ia32_Call_M);
5683 ir_node *vfst, *xld, *new_mem;
5685 /* store st(0) on stack */
5686 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5687 set_ia32_op_type(vfst, ia32_AddrModeD);
5688 set_ia32_use_frame(vfst);
5690 /* load into SSE register */
5691 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5692 set_ia32_op_type(xld, ia32_AddrModeS);
5693 set_ia32_use_frame(xld);
5695 new_res = new_r_Proj(block, xld, mode, pn_ia32_xLoad_res);
5696 new_mem = new_r_Proj(block, xld, mode_M, pn_ia32_xLoad_M);
/* route all memory users through the spill/reload sequence */
5698 if (old_mem != NULL) {
5699 edges_reroute(old_mem, new_mem, current_ir_graph);
/* finally let the user consume the xmm value */
5703 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5711 void ia32_transform_graph(ia32_code_gen_t *cg)
5715 register_transformers();
5717 initial_fpcw = NULL;
5720 BE_TIMER_PUSH(t_heights);
5721 heights = heights_new(cg->irg);
5722 BE_TIMER_POP(t_heights);
5723 ia32_calculate_non_address_mode_nodes(cg->birg);
5725 /* the transform phase is not safe for CSE (yet) because several nodes get
5726 * attributes set after their creation */
5727 cse_last = get_opt_cse();
5730 call_list = NEW_ARR_F(ir_node *, 0);
5731 call_types = NEW_ARR_F(ir_type *, 0);
5732 be_transform_graph(cg->birg, ia32_pretransform_node);
5734 if (ia32_cg_config.use_sse2)
5735 postprocess_fp_call_results();
5736 DEL_ARR_F(call_types);
5737 DEL_ARR_F(call_list);
5739 set_opt_cse(cse_last);
5741 ia32_free_non_address_mode_nodes();
5742 heights_free(heights);
5746 void ia32_init_transform(void)
5748 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");