2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
/* NOTE(review): this listing is an extraction; the leading numbers on each
 * line are original source line numbers fused into the text, and interior
 * lines are missing. Code kept byte-identical. */
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
/* String spellings of special FP bit patterns / values used to build
 * constant-pool entities (sign masks, abs masks, unsigned-long-long bias). */
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
/* Linker-local labels for the corresponding constant-pool entities. */
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
/* Shorthands for the register-class modes of the x87 (vfp) and SSE (xmm)
 * register files. */
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* NOTE(review): extracted listing — interior lines are missing (gaps in the
 * embedded numbering); several typedef parameter lists below are visibly
 * truncated. Code kept byte-identical. */
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Transformed node for the initial x87 FPU control word (see get_fpcw()). */
95 static ir_node *initial_fpcw = NULL;
/* Constructor-function signatures used by the generic gen_* helpers below. */
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in the file. */
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* it's enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
/* NOTE(review): extracted listing — interior lines (including the #ifdef
 * bodies and the return statements) are missing; kept byte-identical.
 * Purpose per the header comment: decide whether an SSE constant can be
 * materialized without a constant-pool load. */
167 * returns true if constant can be created with a simple float command
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
/* Assemble the low 32 bits of the double from its byte representation. */
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
/* NOTE(review): extracted listing — many interior lines are missing (local
 * declarations, else-branches, closing braces, return); kept byte-identical.
 * Transforms a firm Const node into an ia32 node: float constants become
 * SSE xZero/xMovd/xLoad or x87 fldz/fld1/vfld depending on configuration;
 * integer constants become an ia32 Const with the tarval's long value. */
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
210 if (mode_is_float(mode)) {
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
217 if (tarval_is_null(tv)) {
/* 0.0: cheap xorps-style zeroing, no memory access needed */
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
222 } else if (tarval_is_one(tv)) {
/* Build 1.0 from all-ones via shift left then shift right:
 * 26/55 chosen per mode — presumably to leave the exponent bits of
 * 1.0f/1.0 — TODO confirm against the instruction semantics. */
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
/* Shift the upper half into place to rebuild the 64-bit double. */
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
/* General SSE case: load the value from a constant-pool entity;
 * rematerializable since the load has no side effects. */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: 0.0 and 1.0 have dedicated load instructions. */
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
292 floatent = create_float_const_entity(node);
293 /* create_float_const_ent is smart and sometimes creates
295 ls_mode = get_type_mode(get_entity_type(floatent));
297 load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
299 set_ia32_op_type(load, ia32_AddrModeS);
300 set_ia32_am_sc(load, floatent);
301 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
302 res = new_r_Proj(block, load, mode_vfp, pn_ia32_vfld_res);
305 #ifdef CONSTRUCT_SSE_CONST
307 #endif /* CONSTRUCT_SSE_CONST */
308 SET_IA32_ORIG_NODE(load, node);
310 be_dep_on_frame(load);
312 } else { /* non-float mode */
314 tarval *tv = get_Const_tarval(node);
/* Normalize to 32-bit unsigned before extracting the long value. */
317 tv = tarval_convert_to(tv, mode_Iu);
319 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
321 panic("couldn't convert constant tarval (%+F)", node);
323 val = get_tarval_long(tv);
325 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
326 SET_IA32_ORIG_NODE(cnst, node);
328 be_dep_on_frame(cnst);
/* NOTE(review): extracted listing — interior lines missing (else branch,
 * local declarations, return); kept byte-identical.
 * Transforms a SymConst: float modes get a load of the entity (SSE xLoad or
 * x87 vfld at mode_E); address entities become an ia32 Const carrying the
 * entity. Only symconst_addr_ent is supported. */
334 * Transforms a SymConst.
336 static ir_node *gen_SymConst(ir_node *node)
338 ir_node *old_block = get_nodes_block(node);
339 ir_node *block = be_transform_node(old_block);
340 dbg_info *dbgi = get_irn_dbg_info(node);
341 ir_mode *mode = get_irn_mode(node);
344 if (mode_is_float(mode)) {
345 if (ia32_cg_config.use_sse2)
346 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
348 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
354 if (get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
361 SET_IA32_ORIG_NODE(cnst, node);
363 be_dep_on_frame(cnst);
/* NOTE(review): extracted listing — the local declarations (buffer, tp) and
 * several closing braces are missing; kept byte-identical.
 * Creates and caches a primitive type for the given mode with the requested
 * alignment; one static cache array per supported mode, indexed by align
 * (caches are sized [16] — align is assumed < 16, TODO confirm callers). */
368 * Create a float type for the given mode and cache it.
370 * @param mode the mode for the float type (might be integer mode for SSE2 types)
371 * @param align alignment
373 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
379 if (mode == mode_Iu) {
380 static ir_type *int_Iu[16] = {NULL, };
382 if (int_Iu[align] == NULL) {
383 snprintf(buf, sizeof(buf), "int_Iu_%u", align);
384 int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
385 /* set the specified alignment */
386 set_type_alignment_bytes(tp, align);
388 return int_Iu[align];
389 } else if (mode == mode_Lu) {
390 static ir_type *int_Lu[16] = {NULL, };
392 if (int_Lu[align] == NULL) {
393 snprintf(buf, sizeof(buf), "int_Lu_%u", align);
394 int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
395 /* set the specified alignment */
396 set_type_alignment_bytes(tp, align);
398 return int_Lu[align];
399 } else if (mode == mode_F) {
400 static ir_type *float_F[16] = {NULL, };
402 if (float_F[align] == NULL) {
403 snprintf(buf, sizeof(buf), "float_F_%u", align);
404 float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
405 /* set the specified alignment */
406 set_type_alignment_bytes(tp, align);
408 return float_F[align];
409 } else if (mode == mode_D) {
410 static ir_type *float_D[16] = {NULL, };
412 if (float_D[align] == NULL) {
413 snprintf(buf, sizeof(buf), "float_D_%u", align);
414 float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return float_D[align];
/* fall-through: any other mode is treated as extended precision (E). */
420 static ir_type *float_E[16] = {NULL, };
422 if (float_E[align] == NULL) {
423 snprintf(buf, sizeof(buf), "float_E_%u", align);
424 float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
425 /* set the specified alignment */
426 set_type_alignment_bytes(tp, align);
428 return float_E[align];
/* NOTE(review): extracted listing — local declarations and the final return
 * are missing; kept byte-identical.
 * Creates and caches a fixed-layout array type holding two elements of the
 * given atomic float type (used e.g. for the ULL bias constant pair). */
433 * Create a float[2] array type for the given atomic type.
435 * @param tp the atomic type
437 static ir_type *ia32_create_float_array(ir_type *tp) {
439 ir_mode *mode = get_type_mode(tp);
440 unsigned align = get_type_alignment_bytes(tp);
445 if (mode == mode_F) {
446 static ir_type *float_F[16] = {NULL, };
448 if (float_F[align] != NULL)
449 return float_F[align];
450 snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
451 arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
452 } else if (mode == mode_D) {
453 static ir_type *float_D[16] = {NULL, };
455 if (float_D[align] != NULL)
456 return float_D[align];
457 snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
458 arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
460 static ir_type *float_E[16] = {NULL, };
462 if (float_E[align] != NULL)
463 return float_E[align];
464 snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
465 arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
/* size = 2 elements; layout is fixed so the backend can emit it directly. */
467 set_type_alignment_bytes(arr, align);
468 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
469 set_type_state(arr, layout_fixed);
/* NOTE(review): extracted listing — struct field names, local declarations
 * and several braces are missing; kept byte-identical.
 * Generates (and caches) a global constant entity for one of the well-known
 * FP constants (sign masks, abs masks, ULL bias) from its string spelling. */
473 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
474 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
476 static const struct {
477 const char *ent_name;
478 const char *cnst_str;
481 } names [ia32_known_const_max] = {
482 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
483 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
484 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
485 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
486 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
488 static ir_entity *ent_cache[ia32_known_const_max];
490 const char *ent_name, *cnst_str;
496 ent_name = names[kct].ent_name;
497 if (! ent_cache[kct]) {
498 cnst_str = names[kct].cnst_str;
/* mode selector: 0 -> 32-bit, 1 -> 64-bit, other -> float (bias case). */
500 switch (names[kct].mode) {
501 case 0: mode = mode_Iu; break;
502 case 1: mode = mode_Lu; break;
503 default: mode = mode_F; break;
505 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
506 tp = ia32_create_float_type(mode, names[kct].align);
/* ULL bias is emitted as a {0, bias} pair, hence the array type. */
508 if (kct == ia32_ULLBIAS)
509 tp = ia32_create_float_array(tp);
510 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
512 set_entity_ld_ident(ent, get_entity_ident(ent));
513 set_entity_visibility(ent, visibility_local);
514 set_entity_variability(ent, variability_constant);
515 set_entity_allocation(ent, allocation_static);
517 if (kct == ia32_ULLBIAS) {
518 ir_initializer_t *initializer = create_initializer_compound(2);
520 set_initializer_compound_value(initializer, 0,
521 create_initializer_tarval(get_tarval_null(mode)));
522 set_initializer_compound_value(initializer, 1,
523 create_initializer_tarval(tv));
525 set_entity_initializer(ent, initializer);
527 set_entity_initializer(ent, create_initializer_tarval(tv));
530 /* cache the entry */
531 ent_cache[kct] = ent;
534 return ent_cache[kct];
/* NOTE(review): extracted listing — local declarations, several return
 * statements and braces are missing; kept byte-identical.
 * Decides whether @p node (a Proj(Load) or float Const) may be folded into
 * another instruction as a source-address-mode operand. */
538 * return true if the node is a Proj(Load) and could be used in source address
539 * mode for another node. Will return only true if the @p other node is not
540 * dependent on the memory of the Load (for binary operations use the other
541 * input here, for unary operations use NULL).
543 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
544 ir_node *other, ir_node *other2, match_flags_t flags)
549 /* float constants are always available */
550 if (is_Const(node)) {
551 ir_mode *mode = get_irn_mode(node);
552 if (mode_is_float(mode)) {
553 if (ia32_cg_config.use_sse2) {
/* simple constants are cheaper to materialize directly than via AM */
554 if (is_simple_sse_Const(node))
557 if (is_simple_x87_Const(node))
560 if (get_irn_n_edges(node) > 1)
568 load = get_Proj_pred(node);
569 pn = get_Proj_proj(node);
570 if (!is_Load(load) || pn != pn_Load_res)
572 if (get_nodes_block(load) != block)
574 /* we only use address mode if we're the only user of the load */
575 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
577 /* in some edge cases with address mode we might reach the load normally
578 * and through some AM sequence, if it is already materialized then we
579 * can't create an AM node from it */
580 if (be_is_transformed(node))
583 /* don't do AM if other node inputs depend on the load (via mem-proj) */
584 if (other != NULL && prevents_AM(block, load, other))
587 if (other2 != NULL && prevents_AM(block, load, other2))
/* NOTE(review): extracted listing — most struct fields (addr, new_op1,
 * new_op2, ls_mode, pinned, mem_proj per later uses in this file) are
 * missing from this view; kept byte-identical.
 * Aggregate describing a matched ia32 operand/addressing configuration. */
593 typedef struct ia32_address_mode_t ia32_address_mode_t;
594 struct ia32_address_mode_t {
599 ia32_op_type_t op_type;
603 unsigned commutative : 1;
604 unsigned ins_permuted : 1;
607 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
609 /* construct load address */
610 memset(addr, 0, sizeof(addr[0]));
611 ia32_create_address_mode(addr, ptr, 0);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
615 addr->mem = be_transform_node(mem);
/* NOTE(review): extracted listing — local declarations, the early return of
 * the Const branch and closing braces are missing; kept byte-identical.
 * Fills @p am for source address mode: a float Const becomes a symconst
 * reference to its constant-pool entity; otherwise @p node is assumed to be
 * a Proj(Load) whose address is decomposed into the am->addr fields. */
618 static void build_address(ia32_address_mode_t *am, ir_node *node,
619 ia32_create_am_flags_t flags)
621 ia32_address_t *addr = &am->addr;
627 if (is_Const(node)) {
628 ir_entity *entity = create_float_const_entity(node);
629 addr->base = noreg_GP;
630 addr->index = noreg_GP;
632 addr->symconst_ent = entity;
/* constants float freely — not pinned to a memory state */
634 am->ls_mode = get_type_mode(get_entity_type(entity));
635 am->pinned = op_pin_state_floats;
639 load = get_Proj_pred(node);
640 ptr = get_Load_ptr(load);
641 mem = get_Load_mem(load);
642 new_mem = be_transform_node(mem);
643 am->pinned = get_irn_pinned(load);
644 am->ls_mode = get_Load_mode(load);
645 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
648 /* construct load address */
649 ia32_create_address_mode(addr, ptr, flags);
651 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
652 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* NOTE(review): extracted listing — the guard before set_ia32_use_frame()
 * (line 663 of the original) and closing brace are missing; kept
 * byte-identical. Copies the fields of an ia32_address_t onto a node's
 * addressing attributes. */
656 static void set_address(ir_node *node, const ia32_address_t *addr)
658 set_ia32_am_scale(node, addr->scale);
659 set_ia32_am_sc(node, addr->symconst_ent);
660 set_ia32_am_offs_int(node, addr->offset);
661 if (addr->symconst_sign)
662 set_ia32_am_sc_sign(node);
664 set_ia32_use_frame(node);
665 set_ia32_frame_ent(node, addr->frame_entity);
/* NOTE(review): extracted listing — the guard before set_ia32_commutative()
 * and closing braces are missing; kept byte-identical. */
669 * Apply attributes of a given address mode to a node.
671 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
673 set_address(node, &am->addr);
675 set_ia32_op_type(node, am->op_type);
676 set_ia32_ls_mode(node, am->ls_mode);
677 if (am->pinned == op_pin_state_pinned) {
678 /* beware: some nodes are already pinned and did not allow to change the state */
679 if (get_irn_pinned(node) != op_pin_state_pinned)
680 set_irn_pinned(node, op_pin_state_pinned);
683 set_ia32_commutative(node);
/* NOTE(review): extracted listing — the is_Conv() check, local declarations
 * and the leading `return` of the final expression are missing; kept
 * byte-identical. */
687 * Check, if a given node is a Down-Conv, ie. a integer Conv
688 * from a mode with more bits to a mode with lesser bits.
689 * Moreover, we return only true if the node has not more than 1 user.
691 * @param node the node
692 * @return non-zero if node is a Down-Conv
694 static int is_downconv(const ir_node *node)
702 /* we only want to skip the conv when we're the only user
703 * (not optimal but for now...)
705 if (get_irn_n_edges(node) > 1)
708 src_mode = get_irn_mode(get_Conv_op(node));
709 dest_mode = get_irn_mode(node);
711 ia32_mode_needs_gp_reg(src_mode) &&
712 ia32_mode_needs_gp_reg(dest_mode) &&
713 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
716 /* Skip all Down-Conv's on a given node and return the resulting node. */
717 ir_node *ia32_skip_downconv(ir_node *node)
719 while (is_downconv(node))
720 node = get_Conv_op(node);
/* NOTE(review): extracted listing — local declarations, the tgt_mode
 * selection (signed vs. unsigned target) and closing braces are missing;
 * kept byte-identical. Widens @p node to a 32-bit mode via an I2I Conv,
 * keeping @p orig_node for debug attribution. */
725 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
727 ir_mode *mode = get_irn_mode(node);
732 if (mode_is_signed(mode)) {
737 block = get_nodes_block(node);
738 dbgi = get_irn_dbg_info(node);
740 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/* NOTE(review): extracted listing — numerous interior lines (branch bodies,
 * else keywords, local declarations, braces) are missing; kept
 * byte-identical. Central operand matcher: tries immediates, then source
 * address mode on op2, then (if commutative) on op1 with permuted inputs,
 * and finally falls back to plain register operands. */
744 * matches operands of a node into ia32 addressing/operand modes. This covers
745 * usage of source address mode, immediates, operations with non 32-bit modes,
747 * The resulting data is filled into the @p am struct. block is the block
748 * of the node whose arguments are matched. op1, op2 are the first and second
749 * input that are matched (op1 may be NULL). other_op is another unrelated
750 * input that is not matched! but which is needed sometimes to check if AM
751 * for op1/op2 is legal.
752 * @p flags describes the supported modes of the operation in detail.
754 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
755 ir_node *op1, ir_node *op2, ir_node *other_op,
758 ia32_address_t *addr = &am->addr;
759 ir_mode *mode = get_irn_mode(op2);
760 int mode_bits = get_mode_size_bits(mode);
761 ir_node *new_op1, *new_op2;
763 unsigned commutative;
764 int use_am_and_immediates;
767 memset(am, 0, sizeof(am[0]));
769 commutative = (flags & match_commutative) != 0;
770 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
771 use_am = (flags & match_am) != 0;
772 use_immediate = (flags & match_immediate) != 0;
773 assert(!use_am_and_immediates || use_immediate);
776 assert(!commutative || op1 != NULL);
777 assert(use_am || !(flags & match_8bit_am));
778 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit operands without explicit sub-word AM support are disallowed */
780 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
781 (mode_bits == 16 && !(flags & match_16bit_am))) {
785 /* we can simply skip downconvs for mode neutral nodes: the upper bits
786 * can be random for these operations */
787 if (flags & match_mode_neutral) {
788 op2 = ia32_skip_downconv(op2);
790 op1 = ia32_skip_downconv(op1);
794 /* match immediates. firm nodes are normalized: constants are always on the
797 if (!(flags & match_try_am) && use_immediate) {
798 new_op2 = try_create_Immediate(op2, 0);
801 if (new_op2 == NULL &&
802 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
803 build_address(am, op2, 0);
804 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
805 if (mode_is_float(mode)) {
806 new_op2 = ia32_new_NoReg_vfp(env_cg);
810 am->op_type = ia32_AddrModeS;
811 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
813 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
815 build_address(am, op1, 0);
817 if (mode_is_float(mode)) {
818 noreg = ia32_new_NoReg_vfp(env_cg);
823 if (new_op2 != NULL) {
/* operands are swapped so the instruction's inputs are permuted */
826 new_op1 = be_transform_node(op2);
828 am->ins_permuted = 1;
830 am->op_type = ia32_AddrModeS;
832 am->op_type = ia32_Normal;
834 if (flags & match_try_am) {
840 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
842 new_op2 = be_transform_node(op2);
844 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
846 if (addr->base == NULL)
847 addr->base = noreg_GP;
848 if (addr->index == NULL)
849 addr->index = noreg_GP;
850 if (addr->mem == NULL)
853 am->new_op1 = new_op1;
854 am->new_op2 = new_op2;
855 am->commutative = commutative;
/* NOTE(review): extracted listing — local declarations, the early return and
 * the final `return node;` are missing; kept byte-identical. */
859 * "Fixes" a node that uses address mode by turning it into mode_T
860 * and returning a pn_ia32_res Proj.
862 * @param node the node
863 * @param am its address mode
865 * @return a Proj(pn_ia32_res) if a memory address mode is used,
868 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
873 if (am->mem_proj == NULL)
876 /* we have to create a mode_T so the old MemProj can attach to us */
877 mode = get_irn_mode(node);
878 load = get_Proj_pred(am->mem_proj);
/* redirect users of the original load to the new AM node */
880 be_set_transformed_node(load, node);
882 if (mode != mode_T) {
883 set_irn_mode(node, mode_T);
884 return new_rd_Proj(NULL, get_nodes_block(node), node, mode, pn_ia32_res);
/* NOTE(review): extracted listing — the dbgi declaration, final return and
 * closing brace are missing; kept byte-identical. */
891 * Construct a standard binary operation, set AM and immediate if required.
893 * @param node The original node for which the binop is created
894 * @param op1 The first operand
895 * @param op2 The second operand
896 * @param func The node constructor function
897 * @return The constructed ia32 node.
899 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
900 construct_binop_func *func, match_flags_t flags)
903 ir_node *block, *new_block, *new_node;
904 ia32_address_mode_t am;
905 ia32_address_t *addr = &am.addr;
907 block = get_nodes_block(node);
908 match_arguments(&am, block, op1, op2, NULL, flags);
910 dbgi = get_irn_dbg_info(node);
911 new_block = be_transform_node(block);
912 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
913 am.new_op1, am.new_op2);
914 set_am_attributes(new_node, &am);
915 /* we can't use source address mode anymore when using immediates */
916 if (!(flags & match_am_and_immediates) &&
917 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
918 set_ia32_am_support(new_node, ia32_am_none);
919 SET_IA32_ORIG_NODE(new_node, node);
921 new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): extracted listing — the enum keyword/braces around the three
 * constants are missing; kept byte-identical. The COMPILETIME_ASSERTs pin
 * these generic input indices to the concrete Adc/Sbb node layouts. */
927 * Generic names for the inputs of an ia32 binary op.
930 n_ia32_l_binop_left, /**< ia32 left input */
931 n_ia32_l_binop_right, /**< ia32 right input */
932 n_ia32_l_binop_eflags /**< ia32 eflags input */
934 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
935 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
936 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
937 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
938 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
939 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/* NOTE(review): extracted listing — the dbgi declaration, final return and
 * closing brace are missing; kept byte-identical. Same shape as gen_binop()
 * but additionally transforms and wires the eflags input. */
942 * Construct a binary operation which also consumes the eflags.
944 * @param node The node to transform
945 * @param func The node constructor function
946 * @param flags The match flags
947 * @return The constructor ia32 node
949 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
952 ir_node *src_block = get_nodes_block(node);
953 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
954 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
955 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
957 ir_node *block, *new_node, *new_eflags;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
961 match_arguments(&am, src_block, op1, op2, eflags, flags);
963 dbgi = get_irn_dbg_info(node);
964 block = be_transform_node(src_block);
965 new_eflags = be_transform_node(eflags);
966 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2, new_eflags);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): extracted listing — local declaration, the cached-value
 * return and the final return are missing; kept byte-identical.
 * Lazily fetches and caches the transformed initial x87 FPU control word
 * (ignore-register from the ABI), memoized in the file-static initial_fpcw. */
980 static ir_node *get_fpcw(void)
983 if (initial_fpcw != NULL)
986 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
987 &ia32_fp_cw_regs[REG_FPCW]);
988 initial_fpcw = be_transform_node(fpcw);
/* NOTE(review): extracted listing — dbgi declaration, flag additions inside
 * the size check, final return and braces are missing; kept byte-identical.
 * x87 variant of gen_binop(): all ops treated as commutative (reverse
 * instructions exist) and the FPU control word is wired as an extra input. */
994 * Construct a standard binary operation, set AM and immediate if required.
996 * @param op1 The first operand
997 * @param op2 The second operand
998 * @param func The node constructor function
999 * @return The constructed ia32 node.
1001 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1002 construct_binop_float_func *func)
1004 ir_mode *mode = get_irn_mode(node);
1006 ir_node *block, *new_block, *new_node;
1007 ia32_address_mode_t am;
1008 ia32_address_t *addr = &am.addr;
1009 ia32_x87_attr_t *attr;
1010 /* All operations are considered commutative, because there are reverse
1012 match_flags_t flags = match_commutative;
1014 /* happens for div nodes... */
1016 mode = get_divop_resmod(node);
1018 /* cannot use address mode with long double on x87 */
1019 if (get_mode_size_bits(mode) <= 64)
1022 block = get_nodes_block(node);
1023 match_arguments(&am, block, op1, op2, NULL, flags);
1025 dbgi = get_irn_dbg_info(node);
1026 new_block = be_transform_node(block);
1027 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1028 am.new_op1, am.new_op2, get_fpcw());
1029 set_am_attributes(new_node, &am);
/* record operand permutation so the x87 simulator can pick fsubr etc. */
1031 attr = get_ia32_x87_attr(new_node);
1032 attr->attr.data.ins_permuted = am.ins_permuted;
1034 SET_IA32_ORIG_NODE(new_node, node);
1036 new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): extracted listing — dbgi declaration, loop interior
 * (`op2 = op;` etc.), final return and braces are missing; kept
 * byte-identical. */
1042 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1044 * @param op1 The first operand
1045 * @param op2 The second operand
1046 * @param func The node constructor function
1047 * @return The constructed ia32 node.
1049 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1050 construct_shift_func *func,
1051 match_flags_t flags)
1054 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1056 assert(! mode_is_float(get_irn_mode(node)));
1057 assert(flags & match_immediate);
1058 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1060 if (flags & match_mode_neutral) {
1061 op1 = ia32_skip_downconv(op1);
1062 new_op1 = be_transform_node(op1);
1063 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1064 new_op1 = create_upconv(op1, node);
1066 new_op1 = be_transform_node(op1);
1069 /* the shift amount can be any mode that is bigger than 5 bits, since all
1070 * other bits are ignored anyway */
1071 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1072 ir_node *const op = get_Conv_op(op2);
1073 if (mode_is_float(get_irn_mode(op)))
1076 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1078 new_op2 = create_immediate_or_transform(op2, 0);
1080 dbgi = get_irn_dbg_info(node);
1081 block = get_nodes_block(node);
1082 new_block = be_transform_node(block);
1083 new_node = func(dbgi, new_block, new_op1, new_op2);
1084 SET_IA32_ORIG_NODE(new_node, node);
1086 /* lowered shift instruction may have a dependency operand, handle it here */
1087 if (get_irn_arity(node) == 3) {
1088 /* we have a dependency */
1089 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1090 add_irn_dep(new_node, new_dep);
/* NOTE(review): extracted listing — dbgi declaration, final return and
 * braces are missing; kept byte-identical. */
1098 * Construct a standard unary operation, set AM and immediate if required.
1100 * @param op The operand
1101 * @param func The node constructor function
1102 * @return The constructed ia32 node.
1104 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1105 match_flags_t flags)
1108 ir_node *block, *new_block, *new_op, *new_node;
1110 assert(flags == 0 || flags == match_mode_neutral);
1111 if (flags & match_mode_neutral) {
1112 op = ia32_skip_downconv(op);
1115 new_op = be_transform_node(op);
1116 dbgi = get_irn_dbg_info(node);
1117 block = get_nodes_block(node);
1118 new_block = be_transform_node(block);
1119 new_node = func(dbgi, new_block, new_op);
1121 SET_IA32_ORIG_NODE(new_node, node);
/* NOTE(review): extracted listing — the base NULL-check branch, the noreg
 * assignment for a missing index, the final return and braces are missing;
 * kept byte-identical. Builds an ia32 Lea from a decomposed address. */
1126 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1127 ia32_address_t *addr)
1129 ir_node *base, *index, *res;
1135 base = be_transform_node(base);
1138 index = addr->index;
1139 if (index == NULL) {
1142 index = be_transform_node(index);
1145 res = new_bd_ia32_Lea(dbgi, block, base, index);
1146 set_address(res, addr);
1152 * Returns non-zero if a given address mode has a symbolic or
1153 * numerical offset != 0.
1155 static int am_has_immediates(const ia32_address_t *addr)
1157 return addr->offset != 0 || addr->symconst_ent != NULL
1158 || addr->frame_entity || addr->use_frame;
/* NOTE(review): extracted listing — several branch bodies, returns and
 * braces are missing; kept byte-identical. Strategy per the inline comment:
 * fold to a Const for pure immediate trees, prefer Lea for add-with-
 * immediate, use an Add with source AM when profitable, otherwise Lea. */
1162 * Creates an ia32 Add.
1164 * @return the created ia32 Add node
1166 static ir_node *gen_Add(ir_node *node)
1168 ir_mode *mode = get_irn_mode(node);
1169 ir_node *op1 = get_Add_left(node);
1170 ir_node *op2 = get_Add_right(node);
1172 ir_node *block, *new_block, *new_node, *add_immediate_op;
1173 ia32_address_t addr;
1174 ia32_address_mode_t am;
1176 if (mode_is_float(mode)) {
1177 if (ia32_cg_config.use_sse2)
1178 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1179 match_commutative | match_am);
1181 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1184 ia32_mark_non_am(node);
1186 op2 = ia32_skip_downconv(op2);
1187 op1 = ia32_skip_downconv(op1);
1191 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1192 * 1. Add with immediate -> Lea
1193 * 2. Add with possible source address mode -> Add
1194 * 3. Otherwise -> Lea
1196 memset(&addr, 0, sizeof(addr));
1197 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1198 add_immediate_op = NULL;
1200 dbgi = get_irn_dbg_info(node);
1201 block = get_nodes_block(node);
1202 new_block = be_transform_node(block);
/* whole Add collapsed into symconst/offset: emit a single Const */
1205 if (addr.base == NULL && addr.index == NULL) {
1206 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1207 addr.symconst_sign, 0, addr.offset);
1208 be_dep_on_frame(new_node);
1209 SET_IA32_ORIG_NODE(new_node, node);
1212 /* add with immediate? */
1213 if (addr.index == NULL) {
1214 add_immediate_op = addr.base;
1215 } else if (addr.base == NULL && addr.scale == 0) {
1216 add_immediate_op = addr.index;
1219 if (add_immediate_op != NULL) {
1220 if (!am_has_immediates(&addr)) {
1221 #ifdef DEBUG_libfirm
1222 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1225 return be_transform_node(add_immediate_op);
1228 new_node = create_lea_from_address(dbgi, new_block, &addr);
1229 SET_IA32_ORIG_NODE(new_node, node);
1233 /* test if we can use source address mode */
1234 match_arguments(&am, block, op1, op2, NULL, match_commutative
1235 | match_mode_neutral | match_am | match_immediate | match_try_am);
1237 /* construct an Add with source address mode */
1238 if (am.op_type == ia32_AddrModeS) {
1239 ia32_address_t *am_addr = &am.addr;
1240 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1241 am_addr->index, am_addr->mem, am.new_op1,
1243 set_am_attributes(new_node, &am);
1244 SET_IA32_ORIG_NODE(new_node, node);
1246 new_node = fix_mem_proj(new_node, &am);
1251 /* otherwise construct a lea */
1252 new_node = create_lea_from_address(dbgi, new_block, &addr);
1253 SET_IA32_ORIG_NODE(new_node, node);
1258 * Creates an ia32 Mul.
1260 * @return the created ia32 Mul node
/* Float muls use SSE xMul or x87 vfmul; integer muls become IMul with
 * the full set of matcher flags (commutative, AM, immediates, ...). */
1262 static ir_node *gen_Mul(ir_node *node)
1264 ir_node *op1 = get_Mul_left(node);
1265 ir_node *op2 = get_Mul_right(node);
1266 ir_mode *mode = get_irn_mode(node);
1268 if (mode_is_float(mode)) {
1269 if (ia32_cg_config.use_sse2)
1270 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1271 match_commutative | match_am);
1273 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1275 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1276 match_commutative | match_am | match_mode_neutral |
1277 match_immediate | match_am_and_immediates);
1281 * Creates an ia32 Mulh.
1282 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1283 * this result while Mul returns the lower 32 bit.
1285 * @return the created ia32 Mulh node
/* Signed Mulh uses IMul1OP, unsigned uses Mul; either way the returned
 * node is a Proj selecting the high 32 bits of the 64-bit product. */
1287 static ir_node *gen_Mulh(ir_node *node)
1289 ir_node *block = get_nodes_block(node);
1290 ir_node *new_block = be_transform_node(block);
1291 dbg_info *dbgi = get_irn_dbg_info(node);
1292 ir_node *op1 = get_Mulh_left(node);
1293 ir_node *op2 = get_Mulh_right(node);
1294 ir_mode *mode = get_irn_mode(node);
1296 ir_node *proj_res_high;
1298 if (mode_is_signed(mode)) {
1299 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1300 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1302 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1303 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_Mul_res_high);
1305 return proj_res_high;
1309 * Creates an ia32 And.
1311 * @return The created ia32 And node
/* Special-cases And with 0xFF / 0xFFFF constants, which are really zero
 * extensions and become an I2I Conv (movzx); everything else becomes a
 * plain ia32 And. Float modes are not expected here. */
1313 static ir_node *gen_And(ir_node *node)
1315 ir_node *op1 = get_And_left(node);
1316 ir_node *op2 = get_And_right(node);
1317 assert(! mode_is_float(get_irn_mode(node)));
1319 /* is it a zero extension? */
1320 if (is_Const(op2)) {
1321 tarval *tv = get_Const_tarval(op2);
1322 long v = get_tarval_long(tv);
1324 if (v == 0xFF || v == 0xFFFF) {
1325 dbg_info *dbgi = get_irn_dbg_info(node);
1326 ir_node *block = get_nodes_block(node);
/* the 0xFF branch is elided in this view; here v must be 0xFFFF */
1333 assert(v == 0xFFFF);
1336 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1341 return gen_binop(node, op1, op2, new_bd_ia32_And,
1342 match_commutative | match_mode_neutral | match_am | match_immediate);
1348 * Creates an ia32 Or.
1350 * @return The created ia32 Or node
/* Straight binop lowering; integer-only (float asserts). */
1352 static ir_node *gen_Or(ir_node *node)
1354 ir_node *op1 = get_Or_left(node);
1355 ir_node *op2 = get_Or_right(node);
1357 assert (! mode_is_float(get_irn_mode(node)));
1358 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1359 | match_mode_neutral | match_am | match_immediate);
1365 * Creates an ia32 Eor.
1367 * @return The created ia32 Eor node
/* Firm Eor (xor) lowers to the ia32 Xor binop; integer-only. */
1369 static ir_node *gen_Eor(ir_node *node)
1371 ir_node *op1 = get_Eor_left(node);
1372 ir_node *op2 = get_Eor_right(node);
1374 assert(! mode_is_float(get_irn_mode(node)));
1375 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1376 | match_mode_neutral | match_am | match_immediate);
1381 * Creates an ia32 Sub.
1383 * @return The created ia32 Sub node
/* Float subs use SSE xSub or x87 vfsub. A Sub with a constant right
 * operand should have been normalized to Add(-C) by the middleend, so
 * that case only triggers an optimisation warning here. */
1385 static ir_node *gen_Sub(ir_node *node)
1387 ir_node *op1 = get_Sub_left(node);
1388 ir_node *op2 = get_Sub_right(node);
1389 ir_mode *mode = get_irn_mode(node);
1391 if (mode_is_float(mode)) {
1392 if (ia32_cg_config.use_sse2)
1393 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1395 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1398 if (is_Const(op2)) {
1399 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: Sub is not commutative, hence no match_commutative below */
1403 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1404 | match_am | match_immediate);
/* Compute the memory input for a node that consumed a load via address
 * mode. The tricky part is avoiding a memory self-loop: the source mem
 * must not (transitively) contain the very load that was folded in.
 * Projs of that load are filtered out of Syncs; otherwise the source
 * and AM memories are combined into a fresh 2-input Sync. */
1407 static ir_node *transform_AM_mem(ir_node *const block,
1408 ir_node *const src_val,
1409 ir_node *const src_mem,
1410 ir_node *const am_mem)
1412 if (is_NoMem(am_mem)) {
1413 return be_transform_node(src_mem);
1414 } else if (is_Proj(src_val) &&
1416 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1417 /* avoid memory loop */
1419 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1420 ir_node *const ptr_pred = get_Proj_pred(src_val);
1421 int const arity = get_Sync_n_preds(src_mem);
/* room for all old Sync inputs plus the AM memory */
1426 NEW_ARR_A(ir_node*, ins, arity + 1);
1428 /* NOTE: This sometimes produces dead-code because the old sync in
1429 * src_mem might not be used anymore, we should detect this case
1430 * and kill the sync... */
1431 for (i = arity - 1; i >= 0; --i) {
1432 ir_node *const pred = get_Sync_pred(src_mem, i);
1434 /* avoid memory loop */
1435 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1438 ins[n++] = be_transform_node(pred);
1443 return new_r_Sync(block, n, ins);
/* fallback: sync the transformed source mem with the AM mem */
1447 ins[0] = be_transform_node(src_mem);
1449 return new_r_Sync(block, 2, ins);
1454 * Create a 32bit to 64bit signed extension.
1456 * @param dbgi debug info
1457 * @param block the block where node nodes should be placed
1458 * @param val the value to extend
1459 * @param orig the original node
/* Two encodings: the short cltd/cdq form (needs a ProduceVal pseudo-input
 * to model the implicit eax dependency) or an explicit "sar val, 31". */
1461 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1462 ir_node *val, const ir_node *orig)
1467 if (ia32_cg_config.use_short_sex_eax) {
1468 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1469 be_dep_on_frame(pval);
1470 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1472 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1473 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1475 SET_IA32_ORIG_NODE(res, orig);
1480 * Generates an ia32 DivMod with additional infrastructure for the
1481 * register allocator if needed.
/* Shared lowering for Div, Mod and DivMod. x86 idiv/div take the
 * dividend in edx:eax, so the upper half is produced either by a real
 * sign extension (signed) or a zero Const (unsigned). */
1483 static ir_node *create_Div(ir_node *node)
1485 dbg_info *dbgi = get_irn_dbg_info(node);
1486 ir_node *block = get_nodes_block(node);
1487 ir_node *new_block = be_transform_node(block);
1494 ir_node *sign_extension;
1495 ia32_address_mode_t am;
1496 ia32_address_t *addr = &am.addr;
1498 /* the upper bits have random contents for smaller modes */
/* extract operands/mem/result mode from whichever opcode we got */
1499 switch (get_irn_opcode(node)) {
1501 op1 = get_Div_left(node);
1502 op2 = get_Div_right(node);
1503 mem = get_Div_mem(node);
1504 mode = get_Div_resmode(node);
1507 op1 = get_Mod_left(node);
1508 op2 = get_Mod_right(node);
1509 mem = get_Mod_mem(node);
1510 mode = get_Mod_resmode(node);
1513 op1 = get_DivMod_left(node);
1514 op2 = get_DivMod_right(node);
1515 mem = get_DivMod_mem(node);
1516 mode = get_DivMod_resmode(node);
1519 panic("invalid divmod node %+F", node);
1522 match_arguments(&am, block, op1, op2, NULL, match_am);
1524 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1525 is the memory of the consumed address. We can have only the second op as address
1526 in Div nodes, so check only op2. */
1527 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1529 if (mode_is_signed(mode)) {
1530 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1531 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1532 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned: upper 32 bits are simply zero */
1534 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1535 be_dep_on_frame(sign_extension);
1537 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1538 addr->index, new_mem, am.new_op2,
1539 am.new_op1, sign_extension);
/* keep the original pinned state (division can trap) */
1542 set_irn_pinned(new_node, get_irn_pinned(node));
1544 set_am_attributes(new_node, &am);
1545 SET_IA32_ORIG_NODE(new_node, node);
1547 new_node = fix_mem_proj(new_node, &am);
1553 * Generates an ia32 Mod.
/* Thin wrapper: Mod, Div and DivMod all share create_Div(), which
 * dispatches on the firm opcode internally. */
1555 static ir_node *gen_Mod(ir_node *node)
1557 return create_Div(node);
1561 * Generates an ia32 Div.
1563 static ir_node *gen_Div(ir_node *node)
1565 return create_Div(node);
1569 * Generates an ia32 DivMod.
1571 static ir_node *gen_DivMod(ir_node *node)
1573 return create_Div(node);
1579 * Creates an ia32 floating Div.
1581 * @return The created ia32 xDiv node
/* Floating-point division: SSE xDiv when available, else x87 vfdiv. */
1583 static ir_node *gen_Quot(ir_node *node)
1585 ir_node *op1 = get_Quot_left(node);
1586 ir_node *op2 = get_Quot_right(node);
1588 if (ia32_cg_config.use_sse2) {
1589 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1591 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1597 * Creates an ia32 Shl.
1599 * @return The created ia32 Shl node
/* Left shift; mode-neutral matching is safe since low bits are unaffected
 * by the upper-bit garbage of smaller modes. */
1601 static ir_node *gen_Shl(ir_node *node)
1603 ir_node *left = get_Shl_left(node);
1604 ir_node *right = get_Shl_right(node);
1606 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1607 match_mode_neutral | match_immediate);
1611 * Creates an ia32 Shr.
1613 * @return The created ia32 Shr node
/* Logical right shift; no match_mode_neutral here because the incoming
 * upper bits do reach the result. */
1615 static ir_node *gen_Shr(ir_node *node)
1617 ir_node *left = get_Shr_left(node);
1618 ir_node *right = get_Shr_right(node);
1620 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1626 * Creates an ia32 Sar.
1628 * @return The created ia32 Shrs node
/* Arithmetic right shift with two pattern matches first:
 * 1. a constant shift recognized as a 32->64 style sign extension
 *    (the guarding condition is elided in this sampled view);
 * 2. Shrs(Shl(x, C), C) with C in {16, 24}, i.e. an 8/16-bit sign
 *    extension, lowered to an I2I Conv (movsx).
 * Anything else becomes a plain Sar. */
1630 static ir_node *gen_Shrs(ir_node *node)
1632 ir_node *left = get_Shrs_left(node);
1633 ir_node *right = get_Shrs_right(node);
1635 if (is_Const(right)) {
1636 tarval *tv = get_Const_tarval(right);
1637 long val = get_tarval_long(tv);
1639 /* this is a sign extension */
1640 dbg_info *dbgi = get_irn_dbg_info(node);
1641 ir_node *block = be_transform_node(get_nodes_block(node));
1642 ir_node *new_op = be_transform_node(left);
1644 return create_sex_32_64(dbgi, block, new_op, node);
1648 /* 8 or 16 bit sign extension? */
1649 if (is_Const(right) && is_Shl(left)) {
1650 ir_node *shl_left = get_Shl_left(left);
1651 ir_node *shl_right = get_Shl_right(left);
1652 if (is_Const(shl_right)) {
1653 tarval *tv1 = get_Const_tarval(right);
1654 tarval *tv2 = get_Const_tarval(shl_right);
/* both shift amounts must be the same constant */
1655 if (tv1 == tv2 && tarval_is_long(tv1)) {
1656 long val = get_tarval_long(tv1);
1657 if (val == 16 || val == 24) {
1658 dbg_info *dbgi = get_irn_dbg_info(node);
1659 ir_node *block = get_nodes_block(node);
1669 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1678 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1684 * Creates an ia32 Rol.
1686 * @param op1 The first operator
1687 * @param op2 The second operator
1688 * @return The created ia32 RotL node
/* Helper used by gen_Rotl; a plain rotate-left shift binop. */
1690 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1692 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate)
1698 * Creates an ia32 Ror.
1699 * NOTE: There is no RotR with immediate because this would always be a RotL
1700 * "imm-mode_size_bits" which can be pre-calculated.
1702 * @param op1 The first operator
1703 * @param op2 The second operator
1704 * @return The created ia32 RotR node
/* Helper used by gen_Rotl when the RotL(x, bits-e) pattern is found. */
1706 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1708 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1714 * Creates an ia32 RotR or RotL (depending on the found pattern).
1716 * @return The created ia32 RotL or RotR node
/* Firm only has RotL. A right operand of the shape Add(Minus(e), bits)
 * (i.e. "mode_size_bits - e") means the original program rotated right,
 * so emit a RotR on the un-negated amount; otherwise emit RotL. */
1718 static ir_node *gen_Rotl(ir_node *node)
1720 ir_node *rotate = NULL;
1721 ir_node *op1 = get_Rotl_left(node);
1722 ir_node *op2 = get_Rotl_right(node);
1724 /* Firm has only RotL, so we are looking for a right (op2)
1725 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1726 that means we can create a RotR instead of an Add and a RotL */
1730 ir_node *left = get_Add_left(add);
1731 ir_node *right = get_Add_right(add);
1732 if (is_Const(right)) {
1733 tarval *tv = get_Const_tarval(right);
1734 ir_mode *mode = get_irn_mode(node);
1735 long bits = get_mode_size_bits(mode);
/* pattern: op2 == Minus(e) + bits  ->  rotate right by e */
1737 if (is_Minus(left) &&
1738 tarval_is_long(tv) &&
1739 get_tarval_long(tv) == bits &&
1742 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1743 rotate = gen_Ror(node, op1, get_Minus_op(left));
1748 if (rotate == NULL) {
1749 rotate = gen_Rol(node, op1, op2);
1758 * Transforms a Minus node.
1760 * @return The created ia32 Minus node
/* Integer negation lowers to Neg. Float negation flips the sign bit:
 * SSE xors with a sign-bit constant loaded from a known-const entity
 * (SSIGN/DSIGN depending on size), x87 uses fchs (vfchs). */
1762 static ir_node *gen_Minus(ir_node *node)
1764 ir_node *op = get_Minus_op(node);
1765 ir_node *block = be_transform_node(get_nodes_block(node));
1766 dbg_info *dbgi = get_irn_dbg_info(node);
1767 ir_mode *mode = get_irn_mode(node);
1772 if (mode_is_float(mode)) {
1773 ir_node *new_op = be_transform_node(op);
1774 if (ia32_cg_config.use_sse2) {
1775 /* TODO: non-optimal... if we have many xXors, then we should
1776 * rather create a load for the const and use that instead of
1777 * several AM nodes... */
1778 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1780 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1781 nomem, new_op, noreg_xmm);
1783 size = get_mode_size_bits(mode);
1784 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
/* fold the sign-mask constant in via source address mode */
1786 set_ia32_am_sc(new_node, ent);
1787 set_ia32_op_type(new_node, ia32_AddrModeS);
1788 set_ia32_ls_mode(new_node, mode);
1790 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1793 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1796 SET_IA32_ORIG_NODE(new_node, node);
1802 * Transforms a Not node.
1804 * @return The created ia32 Not node
/* Bitwise complement only; mode_b Nots must have been lowered earlier. */
1806 static ir_node *gen_Not(ir_node *node)
1808 ir_node *op = get_Not_op(node);
1810 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1811 assert (! mode_is_float(get_irn_mode(node)));
1813 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1819 * Transforms an Abs node.
1821 * @return The created ia32 Abs node
/* Float abs clears the sign bit: SSE ands with an abs-mask constant
 * (SABS/DABS), x87 uses fabs (vfabs). Integer abs uses the classic
 * branch-free sequence: s = x >> 31; result = (x ^ s) - s. */
1823 static ir_node *gen_Abs(ir_node *node)
1825 ir_node *block = get_nodes_block(node);
1826 ir_node *new_block = be_transform_node(block);
1827 ir_node *op = get_Abs_op(node);
1828 dbg_info *dbgi = get_irn_dbg_info(node);
1829 ir_mode *mode = get_irn_mode(node);
1835 if (mode_is_float(mode)) {
1836 new_op = be_transform_node(op);
1838 if (ia32_cg_config.use_sse2) {
1839 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1840 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1841 nomem, new_op, noreg_fp);
1843 size = get_mode_size_bits(mode);
1844 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
/* mask constant folded in via source address mode */
1846 set_ia32_am_sc(new_node, ent);
1848 SET_IA32_ORIG_NODE(new_node, node);
1850 set_ia32_op_type(new_node, ia32_AddrModeS);
1851 set_ia32_ls_mode(new_node, mode);
1853 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1854 SET_IA32_ORIG_NODE(new_node, node);
1857 ir_node *xor, *sign_extension;
/* smaller int modes are first sign-extended to 32 bit */
1859 if (get_mode_size_bits(mode) == 32) {
1860 new_op = be_transform_node(op);
1862 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1865 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1867 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1868 nomem, new_op, sign_extension);
1869 SET_IA32_ORIG_NODE(xor, node);
1871 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1872 nomem, xor, sign_extension);
1873 SET_IA32_ORIG_NODE(new_node, node);
1880 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* Both operands are transformed; the Bt sets the carry flag from bit n
 * of x, which callers then test with Jc/Jnc. */
1882 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1884 dbg_info *dbgi = get_irn_dbg_info(cmp);
1885 ir_node *block = get_nodes_block(cmp);
1886 ir_node *new_block = be_transform_node(block);
1887 ir_node *op1 = be_transform_node(x);
1888 ir_node *op2 = be_transform_node(n);
1890 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1894 * Transform a node returning a "flag" result.
1896 * @param node the node to transform
1897 * @param pnc_out the compare mode to use
/* Produces the eflags-producing node for a boolean input and reports the
 * branch condition through *pnc_out. Cmp(And(Shl(1, n), x), 0/==x) with
 * Eq/Lg is strength-reduced to a Bt, whose result lives in the carry
 * flag — hence the pnc is rewritten to Lt/Ge (unsigned ~ Jc/Jnc).
 * Plain mode_b values are tested against zero with Test. */
1899 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1906 /* we have a Cmp as input */
1907 if (is_Proj(node)) {
1908 ir_node *pred = get_Proj_pred(node);
1910 pn_Cmp pnc = get_Proj_proj(node);
1911 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1912 ir_node *l = get_Cmp_left(pred);
1913 ir_node *r = get_Cmp_right(pred);
1915 ir_node *la = get_And_left(l);
1916 ir_node *ra = get_And_right(l);
/* variant 1: the shifted 1 is the left And operand */
1918 ir_node *c = get_Shl_left(la);
1919 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1920 /* (1 << n) & ra) */
1921 ir_node *n = get_Shl_right(la);
1922 flags = gen_bt(pred, ra, n);
1923 /* we must generate a Jc/Jnc jump */
1924 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1927 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* variant 2: the shifted 1 is the right And operand */
1932 ir_node *c = get_Shl_left(ra);
1933 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1934 /* la & (1 << n)) */
1935 ir_node *n = get_Shl_right(ra);
1936 flags = gen_bt(pred, la, n);
1937 /* we must generate a Jc/Jnc jump */
1938 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1941 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1947 flags = be_transform_node(pred);
1953 /* a mode_b value, we have to compare it against 0 */
1954 dbgi = get_irn_dbg_info(node);
1955 new_block = be_transform_node(get_nodes_block(node));
1956 new_op = be_transform_node(node);
1957 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1958 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1959 *pnc_out = pn_Cmp_Lg;
1964 * Transforms a Load.
1966 * @return the created ia32 Load node
/* Builds an address-mode description for the pointer and picks the load
 * flavour: xLoad (SSE float), vfld (x87 float), Conv_I2I (integer modes
 * < 32 bit — the load and the widening are fused), or plain Load.
 * Floating loads are marked rematerializable. */
1968 static ir_node *gen_Load(ir_node *node)
1970 ir_node *old_block = get_nodes_block(node);
1971 ir_node *block = be_transform_node(old_block);
1972 ir_node *ptr = get_Load_ptr(node);
1973 ir_node *mem = get_Load_mem(node);
1974 ir_node *new_mem = be_transform_node(mem);
1977 dbg_info *dbgi = get_irn_dbg_info(node);
1978 ir_mode *mode = get_Load_mode(node);
1981 ia32_address_t addr;
1983 /* construct load address */
1984 memset(&addr, 0, sizeof(addr));
1985 ia32_create_address_mode(&addr, ptr, 0);
1992 base = be_transform_node(base);
1995 if (index == NULL) {
1998 index = be_transform_node(index);
2001 if (mode_is_float(mode)) {
2002 if (ia32_cg_config.use_sse2) {
2003 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2005 res_mode = mode_xmm;
2007 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2009 res_mode = mode_vfp;
2012 assert(mode != mode_b);
2014 /* create a conv node with address mode for smaller modes */
2015 if (get_mode_size_bits(mode) < 32) {
2016 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2017 new_mem, noreg_GP, mode);
2019 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2024 set_irn_pinned(new_node, get_irn_pinned(node));
2025 set_ia32_op_type(new_node, ia32_AddrModeS);
2026 set_ia32_ls_mode(new_node, mode);
2027 set_address(new_node, &addr);
/* unpinned (floating) loads may be redone freely by the spiller */
2029 if (get_irn_pinned(node) == op_pin_state_floats) {
2030 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2031 && pn_ia32_vfld_res == pn_ia32_Load_res
2032 && pn_ia32_Load_res == pn_ia32_res);
2033 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2036 SET_IA32_ORIG_NODE(new_node, node);
2038 be_dep_on_frame(new_node);
/* Decide whether a Store(op(Load(ptr))) pattern may be fused into a
 * destination-address-mode instruction. Requires: the load result has
 * exactly one user, load and store sit in the same block with the same
 * pointer, no other operand depends on the load, and nothing between
 * load and store prevents AM. */
2042 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2043 ir_node *ptr, ir_node *other)
2050 /* we only use address mode if we're the only user of the load */
2051 if (get_irn_n_edges(node) > 1)
2054 load = get_Proj_pred(node);
2057 if (get_nodes_block(load) != block)
2060 /* store should have the same pointer as the load */
2061 if (get_Load_ptr(load) != ptr)
2064 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2065 if (other != NULL &&
2066 get_nodes_block(other) == block &&
2067 heights_reachable_in_block(heights, other, load)) {
2071 if (prevents_AM(block, load, mem))
2073 /* Store should be attached to the load via mem */
2074 assert(heights_reachable_in_block(heights, mem, load));
/* Try to build a destination-address-mode binop, i.e. fuse
 * Store(ptr, op(Load(ptr), x)) into a single read-modify-write
 * instruction (e.g. AddMem). Returns the new node on success; the
 * failure return path is elided in this sampled view. The 8-bit
 * constructor variant is chosen for 8-bit modes. */
2079 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2080 ir_node *mem, ir_node *ptr, ir_mode *mode,
2081 construct_binop_dest_func *func,
2082 construct_binop_dest_func *func8bit,
2083 match_flags_t flags)
2085 ir_node *src_block = get_nodes_block(node);
2093 ia32_address_mode_t am;
2094 ia32_address_t *addr = &am.addr;
2095 memset(&am, 0, sizeof(am));
2097 assert(flags & match_immediate); /* there is no destam node without... */
2098 commutative = (flags & match_commutative) != 0;
/* one operand must be the (single-use) load of ptr, the other becomes
 * the instruction's register/immediate operand */
2100 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2101 build_address(&am, op1, ia32_create_am_double_use);
2102 new_op = create_immediate_or_transform(op2, 0);
2103 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2104 build_address(&am, op2, ia32_create_am_double_use);
2105 new_op = create_immediate_or_transform(op1, 0);
2110 if (addr->base == NULL)
2111 addr->base = noreg_GP;
2112 if (addr->index == NULL)
2113 addr->index = noreg_GP;
2114 if (addr->mem == NULL)
2117 dbgi = get_irn_dbg_info(node);
2118 block = be_transform_node(src_block);
2119 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2121 if (get_mode_size_bits(mode) == 8) {
2122 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2124 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2126 set_address(new_node, addr);
2127 set_ia32_op_type(new_node, ia32_AddrModeD);
2128 set_ia32_ls_mode(new_node, mode);
2129 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory users to the fused node */
2131 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2132 mem_proj = be_transform_node(am.mem_proj);
2133 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Unary counterpart of dest_am_binop: fuse Store(ptr, op(Load(ptr)))
 * into a single destination-AM instruction (NegMem, NotMem, IncMem,
 * DecMem). Returns NULL-ish on failure (elided line after the guard). */
2138 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2139 ir_node *ptr, ir_mode *mode,
2140 construct_unop_dest_func *func)
2142 ir_node *src_block = get_nodes_block(node);
2148 ia32_address_mode_t am;
2149 ia32_address_t *addr = &am.addr;
2151 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2154 memset(&am, 0, sizeof(am));
2155 build_address(&am, op, ia32_create_am_double_use);
2157 dbgi = get_irn_dbg_info(node);
2158 block = be_transform_node(src_block);
2159 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2160 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2161 set_address(new_node, addr);
2162 set_ia32_op_type(new_node, ia32_AddrModeD);
2163 set_ia32_ls_mode(new_node, mode);
2164 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory users to the fused node */
2166 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2167 mem_proj = be_transform_node(am.mem_proj);
2168 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to turn Store(Mux(sel, 1, 0)) (or the 0/1-swapped variant, with
 * the condition negated) of an 8-bit value into a single SetMem (setcc
 * to memory). Returns the SetMem on success; the bail-out returns are
 * elided in this sampled view. */
2173 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2175 ir_mode *mode = get_irn_mode(node);
2176 ir_node *mux_true = get_Mux_true(node);
2177 ir_node *mux_false = get_Mux_false(node);
2187 ia32_address_t addr;
/* setcc only writes a byte */
2189 if (get_mode_size_bits(mode) != 8)
2192 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2194 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2200 build_address_ptr(&addr, ptr, mem);
2202 dbgi = get_irn_dbg_info(node);
2203 block = get_nodes_block(node);
2204 new_block = be_transform_node(block);
2205 cond = get_Mux_sel(node);
/* materialize the condition as eflags + a pn_Cmp */
2206 flags = get_flags_node(cond, &pnc);
2207 new_mem = be_transform_node(mem);
2208 new_node = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2209 addr.index, addr.mem, flags, pnc, negated);
2210 set_address(new_node, &addr);
2211 set_ia32_op_type(new_node, ia32_AddrModeD);
2212 set_ia32_ls_mode(new_node, mode);
2213 SET_IA32_ORIG_NODE(new_node, node);
/* Top-level destination-address-mode matcher for Stores. If the stored
 * value is a single-use ALU op (Add/Sub/And/Or/Eor/shifts/Rotl/Minus/
 * Not/Mux) in the same block whose operand is a load of the same
 * pointer, emit the corresponding read-modify-write Mem instruction.
 * Add +1/-1 prefers IncMem/DecMem when the config allows. Returns the
 * new node or NULL-ish when no pattern matched (elided epilogue). */
2218 static ir_node *try_create_dest_am(ir_node *node)
2220 ir_node *val = get_Store_value(node);
2221 ir_node *mem = get_Store_mem(node);
2222 ir_node *ptr = get_Store_ptr(node);
2223 ir_mode *mode = get_irn_mode(val);
2224 unsigned bits = get_mode_size_bits(mode);
2229 /* handle only GP modes for now... */
2230 if (!ia32_mode_needs_gp_reg(mode))
2234 /* store must be the only user of the val node */
2235 if (get_irn_n_edges(val) > 1)
2237 /* skip pointless convs */
2239 ir_node *conv_op = get_Conv_op(val);
2240 ir_mode *pred_mode = get_irn_mode(conv_op);
2241 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a Conv that does not narrow below the stored width is a no-op here */
2243 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2251 /* value must be in the same block */
2252 if (get_nodes_block(node) != get_nodes_block(val))
2255 switch (get_irn_opcode(val)) {
2257 op1 = get_Add_left(val);
2258 op2 = get_Add_right(val);
2259 if (ia32_cg_config.use_incdec) {
2260 if (is_Const_1(op2)) {
2261 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2263 } else if (is_Const_Minus_1(op2)) {
2264 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2268 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2269 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2270 match_commutative | match_immediate);
2273 op1 = get_Sub_left(val);
2274 op2 = get_Sub_right(val);
2275 if (is_Const(op2)) {
2276 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2278 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2279 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2283 op1 = get_And_left(val);
2284 op2 = get_And_right(val);
2285 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2286 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2287 match_commutative | match_immediate);
2290 op1 = get_Or_left(val);
2291 op2 = get_Or_right(val);
2292 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2293 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2294 match_commutative | match_immediate);
2297 op1 = get_Eor_left(val);
2298 op2 = get_Eor_right(val);
2299 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2300 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2301 match_commutative | match_immediate);
2304 op1 = get_Shl_left(val);
2305 op2 = get_Shl_right(val);
/* shifts have no separate 8-bit constructor; same func passed twice */
2306 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2307 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2311 op1 = get_Shr_left(val);
2312 op2 = get_Shr_right(val);
2313 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2314 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2318 op1 = get_Shrs_left(val);
2319 op2 = get_Shrs_right(val);
2320 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2321 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2325 op1 = get_Rotl_left(val);
2326 op2 = get_Rotl_right(val);
2327 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2328 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2331 /* TODO: match ROR patterns... */
2333 new_node = try_create_SetMem(val, ptr, mem);
2336 op1 = get_Minus_op(val);
2337 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2340 /* should be lowered already */
2341 assert(mode != mode_b);
2342 op1 = get_Not_op(val);
2343 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned Store must keep the fused node pinned as well */
2349 if (new_node != NULL) {
2350 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2351 get_irn_pinned(node) == op_pin_state_pinned) {
2352 set_irn_pinned(new_node, op_pin_state_pinned);
/* True iff an integer mode could be the target of an x87 fist store:
 * signed and either 16 or 32 bits wide. */
2359 static bool possible_int_mode_for_fp(ir_mode *mode)
2363 if (!mode_is_signed(mode))
2365 size = get_mode_size_bits(mode);
2366 if (size != 16 && size != 32)
/* True iff node is a Conv from a float mode to an integer mode that a
 * fist instruction can store directly (see possible_int_mode_for_fp). */
2371 static int is_float_to_int_conv(const ir_node *node)
2373 ir_mode *mode = get_irn_mode(node);
2377 if (!possible_int_mode_for_fp(mode))
2382 conv_op = get_Conv_op(node);
2383 conv_mode = get_irn_mode(conv_op);
2385 if (!mode_is_float(conv_mode))
2392 * Transform a Store(floatConst) into a sequence of
2395 * @return the created ia32 Store node
/* Splits a float-constant store into one or more 32-bit integer
 * immediate stores (one per 4 bytes of the constant, little-endian
 * byte order via get_tarval_sub_bits). Multiple stores are joined by a
 * Sync; the address advance between chunks is in elided lines. */
2397 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2399 ir_mode *mode = get_irn_mode(cns);
2400 unsigned size = get_mode_size_bytes(mode);
2401 tarval *tv = get_Const_tarval(cns);
2402 ir_node *block = get_nodes_block(node);
2403 ir_node *new_block = be_transform_node(block);
2404 ir_node *ptr = get_Store_ptr(node);
2405 ir_node *mem = get_Store_mem(node);
2406 dbg_info *dbgi = get_irn_dbg_info(node);
2410 ia32_address_t addr;
/* only whole 32-bit chunks are supported */
2412 assert(size % 4 == 0);
2415 build_address_ptr(&addr, ptr, mem);
/* assemble the next 32-bit little-endian chunk of the constant */
2419 get_tarval_sub_bits(tv, ofs) |
2420 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2421 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2422 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2423 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2425 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2426 addr.index, addr.mem, imm);
2428 set_irn_pinned(new_node, get_irn_pinned(node));
2429 set_ia32_op_type(new_node, ia32_AddrModeD);
2430 set_ia32_ls_mode(new_node, mode_Iu);
2431 set_address(new_node, &addr);
2432 SET_IA32_ORIG_NODE(new_node, node);
2435 ins[i++] = new_node;
2440 } while (size != 0);
/* join the partial stores' memory outputs */
2443 return new_rd_Sync(dbgi, new_block, i, ins);
2450 * Generate a vfist or vfisttp instruction.
/* fisttp (SSE3) truncates without fiddling the FPU control word but
 * always pops the x87 tos — a Keep on the value Proj preserves it for
 * other users. The fallback vfist needs an explicit truncate-rounding
 * fpcw mode. *fist receives the store node itself (assignment elided). */
2452 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2453 ir_node *mem, ir_node *val, ir_node **fist)
2457 if (ia32_cg_config.use_fisttp) {
2458 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2459 if other users exists */
2460 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2461 ir_node *value = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2462 be_new_Keep(block, 1, &value);
2464 new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
2467 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2470 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2476 * Transforms a general (no special case) Store.
2478 * @return the created ia32 Store node
/* The catch-all Store lowering. Tries destination address mode first;
 * otherwise builds the address and picks the store flavour: xStore
 * (SSE float), vfst (x87 float), fist (float->int conv folded into the
 * store), or Store/Store8Bit for plain integers. Redundant float Convs
 * in front of the store are skipped. */
2480 static ir_node *gen_general_Store(ir_node *node)
2482 ir_node *val = get_Store_value(node);
2483 ir_mode *mode = get_irn_mode(val);
2484 ir_node *block = get_nodes_block(node);
2485 ir_node *new_block = be_transform_node(block);
2486 ir_node *ptr = get_Store_ptr(node);
2487 ir_node *mem = get_Store_mem(node);
2488 dbg_info *dbgi = get_irn_dbg_info(node);
2489 ir_node *new_val, *new_node, *store;
2490 ia32_address_t addr;
2492 /* check for destination address mode */
2493 new_node = try_create_dest_am(node);
2494 if (new_node != NULL)
2497 /* construct store address */
2498 memset(&addr, 0, sizeof(addr));
2499 ia32_create_address_mode(&addr, ptr, 0);
2501 if (addr.base == NULL) {
2502 addr.base = noreg_GP;
2504 addr.base = be_transform_node(addr.base);
2507 if (addr.index == NULL) {
2508 addr.index = noreg_GP;
2510 addr.index = be_transform_node(addr.index);
2512 addr.mem = be_transform_node(mem);
2514 if (mode_is_float(mode)) {
2515 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2517 while (is_Conv(val) && mode == get_irn_mode(val)) {
2518 ir_node *op = get_Conv_op(val);
2519 if (!mode_is_float(get_irn_mode(op)))
2523 new_val = be_transform_node(val);
2524 if (ia32_cg_config.use_sse2) {
2525 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2526 addr.index, addr.mem, new_val);
2528 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2529 addr.index, addr.mem, new_val, mode);
/* x87 only: fold a float->int Conv into a fist store */
2532 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2533 val = get_Conv_op(val);
2535 /* TODO: is this optimisation still necessary at all (middleend)? */
2536 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2537 while (is_Conv(val)) {
2538 ir_node *op = get_Conv_op(val);
2539 if (!mode_is_float(get_irn_mode(op)))
2541 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2545 new_val = be_transform_node(val);
2546 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
2548 new_val = create_immediate_or_transform(val, 0);
2549 assert(mode != mode_b);
2551 if (get_mode_size_bits(mode) == 8) {
2552 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2553 addr.index, addr.mem, new_val);
2555 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2556 addr.index, addr.mem, new_val);
/* attributes go on the actual store node (may differ from new_node
 * in the fist case) */
2561 set_irn_pinned(store, get_irn_pinned(node));
2562 set_ia32_op_type(store, ia32_AddrModeD);
2563 set_ia32_ls_mode(store, mode);
2565 set_address(store, &addr);
2566 SET_IA32_ORIG_NODE(store, node);
2572 * Transforms a Store.
2574 * @return the created ia32 Store node
/**
 * Transforms a firm Store node into ia32 code.
 *
 * Dispatches: a store of a floating-point Const is lowered to integer
 * stores via gen_float_const_Store(); every other store goes through
 * gen_general_Store().
 *
 * @param node  the firm Store node
 * @return the created ia32 Store node
 */
2576 static ir_node *gen_Store(ir_node *node)
2578 ir_node *val = get_Store_value(node);
2579 ir_mode *mode = get_irn_mode(val);
2581 if (mode_is_float(mode) && is_Const(val)) {
2582 /* We can transform every floating const store
2583 into a sequence of integer stores.
2584 If the constant is already in a register,
2585 it would be better to use it, but we don't
2586 have this information here. */
2587 return gen_float_const_Store(node, val);
2589 return gen_general_Store(node);
2593 * Transforms a Switch.
2595 * @return the created ia32 SwitchJmp node
/**
 * Transforms a switch-like Cond (integer selector) into an ia32 SwitchJmp.
 *
 * Scans all case Projs to find the min/max case values, refuses jump
 * tables spanning more than 128000 entries, and biases the selector with
 * a Lea when the smallest case is not 0 so the table can start at index 0.
 *
 * @param node  the firm Cond node with a 32-bit integer selector
 * @return the created ia32 SwitchJmp node
 */
2597 static ir_node *create_Switch(ir_node *node)
2599 dbg_info *dbgi = get_irn_dbg_info(node);
2600 ir_node *block = be_transform_node(get_nodes_block(node));
2601 ir_node *sel = get_Cond_selector(node);
2602 ir_node *new_sel = be_transform_node(sel);
2603 long switch_min = LONG_MAX;
2604 long switch_max = LONG_MIN;
2605 long default_pn = get_Cond_default_proj(node);
2607 const ir_edge_t *edge;
2609 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2611 /* determine the smallest switch case value */
2612 foreach_out_edge(node, edge) {
2613 ir_node *proj = get_edge_src_irn(edge);
2614 long pn = get_Proj_proj(proj);
2615 if (pn == default_pn)
2618 if (pn < switch_min)
2620 if (pn > switch_max)
/* refuse to build an absurdly large jump table */
2624 if ((unsigned long) (switch_max - switch_min) > 128000) {
2625 panic("Size of switch %+F bigger than 128000", node);
2628 if (switch_min != 0) {
2629 /* if smallest switch case is not 0 we need an additional sub */
2630 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2631 add_ia32_am_offs_int(new_sel, -switch_min);
2632 set_ia32_op_type(new_sel, ia32_AddrModeS);
2634 SET_IA32_ORIG_NODE(new_sel, node);
2637 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2638 SET_IA32_ORIG_NODE(new_node, node);
2644 * Transform a Cond node.
/**
 * Transforms a Cond node.
 *
 * Non-mode_b selectors are switches and go to create_Switch(); boolean
 * selectors get their flags producer via get_flags_node() and become an
 * ia32 Jcc on those flags.
 *
 * @param node  the firm Cond node
 * @return the created ia32 Jcc (or SwitchJmp) node
 */
2646 static ir_node *gen_Cond(ir_node *node)
2648 ir_node *block = get_nodes_block(node);
2649 ir_node *new_block = be_transform_node(block);
2650 dbg_info *dbgi = get_irn_dbg_info(node);
2651 ir_node *sel = get_Cond_selector(node);
2652 ir_mode *sel_mode = get_irn_mode(sel);
2653 ir_node *flags = NULL;
2657 if (sel_mode != mode_b) {
2658 return create_Switch(node);
2661 /* we get flags from a Cmp */
2662 flags = get_flags_node(sel, &pnc);
2664 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2665 SET_IA32_ORIG_NODE(new_node, node);
2671 * Transform a be_Copy.
/**
 * Transforms a be_Copy node.
 *
 * Duplicates the node and normalizes any GP-register mode to mode_Iu,
 * since all integer operations run on full 32-bit registers.
 *
 * @param node  the be_Copy node
 * @return the duplicated (possibly re-moded) node
 */
2673 static ir_node *gen_be_Copy(ir_node *node)
2675 ir_node *new_node = be_duplicate_node(node);
2676 ir_mode *mode = get_irn_mode(new_node);
2678 if (ia32_mode_needs_gp_reg(mode)) {
2679 set_irn_mode(new_node, mode_Iu);
/**
 * Creates an x87 floating-point compare for a Cmp node.
 *
 * Uses vFucomi when the CPU supports fucomi (flags directly in EFLAGS);
 * otherwise falls back to vFtstFnstsw (compare against 0 constant) or
 * vFucomFnstsw, whose FPU status word is then transferred to EFLAGS via
 * Sahf.
 *
 * @param node  the firm Cmp node with float operands
 * @return the node producing the comparison flags
 */
2685 static ir_node *create_Fucom(ir_node *node)
2687 dbg_info *dbgi = get_irn_dbg_info(node);
2688 ir_node *block = get_nodes_block(node);
2689 ir_node *new_block = be_transform_node(block);
2690 ir_node *left = get_Cmp_left(node);
2691 ir_node *new_left = be_transform_node(left);
2692 ir_node *right = get_Cmp_right(node);
2696 if (ia32_cg_config.use_fucomi) {
2697 new_right = be_transform_node(right);
2698 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2700 set_ia32_commutative(new_node);
2701 SET_IA32_ORIG_NODE(new_node, node);
2703 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
/* compare against zero needs no second operand register */
2704 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2706 new_right = be_transform_node(right);
2707 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2710 set_ia32_commutative(new_node);
2712 SET_IA32_ORIG_NODE(new_node, node);
/* move the FPU status word (in AH) into EFLAGS */
2714 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2715 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Creates an SSE Ucomi (unordered compare, flags in EFLAGS) for a Cmp
 * node with float operands; operands may be matched as a memory
 * address-mode operand.
 *
 * @param node  the firm Cmp node
 * @return the node producing the comparison flags
 */
2721 static ir_node *create_Ucomi(ir_node *node)
2723 dbg_info *dbgi = get_irn_dbg_info(node);
2724 ir_node *src_block = get_nodes_block(node);
2725 ir_node *new_block = be_transform_node(src_block);
2726 ir_node *left = get_Cmp_left(node);
2727 ir_node *right = get_Cmp_right(node);
2729 ia32_address_mode_t am;
2730 ia32_address_t *addr = &am.addr;
2732 match_arguments(&am, src_block, left, right, NULL,
2733 match_commutative | match_am);
2735 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2736 addr->mem, am.new_op1, am.new_op2,
2738 set_am_attributes(new_node, &am);
2740 SET_IA32_ORIG_NODE(new_node, node);
2742 new_node = fix_mem_proj(new_node, &am);
2748 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2749 * to fold an and into a test node
/**
 * Checks whether all Projs of a Cmp are Eq or Lg (==/!=), which is the
 * precondition for folding an And into a Test instruction.
 *
 * @param node  the Cmp node whose out-edges are inspected
 * @return true iff every Proj tests only equality/inequality
 */
2751 static bool can_fold_test_and(ir_node *node)
2753 const ir_edge_t *edge;
2755 /** we can only have eq and lg projs */
2756 foreach_out_edge(node, edge) {
2757 ir_node *proj = get_edge_src_irn(edge);
2758 pn_Cmp pnc = get_Proj_proj(proj);
2759 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2767 * returns true if it is assured, that the upper bits of a node are "clean"
2768 * which means for a 16 or 8 bit value, that the upper bits in the register
2769 * are 0 for unsigned and a copy of the last significant bit for signed
/**
 * Returns true if the upper bits of an already-transformed node are
 * known to be "clean": for an 8/16-bit value, the upper register bits
 * are 0 (unsigned) or a sign-extension copy (signed).  Used to widen
 * compares to the smaller 32-bit opcodes.
 *
 * @param transformed_node  an ia32 node (Projs are looked through)
 * @param mode              the small mode whose upper bits are queried
 */
2772 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2774 assert(ia32_mode_needs_gp_reg(mode));
2775 if (get_mode_size_bits(mode) >= 32)
2778 if (is_Proj(transformed_node))
2779 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2781 switch (get_ia32_irn_opcode(transformed_node)) {
2782 case iro_ia32_Conv_I2I:
2783 case iro_ia32_Conv_I2I8Bit: {
/* a conv cleans the upper bits when signedness matches and the
 * target of the conv is not wider than the queried mode */
2784 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2785 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2787 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* NOTE(review): the case label for the shift handling below was
 * apparently a Shr case — confirm against the full source */
2794 if (mode_is_signed(mode)) {
2795 return false; /* TODO handle signed modes */
2797 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2798 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2799 const ia32_immediate_attr_t *attr
2800 = get_ia32_immediate_attr_const(right);
/* shifting right by >= (32 - bits) zeroes the upper bits */
2801 if (attr->symconst == 0 &&
2802 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2806 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2810 /* TODO too conservative if shift amount is constant */
2811 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
2814 if (!mode_is_signed(mode)) {
/* unsigned And: one clean operand suffices since And only clears bits */
2816 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2817 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2819 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary op: both operands must be clean */
2824 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2825 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2827 case iro_ia32_Const:
2828 case iro_ia32_Immediate: {
2829 const ia32_immediate_attr_t *attr =
2830 get_ia32_immediate_attr_const(transformed_node);
2831 if (mode_is_signed(mode)) {
/* signed: upper bits must all equal the sign bit of the small mode */
2832 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2833 return shifted == 0 || shifted == -1;
2835 unsigned long shifted = (unsigned long)attr->offset;
2836 shifted >>= get_mode_size_bits(mode);
2837 return shifted == 0;
2847 * Generate code for a Cmp.
/**
 * Generates code for a Cmp node.
 *
 * Float compares are delegated to create_Ucomi (SSE2) or create_Fucom
 * (x87).  Integer compares against 0 of a single-use And are folded
 * into a Test instruction; other compares become ia32 Cmp/Cmp8Bit.
 * When upper_bits_clean holds for both operands, the 32-bit compare
 * opcode is used because it encodes smaller.
 *
 * @param node  the firm Cmp node
 * @return the node producing the comparison flags
 */
2849 static ir_node *gen_Cmp(ir_node *node)
2851 dbg_info *dbgi = get_irn_dbg_info(node);
2852 ir_node *block = get_nodes_block(node);
2853 ir_node *new_block = be_transform_node(block);
2854 ir_node *left = get_Cmp_left(node);
2855 ir_node *right = get_Cmp_right(node);
2856 ir_mode *cmp_mode = get_irn_mode(left);
2858 ia32_address_mode_t am;
2859 ia32_address_t *addr = &am.addr;
2862 if (mode_is_float(cmp_mode)) {
2863 if (ia32_cg_config.use_sse2) {
2864 return create_Ucomi(node);
2866 return create_Fucom(node);
2870 assert(ia32_mode_needs_gp_reg(cmp_mode));
2872 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2873 cmp_unsigned = !mode_is_signed(cmp_mode);
2874 if (is_Const_0(right) &&
2876 get_irn_n_edges(left) == 1 &&
2877 can_fold_test_and(node)) {
2878 /* Test(and_left, and_right) */
2879 ir_node *and_left = get_And_left(left);
2880 ir_node *and_right = get_And_right(left);
2882 /* matze: code here used mode instead of cmd_mode, I think it is always
2883 * the same as cmp_mode, but I leave this here to see if this is really
2886 assert(get_irn_mode(and_left) == cmp_mode);
2888 match_arguments(&am, block, and_left, and_right, NULL,
2890 match_am | match_8bit_am | match_16bit_am |
2891 match_am_and_immediates | match_immediate);
2893 /* use 32bit compare mode if possible since the opcode is smaller */
2894 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2895 upper_bits_clean(am.new_op2, cmp_mode)) {
2896 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2899 if (get_mode_size_bits(cmp_mode) == 8) {
2900 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2901 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2904 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2905 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2908 /* Cmp(left, right) */
2909 match_arguments(&am, block, left, right, NULL,
2910 match_commutative | match_am | match_8bit_am |
2911 match_16bit_am | match_am_and_immediates |
2913 /* use 32bit compare mode if possible since the opcode is smaller */
2914 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2915 upper_bits_clean(am.new_op2, cmp_mode)) {
2916 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2919 if (get_mode_size_bits(cmp_mode) == 8) {
2920 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2921 addr->index, addr->mem, am.new_op1,
2922 am.new_op2, am.ins_permuted,
2925 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2926 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2929 set_am_attributes(new_node, &am);
2930 set_ia32_ls_mode(new_node, cmp_mode);
2932 SET_IA32_ORIG_NODE(new_node, node);
2934 new_node = fix_mem_proj(new_node, &am);
/**
 * Creates an ia32 CMov for a Mux node whose values need a GP register.
 *
 * @param node       the firm Mux node (provides dbgi/block/true/false)
 * @param flags      the untransformed flags-producing node (for matching)
 * @param new_flags  the transformed flags node consumed by the CMov
 * @return the created CMov node (mem Proj fixed up)
 */
2939 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2942 dbg_info *dbgi = get_irn_dbg_info(node);
2943 ir_node *block = get_nodes_block(node);
2944 ir_node *new_block = be_transform_node(block);
2945 ir_node *val_true = get_Mux_true(node);
2946 ir_node *val_false = get_Mux_false(node);
2948 ia32_address_mode_t am;
2949 ia32_address_t *addr;
2951 assert(ia32_cg_config.use_cmov);
2952 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2956 match_arguments(&am, block, val_false, val_true, flags,
2957 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2959 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2960 addr->mem, am.new_op1, am.new_op2, new_flags,
2961 am.ins_permuted, pnc);
2962 set_am_attributes(new_node, &am);
2964 SET_IA32_ORIG_NODE(new_node, node);
2966 new_node = fix_mem_proj(new_node, &am);
2972 * Creates a ia32 Setcc instruction.
/**
 * Creates an ia32 Setcc instruction producing a 0/1 value from flags,
 * zero-extended with a Conv when the result mode is wider than 8 bit.
 *
 * @param dbgi       debug info to attach
 * @param new_block  already-transformed block
 * @param flags      transformed flags node
 * @param pnc        the condition code to set on
 * @param orig_node  original node (mode source and ORIG annotation)
 */
2974 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2975 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2978 ir_mode *mode = get_irn_mode(orig_node);
2981 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2982 SET_IA32_ORIG_NODE(new_node, orig_node);
2984 /* we might need to conv the result up */
2985 if (get_mode_size_bits(mode) > 8) {
2986 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
2987 nomem, new_node, mode_Bu);
2988 SET_IA32_ORIG_NODE(new_node, orig_node);
2995 * Create instruction for an unsigned Difference or Zero.
/**
 * Creates the instruction sequence for an unsigned "difference or zero"
 * (Doz): computes a - b, then uses the borrow flag via Sbb0 to build an
 * all-zeros/all-ones mask that is ANDed with the difference, yielding
 * max(a - b, 0) for unsigned operands.
 *
 * @param psi  the original Mux node being replaced
 * @param a    minuend
 * @param b    subtrahend
 */
2997 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
2999 ir_mode *mode = get_irn_mode(psi);
3000 ir_node *new_node, *sub, *sbb, *eflags, *block;
3004 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3005 match_mode_neutral | match_am | match_immediate | match_two_users);
3007 block = get_nodes_block(new_node);
3009 if (is_Proj(new_node)) {
3010 sub = get_Proj_pred(new_node);
3011 assert(is_ia32_Sub(sub));
/* turn the Sub into mode_T so both result and flags can be projected */
3014 set_irn_mode(sub, mode_T);
3015 new_node = new_rd_Proj(NULL, block, sub, mode, pn_ia32_res);
3017 eflags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3019 dbgi = get_irn_dbg_info(psi);
/* Sbb0 materializes 0 - borrow: 0 if a >= b, -1 (all ones) otherwise */
3020 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3022 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3023 set_ia32_commutative(new_node);
3028 * Create an const array of two float consts.
3030 * @param c0 the first constant
3031 * @param c1 the second constant
3032 * @param new_mode IN/OUT for the mode of the constants, if NULL
3033 * smallest possible mode will be used
/**
 * Creates a static, constant-initialized global entity holding an array
 * of two float constants (used e.g. for flag-indexed constant loads).
 *
 * @param c0        the first Const node
 * @param c1        the second Const node
 * @param new_mode  IN/OUT mode of the constants; if NULL on entry the
 *                  smallest lossless float mode is chosen
 * @return the created entity
 */
3035 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3037 ir_mode *mode = *new_mode;
3039 ir_initializer_t *initializer;
3040 tarval *tv0 = get_Const_tarval(c0);
3041 tarval *tv1 = get_Const_tarval(c1);
3044 /* detect the best mode for the constants */
3045 mode = get_tarval_mode(tv0);
3047 if (mode != mode_F) {
/* prefer single precision when both values convert losslessly */
3048 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3049 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3051 tv0 = tarval_convert_to(tv0, mode);
3052 tv1 = tarval_convert_to(tv1, mode);
3053 } else if (mode != mode_D) {
/* otherwise try double precision */
3054 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3055 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3057 tv0 = tarval_convert_to(tv0, mode);
3058 tv1 = tarval_convert_to(tv1, mode);
3065 tp = ia32_create_float_type(mode, 4);
3066 tp = ia32_create_float_array(tp);
3068 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3070 set_entity_ld_ident(ent, get_entity_ident(ent));
3071 set_entity_visibility(ent, visibility_local);
3072 set_entity_variability(ent, variability_constant);
3073 set_entity_allocation(ent, allocation_static);
3075 initializer = create_initializer_compound(2);
3077 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3078 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3080 set_entity_initializer(ent, initializer);
3087 * Transforms a Mux node into some code sequence.
3089 * @return The transformed node.
/**
 * Transforms a Mux node into an ia32 code sequence.
 *
 * Float Mux: recognizes min/max patterns for SSE2 (xMin/xMax); Mux of
 * two float constants becomes a Set result used as an index into a
 * two-element constant array (see ia32_create_const_array) that is then
 * loaded; anything else panics.  Integer Mux: recognizes the unsigned
 * "difference or zero" pattern (create_Doz), Set for 0/1 constants, and
 * falls back to CMov.
 *
 * @param node  the firm Mux node
 * @return the transformed node
 */
3091 static ir_node *gen_Mux(ir_node *node)
3093 dbg_info *dbgi = get_irn_dbg_info(node);
3094 ir_node *block = get_nodes_block(node);
3095 ir_node *new_block = be_transform_node(block);
3096 ir_node *mux_true = get_Mux_true(node);
3097 ir_node *mux_false = get_Mux_false(node);
3098 ir_node *cond = get_Mux_sel(node);
3099 ir_mode *mode = get_irn_mode(node);
3104 assert(get_irn_mode(cond) == mode_b);
3106 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3107 if (mode_is_float(mode)) {
3108 ir_node *cmp = get_Proj_pred(cond);
3109 ir_node *cmp_left = get_Cmp_left(cmp);
3110 ir_node *cmp_right = get_Cmp_right(cmp);
3111 pn_Cmp pnc = get_Proj_proj(cond);
3113 if (ia32_cg_config.use_sse2) {
3114 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3115 if (cmp_left == mux_true && cmp_right == mux_false) {
3116 /* Mux(a <= b, a, b) => MIN */
3117 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3118 match_commutative | match_am | match_two_users);
3119 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3120 /* Mux(a <= b, b, a) => MAX */
3121 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3122 match_commutative | match_am | match_two_users);
3124 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3125 if (cmp_left == mux_true && cmp_right == mux_false) {
3126 /* Mux(a >= b, a, b) => MAX */
3127 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3128 match_commutative | match_am | match_two_users);
3129 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3130 /* Mux(a >= b, b, a) => MIN */
3131 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3132 match_commutative | match_am | match_two_users);
3136 if (is_Const(mux_true) && is_Const(mux_false)) {
3137 ia32_address_mode_t am;
/* the Set result (0/1) selects one of the two array entries */
3142 flags = get_flags_node(cond, &pnc);
3143 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_permuted=*/0);
3145 if (ia32_cg_config.use_sse2) {
3146 /* cannot load from different mode on SSE */
3149 /* x87 can load any mode */
3153 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (AM scale covers 1/2/4/8) */
3155 switch (get_mode_size_bytes(new_mode)) {
3165 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3166 set_ia32_am_scale(new_node, 2);
3171 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3172 set_ia32_am_scale(new_node, 1);
3175 /* arg, shift 16 NOT supported */
3177 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3180 panic("Unsupported constant size");
3183 am.ls_mode = new_mode;
3184 am.addr.base = noreg_GP;
3185 am.addr.index = new_node;
3186 am.addr.mem = nomem;
3188 am.addr.scale = scale;
3189 am.addr.use_frame = 0;
3190 am.addr.frame_entity = NULL;
3191 am.addr.symconst_sign = 0;
3192 am.mem_proj = am.addr.mem;
3193 am.op_type = ia32_AddrModeS;
3196 am.pinned = op_pin_state_floats;
3198 am.ins_permuted = 0;
3200 if (ia32_cg_config.use_sse2)
3201 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3203 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3204 set_am_attributes(load, &am);
3206 return new_rd_Proj(NULL, block, load, mode_vfp, pn_ia32_res);
3208 panic("cannot transform floating point Mux");
3211 assert(ia32_mode_needs_gp_reg(mode));
3213 if (is_Proj(cond)) {
3214 ir_node *cmp = get_Proj_pred(cond);
3216 ir_node *cmp_left = get_Cmp_left(cmp);
3217 ir_node *cmp_right = get_Cmp_right(cmp);
3218 pn_Cmp pnc = get_Proj_proj(cond);
3220 /* check for unsigned Doz first */
3221 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3222 is_Const_0(mux_false) && is_Sub(mux_true) &&
3223 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3224 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3225 return create_Doz(node, cmp_left, cmp_right);
3226 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3227 is_Const_0(mux_true) && is_Sub(mux_false) &&
3228 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3229 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3230 return create_Doz(node, cmp_left, cmp_right);
3235 flags = get_flags_node(cond, &pnc);
3237 if (is_Const(mux_true) && is_Const(mux_false)) {
3238 /* both are const, good */
3239 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3240 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3241 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3242 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
3244 /* Not that simple. */
3249 new_node = create_CMov(node, cond, flags, pnc);
3257 * Create a conversion from x87 state register to general purpose.
/**
 * Creates a conversion from an x87 register to a general purpose
 * register: fist-store the value to the frame, then Load it back as an
 * integer.  Unsigned 32-bit values are stored as a 64-bit signed
 * integer and only the lower 32 bits are loaded.
 *
 * @param node  the firm Conv node (float -> int)
 * @return Proj of the integer Load result
 */
3259 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3261 ir_node *block = be_transform_node(get_nodes_block(node));
3262 ir_node *op = get_Conv_op(node);
3263 ir_node *new_op = be_transform_node(op);
3264 ir_graph *irg = current_ir_graph;
3265 dbg_info *dbgi = get_irn_dbg_info(node);
3266 ir_mode *mode = get_irn_mode(node);
3267 ir_node *fist, *load, *mem;
3269 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3270 set_irn_pinned(fist, op_pin_state_floats);
3271 set_ia32_use_frame(fist);
3272 set_ia32_op_type(fist, ia32_AddrModeD);
3274 assert(get_mode_size_bits(mode) <= 32);
3275 /* exception we can only store signed 32 bit integers, so for unsigned
3276 we store a 64bit (signed) integer and load the lower bits */
3277 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3278 set_ia32_ls_mode(fist, mode_Ls);
3280 set_ia32_ls_mode(fist, mode_Is);
3282 SET_IA32_ORIG_NODE(fist, node);
3285 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3287 set_irn_pinned(load, op_pin_state_floats);
3288 set_ia32_use_frame(load);
3289 set_ia32_op_type(load, ia32_AddrModeS);
3290 set_ia32_ls_mode(load, mode_Is);
/* tell the spill-slot allocator how big the frame entity must be */
3291 if (get_ia32_ls_mode(fist) == mode_Ls) {
3292 ia32_attr_t *attr = get_ia32_attr(load);
3293 attr->data.need_64bit_stackent = 1;
3295 ia32_attr_t *attr = get_ia32_attr(load);
3296 attr->data.need_32bit_stackent = 1;
3298 SET_IA32_ORIG_NODE(load, node);
3300 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
3304 * Creates a x87 strict Conv by placing a Store and a Load
/**
 * Creates an x87 strict Conv by placing a Store (which rounds to
 * tgt_mode precision) followed by a Load on the frame.
 *
 * @param tgt_mode  the target float mode to round to
 * @param node      the already-transformed value to convert
 * @return Proj of the vfld result
 */
3306 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3308 ir_node *block = get_nodes_block(node);
3309 ir_graph *irg = get_Block_irg(block);
3310 dbg_info *dbgi = get_irn_dbg_info(node);
3311 ir_node *frame = get_irg_frame(irg);
3312 ir_node *store, *load;
3315 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3316 set_ia32_use_frame(store);
3317 set_ia32_op_type(store, ia32_AddrModeD);
3318 SET_IA32_ORIG_NODE(store, node);
3320 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3321 set_ia32_use_frame(load);
3322 set_ia32_op_type(load, ia32_AddrModeS);
3323 SET_IA32_ORIG_NODE(load, node);
3325 new_node = new_r_Proj(block, load, mode_E, pn_ia32_vfld_res);
/**
 * Creates an integer-to-integer conversion node, dispatching to the
 * 8-bit variant when the conversion mode is 8 bits wide.
 */
3329 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3330 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3332 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3334 func = get_mode_size_bits(mode) == 8 ?
3335 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3336 return func(dbgi, block, base, index, mem, val, mode);
3340 * Create a conversion from general purpose to x87 register
/**
 * Creates a conversion from a general purpose register to an x87
 * register.  Tries to fild directly from memory (source AM) for signed
 * 16/32-bit sources; otherwise stores the value to the frame and filds
 * it.  Unsigned 32-bit values are widened to a 64-bit spill by storing
 * an extra zero word.
 *
 * @param node      the firm Conv node (int -> float)
 * @param src_mode  the source integer mode
 * @return Proj of the vfild result
 */
3342 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3344 ir_node *src_block = get_nodes_block(node);
3345 ir_node *block = be_transform_node(src_block);
3346 ir_graph *irg = get_Block_irg(block);
3347 dbg_info *dbgi = get_irn_dbg_info(node);
3348 ir_node *op = get_Conv_op(node);
3349 ir_node *new_op = NULL;
3351 ir_mode *store_mode;
3356 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3357 if (possible_int_mode_for_fp(src_mode)) {
3358 ia32_address_mode_t am;
3360 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3361 if (am.op_type == ia32_AddrModeS) {
3362 ia32_address_t *addr = &am.addr;
3364 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3365 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3367 set_am_attributes(fild, &am);
3368 SET_IA32_ORIG_NODE(fild, node);
3370 fix_mem_proj(fild, &am);
3375 if (new_op == NULL) {
3376 new_op = be_transform_node(op);
3379 mode = get_irn_mode(op);
3381 /* first convert to 32 bit signed if necessary */
3382 if (get_mode_size_bits(src_mode) < 32) {
3383 if (!upper_bits_clean(new_op, src_mode)) {
3384 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3385 SET_IA32_ORIG_NODE(new_op, node);
3390 assert(get_mode_size_bits(mode) == 32);
3393 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3395 set_ia32_use_frame(store);
3396 set_ia32_op_type(store, ia32_AddrModeD);
3397 set_ia32_ls_mode(store, mode_Iu);
3399 /* exception for 32bit unsigned, do a 64bit spill+load */
3400 if (!mode_is_signed(mode)) {
/* store a zero upper word so the 64-bit fild reads a non-negative value */
3403 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3405 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3406 noreg_GP, nomem, zero_const);
3408 set_ia32_use_frame(zero_store);
3409 set_ia32_op_type(zero_store, ia32_AddrModeD);
3410 add_ia32_am_offs_int(zero_store, 4);
3411 set_ia32_ls_mode(zero_store, mode_Iu);
3416 store = new_rd_Sync(dbgi, block, 2, in);
3417 store_mode = mode_Ls;
3419 store_mode = mode_Is;
3423 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3425 set_ia32_use_frame(fild);
3426 set_ia32_op_type(fild, ia32_AddrModeS);
3427 set_ia32_ls_mode(fild, store_mode);
3429 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3435 * Create a conversion from one integer mode into another one
/**
 * Creates a conversion between two integer modes.  The smaller of the
 * two modes determines the actual conversion; if the operand's upper
 * bits are already clean the conv can be elided entirely.
 *
 * @param src_mode  source integer mode
 * @param tgt_mode  target integer mode
 * @param dbgi      debug info to attach
 * @param block     the (untransformed) block
 * @param op        the operand to convert
 * @return the converted node (or the operand itself if unnecessary)
 */
3437 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3438 dbg_info *dbgi, ir_node *block, ir_node *op,
3441 ir_node *new_block = be_transform_node(block);
3443 ir_mode *smaller_mode;
3444 ia32_address_mode_t am;
3445 ia32_address_t *addr = &am.addr;
3448 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3449 smaller_mode = src_mode;
3451 smaller_mode = tgt_mode;
3454 #ifdef DEBUG_libfirm
3456 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3461 match_arguments(&am, block, NULL, op, NULL,
3462 match_am | match_8bit_am | match_16bit_am);
3464 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3465 /* unnecessary conv. in theory it shouldn't have been AM */
3466 assert(is_ia32_NoReg_GP(addr->base));
3467 assert(is_ia32_NoReg_GP(addr->index));
3468 assert(is_NoMem(addr->mem));
3469 assert(am.addr.offset == 0);
3470 assert(am.addr.symconst_ent == NULL);
3474 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3475 addr->mem, am.new_op2, smaller_mode);
3476 set_am_attributes(new_node, &am);
3477 /* match_arguments assume that out-mode = in-mode, this isn't true here
3479 set_ia32_ls_mode(new_node, smaller_mode);
3480 SET_IA32_ORIG_NODE(new_node, node);
3481 new_node = fix_mem_proj(new_node, &am);
3486 * Transforms a Conv node.
3488 * @return The created ia32 Conv node
/**
 * Transforms a Conv node.
 *
 * Handles all conversion combinations: mode_b sources are no-ops
 * (bools are already 0/1 ints), same-mode (strict) convs, float<->float
 * (SSE Conv_FP2FP or x87 strict store/load round-trip), float->int
 * (SSE Conv_FP2I or gen_x87_fp_to_gp), int->float (SSE Conv_I2FP or
 * gen_x87_gp_to_fp, with an extra strict conv when the int mantissa
 * exceeds the float mantissa), and int->int via create_I2I_Conv.
 *
 * @param node  the firm Conv node
 * @return the created ia32 Conv node (or the unchanged operand)
 */
3490 static ir_node *gen_Conv(ir_node *node)
3492 ir_node *block = get_nodes_block(node);
3493 ir_node *new_block = be_transform_node(block);
3494 ir_node *op = get_Conv_op(node);
3495 ir_node *new_op = NULL;
3496 dbg_info *dbgi = get_irn_dbg_info(node);
3497 ir_mode *src_mode = get_irn_mode(op);
3498 ir_mode *tgt_mode = get_irn_mode(node);
3499 int src_bits = get_mode_size_bits(src_mode);
3500 int tgt_bits = get_mode_size_bits(tgt_mode);
3501 ir_node *res = NULL;
3503 assert(!mode_is_int(src_mode) || src_bits <= 32);
3504 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3506 if (src_mode == mode_b) {
3507 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3508 /* nothing to do, we already model bools as 0/1 ints */
3509 return be_transform_node(op);
3512 if (src_mode == tgt_mode) {
3513 if (get_Conv_strict(node)) {
3514 if (ia32_cg_config.use_sse2) {
3515 /* when we are in SSE mode, we can kill all strict no-op conversion */
3516 return be_transform_node(op);
3519 /* this should be optimized already, but who knows... */
3520 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3521 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3522 return be_transform_node(op);
3526 if (mode_is_float(src_mode)) {
3527 new_op = be_transform_node(op);
3528 /* we convert from float ... */
3529 if (mode_is_float(tgt_mode)) {
3531 /* Matze: I'm a bit unsure what the following is for? seems wrong
3533 if (src_mode == mode_E && tgt_mode == mode_D
3534 && !get_Conv_strict(node)) {
3535 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3541 if (ia32_cg_config.use_sse2) {
3542 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3543 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3545 set_ia32_ls_mode(res, tgt_mode);
3547 if (get_Conv_strict(node)) {
3548 /* if fp_no_float_fold is not set then we assume that we
3549 * don't have any float operations in a non
3550 * mode_float_arithmetic mode and can skip strict upconvs */
3551 if (src_bits < tgt_bits
3552 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3553 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3556 res = gen_x87_strict_conv(tgt_mode, new_op);
3557 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3561 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3566 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3567 if (ia32_cg_config.use_sse2) {
3568 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3570 set_ia32_ls_mode(res, src_mode);
3572 return gen_x87_fp_to_gp(node);
3576 /* we convert from int ... */
3577 if (mode_is_float(tgt_mode)) {
3579 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3580 if (ia32_cg_config.use_sse2) {
3581 new_op = be_transform_node(op);
3582 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3584 set_ia32_ls_mode(res, tgt_mode);
3586 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3587 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3588 res = gen_x87_gp_to_fp(node, src_mode);
3590 /* we need a strict-Conv, if the int mode has more bits than the
3592 if (float_mantissa < int_mantissa) {
3593 res = gen_x87_strict_conv(tgt_mode, res);
3594 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3598 } else if (tgt_mode == mode_b) {
3599 /* mode_b lowering already took care that we only have 0/1 values */
3600 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3601 src_mode, tgt_mode));
3602 return be_transform_node(op);
3605 if (src_bits == tgt_bits) {
3606 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3607 src_mode, tgt_mode));
3608 return be_transform_node(op);
3611 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Tries to turn a node into an ia32 Immediate (respecting the given
 * constraint type); falls back to the normal transformation otherwise.
 */
3619 static ir_node *create_immediate_or_transform(ir_node *node,
3620 char immediate_constraint_type)
3622 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3623 if (new_node == NULL) {
3624 new_node = be_transform_node(node);
3630 * Transforms a FrameAddr into an ia32 Add.
/**
 * Transforms a be_FrameAddr into an ia32 Lea on the frame pointer with
 * the node's frame entity attached (offset resolved later).
 *
 * @param node  the be_FrameAddr node
 * @return the created Lea node
 */
3632 static ir_node *gen_be_FrameAddr(ir_node *node)
3634 ir_node *block = be_transform_node(get_nodes_block(node));
3635 ir_node *op = be_get_FrameAddr_frame(node);
3636 ir_node *new_op = be_transform_node(op);
3637 dbg_info *dbgi = get_irn_dbg_info(node);
3640 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3641 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3642 set_ia32_use_frame(new_node);
3644 SET_IA32_ORIG_NODE(new_node, node);
3650 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/**
 * Transforms a be_Return.  With SSE2 and a primitive float return value,
 * the calling convention requires the result on the x87 stack: the XMM
 * value is stored to the frame (xStoreSimple), reloaded via vfld, and a
 * new Barrier is built with the reloaded value/memory wired in.  All
 * other returns are simply duplicated.
 *
 * @param node  the be_Return node
 * @return the duplicated return node
 */
3652 static ir_node *gen_be_Return(ir_node *node)
3654 ir_graph *irg = current_ir_graph;
3655 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3656 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3657 ir_entity *ent = get_irg_entity(irg);
3658 ir_type *tp = get_entity_type(ent);
3663 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3664 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3666 int pn_ret_val, pn_ret_mem, arity, i;
3668 assert(ret_val != NULL);
3669 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3670 return be_duplicate_node(node);
3673 res_type = get_method_res_type(tp, 0);
3675 if (! is_Primitive_type(res_type)) {
3676 return be_duplicate_node(node);
3679 mode = get_type_mode(res_type);
3680 if (! mode_is_float(mode)) {
3681 return be_duplicate_node(node);
3684 assert(get_method_n_ress(tp) == 1);
3686 pn_ret_val = get_Proj_proj(ret_val);
3687 pn_ret_mem = get_Proj_proj(ret_mem);
3689 /* get the Barrier */
3690 barrier = get_Proj_pred(ret_val);
3692 /* get result input of the Barrier */
3693 ret_val = get_irn_n(barrier, pn_ret_val);
3694 new_ret_val = be_transform_node(ret_val);
3696 /* get memory input of the Barrier */
3697 ret_mem = get_irn_n(barrier, pn_ret_mem);
3698 new_ret_mem = be_transform_node(ret_mem);
3700 frame = get_irg_frame(irg);
3702 dbgi = get_irn_dbg_info(barrier);
3703 block = be_transform_node(get_nodes_block(barrier));
3705 /* store xmm0 onto stack */
3706 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3707 new_ret_mem, new_ret_val);
3708 set_ia32_ls_mode(sse_store, mode);
3709 set_ia32_op_type(sse_store, ia32_AddrModeD);
3710 set_ia32_use_frame(sse_store);
3712 /* load into x87 register */
3713 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3714 set_ia32_op_type(fld, ia32_AddrModeS);
3715 set_ia32_use_frame(fld);
3717 mproj = new_r_Proj(block, fld, mode_M, pn_ia32_vfld_M);
3718 fld = new_r_Proj(block, fld, mode_vfp, pn_ia32_vfld_res);
3720 /* create a new barrier */
3721 arity = get_irn_arity(barrier);
3722 in = ALLOCAN(ir_node*, arity);
3723 for (i = 0; i < arity; ++i) {
/* substitute the reloaded value/memory at the return positions */
3726 if (i == pn_ret_val) {
3728 } else if (i == pn_ret_mem) {
3731 ir_node *in = get_irn_n(barrier, i);
3732 new_in = be_transform_node(in);
3737 new_barrier = new_ir_node(dbgi, irg, block,
3738 get_irn_op(barrier), get_irn_mode(barrier),
3740 copy_node_attr(barrier, new_barrier);
3741 be_duplicate_deps(barrier, new_barrier);
3742 be_set_transformed_node(barrier, new_barrier);
3744 /* transform normally */
3745 return be_duplicate_node(node);
3749 * Transform a be_AddSP into an ia32_SubSP.
3751 static ir_node *gen_be_AddSP(ir_node *node)
3753 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3754 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3756 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3757 match_am | match_immediate);
3761 * Transform a be_SubSP into an ia32_AddSP
3763 static ir_node *gen_be_SubSP(ir_node *node)
3765 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3766 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3768 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3769 match_am | match_immediate);
/**
 * Transform a Phi node: keep the node but adjust its mode to the backend
 * register-class mode (gp / sse / vfp).
 *
 * NOTE(review): the mode reassignments for the gp and float branches are
 * missing from this extract; only the surrounding structure is visible.
 */
3773 * Change some phi modes
3775 static ir_node *gen_Phi(ir_node *node)
3777 ir_node *block = be_transform_node(get_nodes_block(node));
3778 ir_graph *irg = current_ir_graph;
3779 dbg_info *dbgi = get_irn_dbg_info(node);
3780 ir_mode *mode = get_irn_mode(node);
3783 if (ia32_mode_needs_gp_reg(mode)) {
3784 /* we shouldn't have any 64bit stuff around anymore */
3785 assert(get_mode_size_bits(mode) <= 32);
3786 /* all integer operations are on 32bit registers now */
3788 } else if (mode_is_float(mode)) {
3789 if (ia32_cg_config.use_sse2) {
/* keep the old (untransformed) predecessors for now; Phi inputs may form
 * loops, so they are fixed up in a later pass */
3796 /* phi nodes allow loops, so we use the old arguments for now
3797 * and fix this later */
3798 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3799 get_irn_in(node) + 1);
3800 copy_node_attr(node, phi);
3801 be_duplicate_deps(node, phi);
/* schedule the predecessors for transformation */
3803 be_enqueue_preds(node);
/**
 * Transform an IJmp (indirect jump) into an ia32_IJmp, allowing the jump
 * target to be matched as an address-mode operand or immediate.
 *
 * NOTE(review): the trailing `return new_node;` line is missing from this
 * extract.
 */
3811 static ir_node *gen_IJmp(ir_node *node)
3813 ir_node *block = get_nodes_block(node);
3814 ir_node *new_block = be_transform_node(block);
3815 dbg_info *dbgi = get_irn_dbg_info(node);
3816 ir_node *op = get_IJmp_target(node);
3818 ia32_address_mode_t am;
3819 ia32_address_t *addr = &am.addr;
/* indirect jump targets are always pointers */
3821 assert(get_irn_mode(op) == mode_P);
3823 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3825 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3826 addr->mem, am.new_op2);
3827 set_am_attributes(new_node, &am);
3828 SET_IA32_ORIG_NODE(new_node, node);
/* if address mode folded a load, reroute its memory Proj */
3830 new_node = fix_mem_proj(new_node, &am);
/**
 * Transform a Bound node (array bounds check).
 *
 * Only the common case of a zero lower bound is supported: it is lowered
 * to an unsigned compare (index - upper) followed by a Jcc on "below";
 * an unsigned `index < upper` check also rejects negative indices.
 * Any other Bound form panics.
 */
3836 * Transform a Bound node.
3838 static ir_node *gen_Bound(ir_node *node)
3841 ir_node *lower = get_Bound_lower(node);
3842 dbg_info *dbgi = get_irn_dbg_info(node);
3844 if (is_Const_0(lower)) {
3845 /* typical case for Java */
3846 ir_node *sub, *res, *flags, *block;
3848 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3849 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3851 block = get_nodes_block(res);
/* gen_binop may return the Sub directly or a Proj of it; normalize so
 * `sub` is the mode_T Sub node and `res` its result Proj */
3852 if (! is_Proj(res)) {
3854 set_irn_mode(sub, mode_T);
3855 res = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_res);
3857 sub = get_Proj_pred(res);
3859 flags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
/* unsigned "less than" on the Sub flags implements the bounds check */
3860 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3861 SET_IA32_ORIG_NODE(new_node, node);
3863 panic("generic Bound not supported in ia32 Backend");
3869 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3871 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3872 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3874 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3875 match_immediate | match_mode_neutral);
/**
 * Transform an ia32_l_ShrDep into a real Shr.
 *
 * NOTE(review): the match-flags argument line of the gen_shift_binop call
 * is missing from this extract; presumably it mirrors gen_ia32_l_ShlDep.
 */
3878 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3880 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3881 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3882 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/**
 * Transform an ia32_l_SarDep into a real Sar (arithmetic right shift).
 *
 * NOTE(review): the match-flags argument line of the gen_shift_binop call
 * is missing from this extract; presumably it mirrors gen_ia32_l_ShlDep.
 */
3886 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3888 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3889 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3890 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transform an ia32_l_Add into a real ia32 Add and force the Add node
 * into mode_T so its flags result can be consumed (e.g. by an Adc).
 *
 * NOTE(review): the trailing return of the lowered node is missing from
 * this extract.
 */
3894 static ir_node *gen_ia32_l_Add(ir_node *node)
3896 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3897 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3898 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3899 match_commutative | match_am | match_immediate |
3900 match_mode_neutral);
/* normalize: gen_binop may hand back a Proj of the Add */
3902 if (is_Proj(lowered)) {
3903 lowered = get_Proj_pred(lowered);
3905 assert(is_ia32_Add(lowered));
3906 set_irn_mode(lowered, mode_T);
3912 static ir_node *gen_ia32_l_Adc(ir_node *node)
3914 return gen_binop_flags(node, new_bd_ia32_Adc,
3915 match_commutative | match_am | match_immediate |
3916 match_mode_neutral);
3920 * Transforms a l_MulS into a "real" MulS node.
3922 * @return the created ia32 Mul node
3924 static ir_node *gen_ia32_l_Mul(ir_node *node)
3926 ir_node *left = get_binop_left(node);
3927 ir_node *right = get_binop_right(node);
3929 return gen_binop(node, left, right, new_bd_ia32_Mul,
3930 match_commutative | match_am | match_mode_neutral);
3934 * Transforms a l_IMulS into a "real" IMul1OPS node.
3936 * @return the created ia32 IMul1OP node
3938 static ir_node *gen_ia32_l_IMul(ir_node *node)
3940 ir_node *left = get_binop_left(node);
3941 ir_node *right = get_binop_right(node);
3943 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3944 match_commutative | match_am | match_mode_neutral);
/**
 * Transform an ia32_l_Sub into a real ia32 Sub and force the Sub node
 * into mode_T so its flags result can be consumed (e.g. by an Sbb).
 *
 * NOTE(review): the trailing return of the lowered node is missing from
 * this extract.
 */
3947 static ir_node *gen_ia32_l_Sub(ir_node *node)
3949 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3950 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3951 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3952 match_am | match_immediate | match_mode_neutral);
/* normalize: gen_binop may hand back a Proj of the Sub */
3954 if (is_Proj(lowered)) {
3955 lowered = get_Proj_pred(lowered);
3957 assert(is_ia32_Sub(lowered));
3958 set_irn_mode(lowered, mode_T);
3964 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3966 return gen_binop_flags(node, new_bd_ia32_Sbb,
3967 match_am | match_immediate | match_mode_neutral);
/**
 * Transform an l_ShlD/l_ShrD into a real ShlD/ShrD (double-precision
 * shift used by the lowered 64bit shift sequence).
 *
 * @param node   the lowered shift node
 * @param high   target operand to be shifted
 * @param low    operand supplying the bits shifted into the target
 * @param count  shift amount (only this may become an immediate)
 *
 * NOTE(review): the new_count argument lines of the ShlD/ShrD calls and
 * the final return are missing from this extract.
 */
3971 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3972 * op1 - target to be shifted
3973 * op2 - contains bits to be shifted into target
3975 * Only op3 can be an immediate.
3977 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3978 ir_node *low, ir_node *count)
3980 ir_node *block = get_nodes_block(node);
3981 ir_node *new_block = be_transform_node(block);
3982 dbg_info *dbgi = get_irn_dbg_info(node);
3983 ir_node *new_high = be_transform_node(high);
3984 ir_node *new_low = be_transform_node(low);
/* strip Convs around the count: hardware only looks at the low 5 bits */
3988 /* the shift amount can be any mode that is bigger than 5 bits, since all
3989 * other bits are ignored anyway */
3990 while (is_Conv(count) &&
3991 get_irn_n_edges(count) == 1 &&
3992 mode_is_int(get_irn_mode(count))) {
3993 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3994 count = get_Conv_op(count);
3996 new_count = create_immediate_or_transform(count, 0);
3998 if (is_ia32_l_ShlD(node)) {
3999 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4002 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4005 SET_IA32_ORIG_NODE(new_node, node);
4010 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4012 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4013 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4014 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4015 return gen_lowered_64bit_shifts(node, high, low, count);
4018 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4020 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4021 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4022 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4023 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform an ia32_l_LLtoFloat: convert a 64bit integer (given as
 * low/high 32bit halves) to a float using the x87 fild instruction.
 *
 * The halves are stored to a frame slot, fild loads the 64bit value, and
 * for unsigned sources a correction constant (ULL bias) is added when the
 * "sign" bit was set.  Not implemented for SSE2.
 *
 * NOTE(review): several lines (Sync inputs, store value arguments, final
 * return) are missing from this extract.
 */
4026 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4028 ir_node *src_block = get_nodes_block(node);
4029 ir_node *block = be_transform_node(src_block);
4030 ir_graph *irg = current_ir_graph;
4031 dbg_info *dbgi = get_irn_dbg_info(node);
4032 ir_node *frame = get_irg_frame(irg);
4033 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4034 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4035 ir_node *new_val_low = be_transform_node(val_low);
4036 ir_node *new_val_high = be_transform_node(val_high);
4038 ir_node *sync, *fild, *res;
4039 ir_node *store_low, *store_high;
4041 if (ia32_cg_config.use_sse2) {
4042 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* write both 32bit halves into a 64bit frame slot (high half at +4) */
4046 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4048 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4050 SET_IA32_ORIG_NODE(store_low, node);
4051 SET_IA32_ORIG_NODE(store_high, node);
4053 set_ia32_use_frame(store_low);
4054 set_ia32_use_frame(store_high);
4055 set_ia32_op_type(store_low, ia32_AddrModeD);
4056 set_ia32_op_type(store_high, ia32_AddrModeD);
4057 set_ia32_ls_mode(store_low, mode_Iu);
4058 set_ia32_ls_mode(store_high, mode_Is);
4059 add_ia32_am_offs_int(store_high, 4);
/* join both store memories before the fild reads the slot */
4063 sync = new_rd_Sync(dbgi, block, 2, in);
4066 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4068 set_ia32_use_frame(fild);
4069 set_ia32_op_type(fild, ia32_AddrModeS);
4070 set_ia32_ls_mode(fild, mode_Ls);
4072 SET_IA32_ORIG_NODE(fild, node);
4074 res = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned source: fild treats the value as signed, so add the ULL bias
 * constant when the top bit was set; Shr by 31 produces the selector */
4076 if (! mode_is_signed(get_irn_mode(val_high))) {
4077 ia32_address_mode_t am;
4079 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4082 am.addr.base = noreg_GP;
4083 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4084 am.addr.mem = nomem;
4087 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4088 am.addr.use_frame = 0;
4089 am.addr.frame_entity = NULL;
4090 am.addr.symconst_sign = 0;
4091 am.ls_mode = mode_F;
4092 am.mem_proj = nomem;
4093 am.op_type = ia32_AddrModeS;
4095 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4096 am.pinned = op_pin_state_floats;
4098 am.ins_permuted = 0;
4100 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4101 am.new_op1, am.new_op2, get_fpcw());
4102 set_am_attributes(fadd, &am);
4104 set_irn_mode(fadd, mode_T);
4105 res = new_rd_Proj(NULL, block, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform an ia32_l_FloattoLL: convert a float to a 64bit integer by
 * storing it through the x87 fist instruction into a 64bit frame slot.
 * The two 32bit halves are later read back by gen_Proj_l_FloattoLL.
 *
 * NOTE(review): the trailing return (presumably of `mem`) is missing
 * from this extract.
 */
4110 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4112 ir_node *src_block = get_nodes_block(node);
4113 ir_node *block = be_transform_node(src_block);
4114 ir_graph *irg = get_Block_irg(block);
4115 dbg_info *dbgi = get_irn_dbg_info(node);
4116 ir_node *frame = get_irg_frame(irg);
4117 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4118 ir_node *new_val = be_transform_node(val);
4119 ir_node *fist, *mem;
4121 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4122 SET_IA32_ORIG_NODE(fist, node);
4123 set_ia32_use_frame(fist);
4124 set_ia32_op_type(fist, ia32_AddrModeD);
/* fist writes the full 64bit value */
4125 set_ia32_ls_mode(fist, mode_Ls);
/**
 * The BAD transformer: registered for opcodes that must never reach the
 * transformation phase; aborts with a diagnostic naming the node.
 */
4131 * the BAD transformer.
4133 static ir_node *bad_transform(ir_node *node)
4135 panic("No transform function for %+F available.", node);
/**
 * Transform a Proj of an l_FloattoLL: read one 32bit half of the 64bit
 * result that gen_ia32_l_FloattoLL stored into the frame slot (offset +4
 * for the high half).
 *
 * NOTE(review): local declarations and the final `return proj;` are
 * missing from this extract.
 */
4139 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4141 ir_node *block = be_transform_node(get_nodes_block(node));
4142 ir_graph *irg = get_Block_irg(block);
4143 ir_node *pred = get_Proj_pred(node);
4144 ir_node *new_pred = be_transform_node(pred);
4145 ir_node *frame = get_irg_frame(irg);
4146 dbg_info *dbgi = get_irn_dbg_info(node);
4147 long pn = get_Proj_proj(node);
4152 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4153 SET_IA32_ORIG_NODE(load, node);
4154 set_ia32_use_frame(load);
4155 set_ia32_op_type(load, ia32_AddrModeS);
4156 set_ia32_ls_mode(load, mode_Iu);
4157 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4158 * 32 bit from it with this particular load */
4159 attr = get_ia32_attr(load);
4160 attr->data.need_64bit_stackent = 1;
4162 if (pn == pn_ia32_l_FloattoLL_res_high) {
4163 add_ia32_am_offs_int(load, 4);
4165 assert(pn == pn_ia32_l_FloattoLL_res_low);
4168 proj = new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform the Projs of a be_AddSP: since AddSP was lowered to SubSP,
 * remap each be_AddSP proj number to the corresponding SubSP proj.
 * The stack-pointer result is pinned to ESP.
 *
 * NOTE(review): the `return res;` after the register assignment is
 * missing from this extract.
 */
4174 * Transform the Projs of an AddSP.
4176 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4178 ir_node *block = be_transform_node(get_nodes_block(node));
4179 ir_node *pred = get_Proj_pred(node);
4180 ir_node *new_pred = be_transform_node(pred);
4181 dbg_info *dbgi = get_irn_dbg_info(node);
4182 long proj = get_Proj_proj(node);
4184 if (proj == pn_be_AddSP_sp) {
4185 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4186 pn_ia32_SubSP_stack);
4187 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4189 } else if (proj == pn_be_AddSP_res) {
4190 return new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4191 pn_ia32_SubSP_addr);
4192 } else if (proj == pn_be_AddSP_M) {
4193 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_SubSP_M);
4196 panic("No idea how to transform proj->AddSP");
/**
 * Transform the Projs of a be_SubSP: since SubSP was lowered to AddSP,
 * remap each be_SubSP proj number to the corresponding AddSP proj.
 * The stack-pointer result is pinned to ESP.
 *
 * NOTE(review): the `return res;` after the register assignment is
 * missing from this extract.
 */
4200 * Transform the Projs of a SubSP.
4202 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4204 ir_node *block = be_transform_node(get_nodes_block(node));
4205 ir_node *pred = get_Proj_pred(node);
4206 ir_node *new_pred = be_transform_node(pred);
4207 dbg_info *dbgi = get_irn_dbg_info(node);
4208 long proj = get_Proj_proj(node);
4210 if (proj == pn_be_SubSP_sp) {
4211 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4212 pn_ia32_AddSP_stack);
4213 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4215 } else if (proj == pn_be_SubSP_M) {
4216 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_AddSP_M);
4219 panic("No idea how to transform proj->SubSP");
/**
 * Transform and renumber the Projs of a Load.
 *
 * Dispatches on what the Load was transformed into (ia32_Load, a Conv
 * that folded the load, xLoad for SSE, vfld for x87) and maps the firm
 * Load proj numbers onto the corresponding ia32 proj numbers.  Memory
 * Projs of loads with multiple users are deferred because the load may
 * still be folded into an address mode.
 *
 * NOTE(review): several switch/case header lines are missing from this
 * extract; only the visible cases are documented.
 */
4223 * Transform and renumber the Projs from a Load.
4225 static ir_node *gen_Proj_Load(ir_node *node)
4228 ir_node *block = be_transform_node(get_nodes_block(node));
4229 ir_node *pred = get_Proj_pred(node);
4230 dbg_info *dbgi = get_irn_dbg_info(node);
4231 long proj = get_Proj_proj(node);
4233 /* loads might be part of source address mode matches, so we don't
4234 * transform the ProjMs yet (with the exception of loads whose result is
4237 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4239 ir_node *old_block = get_nodes_block(node);
4241 /* this is needed, because sometimes we have loops that are only
4242 reachable through the ProjM */
4243 be_enqueue_preds(node);
4244 /* do it in 2 steps, to silence firm verifier */
4245 res = new_rd_Proj(dbgi, old_block, pred, mode_M, pn_Load_M);
4246 set_Proj_proj(res, pn_ia32_mem);
4250 /* renumber the proj */
4251 new_pred = be_transform_node(pred);
4252 if (is_ia32_Load(new_pred)) {
4255 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Load_res);
4257 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Load_M);
4258 case pn_Load_X_regular:
4259 return new_rd_Jmp(dbgi, block);
4260 case pn_Load_X_except:
4261 /* This Load might raise an exception. Mark it. */
4262 set_ia32_exc_label(new_pred, 1);
4263 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the load was folded into a Conv: reuse the Conv's generic res/mem projs */
4267 } else if (is_ia32_Conv_I2I(new_pred) ||
4268 is_ia32_Conv_I2I8Bit(new_pred)) {
4269 set_irn_mode(new_pred, mode_T);
4270 if (proj == pn_Load_res) {
4271 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_res);
4272 } else if (proj == pn_Load_M) {
4273 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_mem);
/* SSE load */
4275 } else if (is_ia32_xLoad(new_pred)) {
4278 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4280 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xLoad_M);
4281 case pn_Load_X_regular:
4282 return new_rd_Jmp(dbgi, block);
4283 case pn_Load_X_except:
4284 /* This Load might raise an exception. Mark it. */
4285 set_ia32_exc_label(new_pred, 1);
4286 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
/* x87 load */
4290 } else if (is_ia32_vfld(new_pred)) {
4293 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4295 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfld_M);
4296 case pn_Load_X_regular:
4297 return new_rd_Jmp(dbgi, block);
4298 case pn_Load_X_except:
4299 /* This Load might raise an exception. Mark it. */
4300 set_ia32_exc_label(new_pred, 1);
4301 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4306 /* can happen for ProJMs when source address mode happened for the
4309 /* however it should not be the result proj, as that would mean the
4310 load had multiple users and should not have been used for
4312 if (proj != pn_Load_M) {
4313 panic("internal error: transformed node not a Load");
4315 return new_rd_Proj(dbgi, block, new_pred, mode_M, 1);
4318 panic("No idea how to transform proj");
/**
 * Transform and renumber the Projs of a Div/Mod/DivMod.
 *
 * All three firm opcodes were lowered to an ia32 Div/IDiv, which provides
 * both quotient (div_res) and remainder (mod_res); this maps each firm
 * proj number onto the matching ia32 Div proj.
 *
 * NOTE(review): the outer `case iro_...:` labels and several proj-case
 * lines are missing from this extract.
 */
4322 * Transform and renumber the Projs from a DivMod like instruction.
4324 static ir_node *gen_Proj_DivMod(ir_node *node)
4326 ir_node *block = be_transform_node(get_nodes_block(node));
4327 ir_node *pred = get_Proj_pred(node);
4328 ir_node *new_pred = be_transform_node(pred);
4329 dbg_info *dbgi = get_irn_dbg_info(node);
4330 long proj = get_Proj_proj(node);
4332 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4334 switch (get_irn_opcode(pred)) {
4338 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4340 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4341 case pn_Div_X_regular:
4342 return new_rd_Jmp(dbgi, block);
4343 case pn_Div_X_except:
/* division can trap (e.g. divide by zero): mark the exception label */
4344 set_ia32_exc_label(new_pred, 1);
4345 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4353 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4355 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4356 case pn_Mod_X_except:
4357 set_ia32_exc_label(new_pred, 1);
4358 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4366 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4367 case pn_DivMod_res_div:
4368 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4369 case pn_DivMod_res_mod:
4370 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4371 case pn_DivMod_X_regular:
4372 return new_rd_Jmp(dbgi, block);
4373 case pn_DivMod_X_except:
4374 set_ia32_exc_label(new_pred, 1);
4375 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4384 panic("No idea how to transform proj->DivMod");
/**
 * Transform and renumber the Projs of a CopyB: map the memory proj onto
 * the proj of whichever ia32 CopyB variant (immediate-size CopyB_i or
 * generic CopyB) the predecessor was lowered to.
 *
 * NOTE(review): lines between the switch cases and the panic are missing
 * from this extract.
 */
4388 * Transform and renumber the Projs from a CopyB.
4390 static ir_node *gen_Proj_CopyB(ir_node *node)
4392 ir_node *block = be_transform_node(get_nodes_block(node));
4393 ir_node *pred = get_Proj_pred(node);
4394 ir_node *new_pred = be_transform_node(pred);
4395 dbg_info *dbgi = get_irn_dbg_info(node);
4396 long proj = get_Proj_proj(node);
4399 case pn_CopyB_M_regular:
4400 if (is_ia32_CopyB_i(new_pred)) {
4401 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4402 } else if (is_ia32_CopyB(new_pred)) {
4403 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_M);
4410 panic("No idea how to transform proj->CopyB");
/**
 * Transform and renumber the Projs of a Quot (float division): map the
 * memory and result projs onto the SSE xDiv or x87 vfdiv projs, depending
 * on which variant the predecessor was lowered to.
 *
 * NOTE(review): the `case pn_Quot_M:`/`case pn_Quot_res:` labels and the
 * bodies of the X_regular/X_except cases are missing from this extract.
 */
4414 * Transform and renumber the Projs from a Quot.
4416 static ir_node *gen_Proj_Quot(ir_node *node)
4418 ir_node *block = be_transform_node(get_nodes_block(node));
4419 ir_node *pred = get_Proj_pred(node);
4420 ir_node *new_pred = be_transform_node(pred);
4421 dbg_info *dbgi = get_irn_dbg_info(node);
4422 long proj = get_Proj_proj(node);
4426 if (is_ia32_xDiv(new_pred)) {
4427 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xDiv_M);
4428 } else if (is_ia32_vfdiv(new_pred)) {
4429 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4433 if (is_ia32_xDiv(new_pred)) {
4434 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4435 } else if (is_ia32_vfdiv(new_pred)) {
4436 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4439 case pn_Quot_X_regular:
4440 case pn_Quot_X_except:
4445 panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call into an ia32_Call.
 *
 * The call target may be folded into an address mode or an immediate.
 * Register parameters are routed explicitly to EAX/ECX/EDX; a float
 * return value enables the x87 simulator; SSE2 calls are recorded for
 * post-processing.  PIC adjustment is suppressed around the target match
 * for PIC trampoline calls.
 *
 * NOTE(review): some declarations and the final `return call;` are
 * missing from this extract.
 */
4448 static ir_node *gen_be_Call(ir_node *node)
4450 dbg_info *const dbgi = get_irn_dbg_info(node);
4451 ir_node *const src_block = get_nodes_block(node);
4452 ir_node *const block = be_transform_node(src_block);
4453 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4454 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4455 ir_node *const sp = be_transform_node(src_sp);
4456 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4457 ia32_address_mode_t am;
4458 ia32_address_t *const addr = &am.addr;
4463 ir_node * eax = noreg_GP;
4464 ir_node * ecx = noreg_GP;
4465 ir_node * edx = noreg_GP;
4466 unsigned const pop = be_Call_get_pop(node);
4467 ir_type *const call_tp = be_Call_get_type(node);
4468 int old_no_pic_adjust;
4470 /* Run the x87 simulator if the call returns a float value */
4471 if (get_method_n_ress(call_tp) > 0) {
4472 ir_type *const res_type = get_method_res_type(call_tp, 0);
4473 ir_mode *const res_mode = get_type_mode(res_type);
4475 if (res_mode != NULL && mode_is_float(res_mode)) {
4476 env_cg->do_x87_sim = 1;
4480 /* We do not want be_Call direct calls */
4481 assert(be_Call_get_entity(node) == NULL);
4483 /* special case for PIC trampoline calls */
4484 old_no_pic_adjust = no_pic_adjust;
4485 no_pic_adjust = env_cg->birg->main_env->options->pic;
4487 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4488 match_am | match_immediate);
4490 no_pic_adjust = old_no_pic_adjust;
/* walk the register parameters backwards; the last input is the fpcw */
4492 i = get_irn_arity(node) - 1;
4493 fpcw = be_transform_node(get_irn_n(node, i--));
4494 for (; i >= be_pos_Call_first_arg; --i) {
4495 arch_register_req_t const *const req = arch_get_register_req(node, i);
4496 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4498 assert(req->type == arch_register_req_type_limited);
4499 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4501 switch (*req->limited) {
4502 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4503 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4504 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4505 default: panic("Invalid GP register for register parameter");
4509 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4510 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4511 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4512 set_am_attributes(call, &am);
4513 call = fix_mem_proj(call, &am);
4515 if (get_irn_pinned(node) == op_pin_state_pinned)
4516 set_irn_pinned(call, op_pin_state_pinned);
4518 SET_IA32_ORIG_NODE(call, node);
4520 if (ia32_cg_config.use_sse2) {
4521 /* remember this call for post-processing */
4522 ARR_APP1(ir_node *, call_list, call);
4523 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4530 * Transform Builtin trap
4532 static ir_node *gen_trap(ir_node *node) {
4533 dbg_info *dbgi = get_irn_dbg_info(node);
4534 ir_node *block = be_transform_node(get_nodes_block(node));
4535 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4537 return new_bd_ia32_UD2(dbgi, block, mem);
4541 * Transform Builtin debugbreak
4543 static ir_node *gen_debugbreak(ir_node *node) {
4544 dbg_info *dbgi = get_irn_dbg_info(node);
4545 ir_node *block = be_transform_node(get_nodes_block(node));
4546 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4548 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4552 * Transform Builtin return_address
4554 static ir_node *gen_return_address(ir_node *node) {
4555 ir_node *param = get_Builtin_param(node, 0);
4556 ir_node *frame = get_Builtin_param(node, 1);
4557 dbg_info *dbgi = get_irn_dbg_info(node);
4558 tarval *tv = get_Const_tarval(param);
4559 unsigned long value = get_tarval_long(tv);
4561 ir_node *block = be_transform_node(get_nodes_block(node));
4562 ir_node *ptr = be_transform_node(frame);
4566 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4567 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4568 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4571 /* load the return address from this frame */
4572 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4574 set_irn_pinned(load, get_irn_pinned(node));
4575 set_ia32_op_type(load, ia32_AddrModeS);
4576 set_ia32_ls_mode(load, mode_Iu);
4578 set_ia32_am_offs_int(load, 0);
4579 set_ia32_use_frame(load);
4580 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4582 if (get_irn_pinned(node) == op_pin_state_floats) {
4583 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4584 && pn_ia32_vfld_res == pn_ia32_Load_res
4585 && pn_ia32_Load_res == pn_ia32_res);
4586 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4589 SET_IA32_ORIG_NODE(load, node);
4590 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4594 * Transform Builtin frame_address
4596 static ir_node *gen_frame_address(ir_node *node) {
4597 ir_node *param = get_Builtin_param(node, 0);
4598 ir_node *frame = get_Builtin_param(node, 1);
4599 dbg_info *dbgi = get_irn_dbg_info(node);
4600 tarval *tv = get_Const_tarval(param);
4601 unsigned long value = get_tarval_long(tv);
4603 ir_node *block = be_transform_node(get_nodes_block(node));
4604 ir_node *ptr = be_transform_node(frame);
4609 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4610 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4611 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4614 /* load the frame address from this frame */
4615 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4617 set_irn_pinned(load, get_irn_pinned(node));
4618 set_ia32_op_type(load, ia32_AddrModeS);
4619 set_ia32_ls_mode(load, mode_Iu);
4621 ent = ia32_get_frame_address_entity();
4623 set_ia32_am_offs_int(load, 0);
4624 set_ia32_use_frame(load);
4625 set_ia32_frame_ent(load, ent);
4627 /* will fail anyway, but gcc does this: */
4628 set_ia32_am_offs_int(load, 0);
4631 if (get_irn_pinned(node) == op_pin_state_floats) {
4632 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4633 && pn_ia32_vfld_res == pn_ia32_Load_res
4634 && pn_ia32_Load_res == pn_ia32_res);
4635 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4638 SET_IA32_ORIG_NODE(load, node);
4639 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform a Builtin prefetch.
 *
 * Without SSE or 3DNow! prefetch support the builtin degrades to a pure
 * memory pass-through.  Otherwise an address mode is built for the target
 * pointer and the rw/locality parameters select PrefetchW (3DNow! write),
 * one of PrefetchNTA/0/1/2 (SSE), or the plain 3DNow! Prefetch.
 *
 * NOTE(review): the base/index extraction from `addr`, the locality
 * switch labels, and some braces are missing from this extract.
 * Note the header comment below says "frame_address" — copy/paste
 * artifact in the original; this handles prefetch.
 */
4643 * Transform Builtin frame_address
4645 static ir_node *gen_prefetch(ir_node *node) {
4647 ir_node *ptr, *block, *mem, *base, *index;
4648 ir_node *param, *new_node;
4651 ia32_address_t addr;
4653 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4654 /* no prefetch at all, route memory */
4655 return be_transform_node(get_Builtin_mem(node));
/* rw flag: param 1 must be a compile-time constant */
4658 param = get_Builtin_param(node, 1);
4659 tv = get_Const_tarval(param);
4660 rw = get_tarval_long(tv);
4662 /* construct load address */
4663 memset(&addr, 0, sizeof(addr));
4664 ptr = get_Builtin_param(node, 0);
4665 ia32_create_address_mode(&addr, ptr, 0);
4672 base = be_transform_node(base);
4675 if (index == NULL) {
4678 index = be_transform_node(index);
4681 dbgi = get_irn_dbg_info(node);
4682 block = be_transform_node(get_nodes_block(node));
4683 mem = be_transform_node(get_Builtin_mem(node));
4685 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4686 /* we have 3DNow!, this was already checked above */
4687 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4688 } else if (ia32_cg_config.use_sse_prefetch) {
4689 /* note: rw == 1 is IGNORED in that case */
4690 param = get_Builtin_param(node, 2);
4691 tv = get_Const_tarval(param);
4692 locality = get_tarval_long(tv);
4694 /* SSE style prefetch */
4697 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4700 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4703 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4706 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4710 assert(ia32_cg_config.use_3dnow_prefetch);
4711 /* 3DNow! style prefetch */
4712 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4715 set_irn_pinned(new_node, get_irn_pinned(node));
4716 set_ia32_op_type(new_node, ia32_AddrModeS);
4717 set_ia32_ls_mode(new_node, mode_Bu);
4718 set_address(new_node, &addr);
4720 SET_IA32_ORIG_NODE(new_node, node);
4722 be_dep_on_frame(new_node);
4723 return new_r_Proj(block, new_node, mode_M, pn_ia32_Prefetch_M);
/**
 * Common helper for bsf-like builtins (Bsf/Bsr/Popcnt): build the node
 * with address-mode matching for its single operand.
 *
 * @param node  the Builtin node
 * @param func  constructor of the concrete ia32 node
 *
 * NOTE(review): the declaration of `cnt` is missing from this extract.
 */
4727 * Transform bsf like node
4729 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4731 ir_node *param = get_Builtin_param(node, 0);
4732 dbg_info *dbgi = get_irn_dbg_info(node);
4734 ir_node *block = get_nodes_block(node);
4735 ir_node *new_block = be_transform_node(block);
4737 ia32_address_mode_t am;
4738 ia32_address_t *addr = &am.addr;
4741 match_arguments(&am, block, NULL, param, NULL, match_am);
4743 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4744 set_am_attributes(cnt, &am);
4745 set_ia32_ls_mode(cnt, get_irn_mode(param));
4747 SET_IA32_ORIG_NODE(cnt, node);
/* reroute the memory Proj if a load was folded into the address mode */
4748 return fix_mem_proj(cnt, &am);
/**
 * Transform builtin ffs (find first set, 1-based; 0 for no bit set).
 *
 * Built as: bsf(x); on the zero flag produce 0xFF via Set+Neg, OR it over
 * the bsf result (yielding -1 for input 0), then add 1.
 *
 * NOTE(review): closing-brace lines are missing from this extract.
 */
4752 * Transform builtin ffs.
4754 static ir_node *gen_ffs(ir_node *node)
4756 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4757 ir_node *real = skip_Proj(bsf);
4758 dbg_info *dbgi = get_irn_dbg_info(real);
4759 ir_node *block = get_nodes_block(real);
4760 ir_node *flag, *set, *conv, *neg, *or;
/* we need both the result and the flags, so force the Bsf to mode_T */
4763 if (get_irn_mode(real) != mode_T) {
4764 set_irn_mode(real, mode_T);
4765 bsf = new_r_Proj(block, real, mode_Iu, pn_ia32_res);
4768 flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);
/* set = 1 iff the input was zero (ZF set by bsf) */
4771 set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
4772 SET_IA32_ORIG_NODE(set, node);
4775 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4776 SET_IA32_ORIG_NODE(conv, node);
/* neg is 0 or -1; OR-ing it over the bsf result gives -1 for input 0 */
4779 neg = new_bd_ia32_Neg(dbgi, block, conv);
4782 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4783 set_ia32_commutative(or);
/* +1 turns the 0-based bit index (or -1) into the 1-based ffs result */
4786 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4790 * Transform builtin clz.
4792 static ir_node *gen_clz(ir_node *node)
4794 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4795 ir_node *real = skip_Proj(bsr);
4796 dbg_info *dbgi = get_irn_dbg_info(real);
4797 ir_node *block = get_nodes_block(real);
4798 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4800 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
4804 * Transform builtin ctz.
4806 static ir_node *gen_ctz(ir_node *node)
4808 return gen_unop_AM(node, new_bd_ia32_Bsf);
/**
 * Transform builtin parity: compare the operand against 0 (which sets the
 * parity flag), materialize the flag with Set(parity), and zero-extend
 * the byte result.
 *
 * NOTE(review): the final `return new_node;` is missing from this
 * extract.
 */
4812 * Transform builtin parity.
4814 static ir_node *gen_parity(ir_node *node)
4816 ir_node *param = get_Builtin_param(node, 0);
4817 dbg_info *dbgi = get_irn_dbg_info(node);
4819 ir_node *block = get_nodes_block(node);
4821 ir_node *new_block = be_transform_node(block);
4822 ir_node *imm, *cmp, *new_node;
4824 ia32_address_mode_t am;
4825 ia32_address_t *addr = &am.addr;
/* cmp with 0 computes the parity flag of the operand */
4829 match_arguments(&am, block, NULL, param, NULL, match_am);
4830 imm = ia32_create_Immediate(NULL, 0, 0);
4831 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4832 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4833 set_am_attributes(cmp, &am);
4834 set_ia32_ls_mode(cmp, mode_Iu);
4836 SET_IA32_ORIG_NODE(cmp, node);
4838 cmp = fix_mem_proj(cmp, &am);
4841 new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
4842 SET_IA32_ORIG_NODE(new_node, node);
/* Set produces a byte; widen it to a full register value */
4845 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4846 nomem, new_node, mode_Bu);
4847 SET_IA32_ORIG_NODE(new_node, node);
4852 * Transform builtin popcount
4854 static ir_node *gen_popcount(ir_node *node) {
4855 ir_node *param = get_Builtin_param(node, 0);
4856 dbg_info *dbgi = get_irn_dbg_info(node);
4858 ir_node *block = get_nodes_block(node);
4859 ir_node *new_block = be_transform_node(block);
4862 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4864 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4865 if (ia32_cg_config.use_popcnt) {
4866 ia32_address_mode_t am;
4867 ia32_address_t *addr = &am.addr;
4870 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4872 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4873 set_am_attributes(cnt, &am);
4874 set_ia32_ls_mode(cnt, get_irn_mode(param));
4876 SET_IA32_ORIG_NODE(cnt, node);
4877 return fix_mem_proj(cnt, &am);
4880 new_param = be_transform_node(param);
4882 /* do the standard popcount algo */
4884 /* m1 = x & 0x55555555 */
4885 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4886 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4889 simm = ia32_create_Immediate(NULL, 0, 1);
4890 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4892 /* m2 = s1 & 0x55555555 */
4893 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4896 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4898 /* m4 = m3 & 0x33333333 */
4899 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4900 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4903 simm = ia32_create_Immediate(NULL, 0, 2);
4904 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4906 /* m5 = s2 & 0x33333333 */
4907 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4910 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4912 /* m7 = m6 & 0x0F0F0F0F */
4913 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4914 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4917 simm = ia32_create_Immediate(NULL, 0, 4);
4918 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4920 /* m8 = s3 & 0x0F0F0F0F */
4921 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4924 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4926 /* m10 = m9 & 0x00FF00FF */
4927 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4928 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4931 simm = ia32_create_Immediate(NULL, 0, 8);
4932 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4934 /* m11 = s4 & 0x00FF00FF */
4935 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4937 /* m12 = m10 + m11 */
4938 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4940 /* m13 = m12 & 0x0000FFFF */
4941 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4942 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4944 /* s5 = m12 >> 16 */
4945 simm = ia32_create_Immediate(NULL, 0, 16);
4946 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4948 /* res = m13 + s5 */
4949 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4953 * Transform builtin byte swap.
4955 static ir_node *gen_bswap(ir_node *node) {
4956 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4957 dbg_info *dbgi = get_irn_dbg_info(node);
4959 ir_node *block = get_nodes_block(node);
4960 ir_node *new_block = be_transform_node(block);
4961 ir_mode *mode = get_irn_mode(param);
4962 unsigned size = get_mode_size_bits(mode);
4963 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
4967 if (ia32_cg_config.use_i486) {
4968 /* swap available */
4969 return new_bd_ia32_Bswap(dbgi, new_block, param);
4971 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4972 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4974 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
4975 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
4977 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4979 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
4980 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
4982 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4983 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
4986 /* swap16 always available */
4987 return new_bd_ia32_Bswap16(dbgi, new_block, param);
4990 panic("Invalid bswap size (%d)", size);
4995 * Transform builtin outport.
4997 static ir_node *gen_outport(ir_node *node) {
4998 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
4999 ir_node *oldv = get_Builtin_param(node, 1);
5000 ir_mode *mode = get_irn_mode(oldv);
5001 ir_node *value = be_transform_node(oldv);
5002 ir_node *block = be_transform_node(get_nodes_block(node));
5003 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5004 dbg_info *dbgi = get_irn_dbg_info(node);
5006 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5007 set_ia32_ls_mode(res, mode);
5012 * Transform builtin inport.
5014 static ir_node *gen_inport(ir_node *node) {
/* The result width comes from the builtin's method type; the port
 * number is the first parameter (typically an immediate). */
5015 ir_type *tp = get_Builtin_type(node);
5016 ir_type *rstp = get_method_res_type(tp, 0);
5017 ir_mode *mode = get_type_mode(rstp);
5018 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5019 ir_node *block = be_transform_node(get_nodes_block(node));
5020 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5021 dbg_info *dbgi = get_irn_dbg_info(node);
5023 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
/* ls_mode encodes the port access width */
5024 set_ia32_ls_mode(res, mode);
5026 /* check for missing Result Proj */
5031 * Transform a builtin inner trampoline
/* Writes a small code stub to memory at <ptr>:
 *   B9 <env>      mov ecx, <env>
 *   E9 <rel>      jmp rel32 <callee>
 * and returns a Tuple of (memory, trampoline address).  The jump target
 * is encoded PC-relative, hence the -10 (stub length) adjustment and the
 * subtraction of the trampoline address below. */
5033 static ir_node *gen_inner_trampoline(ir_node *node) {
5034 ir_node *ptr = get_Builtin_param(node, 0);
5035 ir_node *callee = get_Builtin_param(node, 1);
5036 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5037 ir_node *mem = get_Builtin_mem(node);
5038 ir_node *block = get_nodes_block(node);
5039 ir_node *new_block = be_transform_node(block);
5043 ir_node *trampoline;
5045 dbg_info *dbgi = get_irn_dbg_info(node);
5046 ia32_address_t addr;
5048 /* construct store address */
5049 memset(&addr, 0, sizeof(addr));
5050 ia32_create_address_mode(&addr, ptr, 0);
/* replace missing base/index parts with the NoReg placeholder */
5052 if (addr.base == NULL) {
5053 addr.base = noreg_GP;
5055 addr.base = be_transform_node(addr.base);
5058 if (addr.index == NULL) {
5059 addr.index = noreg_GP;
5061 addr.index = be_transform_node(addr.index);
5063 addr.mem = be_transform_node(mem);
5065 /* mov ecx, <env> */
/* opcode byte 0xB9 = mov ecx, imm32 */
5066 val = ia32_create_Immediate(NULL, 0, 0xB9);
5067 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5068 addr.index, addr.mem, val);
5069 set_irn_pinned(store, get_irn_pinned(node));
5070 set_ia32_op_type(store, ia32_AddrModeD);
5071 set_ia32_ls_mode(store, mode_Bu);
5072 set_address(store, &addr);
/* store the 32-bit environment pointer as the mov's immediate */
5076 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5077 addr.index, addr.mem, env);
5078 set_irn_pinned(store, get_irn_pinned(node));
5079 set_ia32_op_type(store, ia32_AddrModeD);
5080 set_ia32_ls_mode(store, mode_Iu);
5081 set_address(store, &addr);
5085 /* jmp rel <callee> */
/* opcode byte 0xE9 = jmp rel32 */
5086 val = ia32_create_Immediate(NULL, 0, 0xE9);
5087 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5088 addr.index, addr.mem, val);
5089 set_irn_pinned(store, get_irn_pinned(node));
5090 set_ia32_op_type(store, ia32_AddrModeD);
5091 set_ia32_ls_mode(store, mode_Bu);
5092 set_address(store, &addr);
5096 trampoline = be_transform_node(ptr);
5098 /* the callee is typically an immediate */
/* rel = callee - 10 - trampoline: displacement relative to the end of
 * the 10-byte stub */
5099 if (is_SymConst(callee)) {
5100 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5102 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
5104 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
/* store the computed displacement as the jmp's rel32 operand */
5106 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5107 addr.index, addr.mem, rel);
5108 set_irn_pinned(store, get_irn_pinned(node));
5109 set_ia32_op_type(store, ia32_AddrModeD);
5110 set_ia32_ls_mode(store, mode_Iu);
5111 set_address(store, &addr);
/* Tuple of (memory, trampoline address) -- unpacked by gen_Proj_Builtin */
5116 return new_r_Tuple(new_block, 2, in);
5120 * Transform Builtin node.
/* Dispatches on the builtin kind; each supported kind delegates to its
 * dedicated gen_* helper above. */
5122 static ir_node *gen_Builtin(ir_node *node) {
5123 ir_builtin_kind kind = get_Builtin_kind(node);
5127 return gen_trap(node);
5128 case ir_bk_debugbreak:
5129 return gen_debugbreak(node);
5130 case ir_bk_return_address:
5131 return gen_return_address(node);
5132 case ir_bk_frame_address:
5133 return gen_frame_address(node);
5134 case ir_bk_prefetch:
5135 return gen_prefetch(node);
5137 return gen_ffs(node);
5139 return gen_clz(node);
5141 return gen_ctz(node);
5143 return gen_parity(node);
5144 case ir_bk_popcount:
5145 return gen_popcount(node);
5147 return gen_bswap(node);
5149 return gen_outport(node);
5151 return gen_inport(node);
5152 case ir_bk_inner_trampoline:
5153 return gen_inner_trampoline(node);
/* reached for builtin kinds the ia32 backend does not implement */
5155 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5159 * Transform Proj(Builtin) node.
/* Maps Projs of a Builtin onto the outputs of the node produced by
 * gen_Builtin(): plain value builtins return the transformed node
 * itself, Inport maps onto the ia32_Inport proj numbers, and the inner
 * trampoline was built as a Tuple(mem, result). */
5161 static ir_node *gen_Proj_Builtin(ir_node *proj) {
5162 ir_node *node = get_Proj_pred(proj);
5163 ir_node *new_node = be_transform_node(node);
5164 ir_builtin_kind kind = get_Builtin_kind(node);
/* value-producing builtins: the transformed node is the result */
5167 case ir_bk_return_address:
5168 case ir_bk_frame_address:
5173 case ir_bk_popcount:
5175 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* memory-only builtins */
5178 case ir_bk_debugbreak:
5179 case ir_bk_prefetch:
5181 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* Inport: split into result and memory projs of the ia32_Inport */
5184 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5185 return new_r_Proj(get_nodes_block(new_node),
5186 new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5188 assert(get_Proj_proj(proj) == pn_Builtin_M);
5189 return new_r_Proj(get_nodes_block(new_node),
5190 new_node, mode_M, pn_ia32_Inport_M);
5192 case ir_bk_inner_trampoline:
/* gen_inner_trampoline() returned a Tuple: pred 0 = memory,
 * pred 1 = trampoline address */
5193 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5194 return get_Tuple_pred(new_node, 1);
5196 assert(get_Proj_proj(proj) == pn_Builtin_M);
5197 return get_Tuple_pred(new_node, 0);
5200 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5203 static ir_node *gen_be_IncSP(ir_node *node)
5205 ir_node *res = be_duplicate_node(node);
5206 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5212 * Transform the Projs from a be_Call.
5214 static ir_node *gen_Proj_be_Call(ir_node *node)
5216 ir_node *block = be_transform_node(get_nodes_block(node));
5217 ir_node *call = get_Proj_pred(node);
5218 ir_node *new_call = be_transform_node(call);
5219 dbg_info *dbgi = get_irn_dbg_info(node);
5220 long proj = get_Proj_proj(node);
5221 ir_mode *mode = get_irn_mode(node);
/* the memory proj of the be_Call maps straight onto the ia32_Call
 * memory output */
5224 if (proj == pn_be_Call_M_regular) {
5225 return new_rd_Proj(dbgi, block, new_call, mode_M, n_ia32_Call_mem);
5227 /* transform call modes */
5228 if (mode_is_data(mode)) {
5229 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5233 /* Map from be_Call to ia32_Call proj number */
5234 if (proj == pn_be_Call_sp) {
5235 proj = pn_ia32_Call_stack;
5236 } else if (proj == pn_be_Call_M_regular) {
5237 proj = pn_ia32_Call_M;
/* For result projs: find the output of the new call whose register
 * constraint matches the (limited) constraint of the old proj. */
5239 arch_register_req_t const *const req = arch_get_register_req_out(node);
5240 int const n_outs = arch_irn_get_n_outs(new_call);
5243 assert(proj >= pn_be_Call_first_res);
5244 assert(req->type & arch_register_req_type_limited);
5246 for (i = 0; i < n_outs; ++i) {
5247 arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);
/* skip outputs whose constraint does not match */
5249 if (!(new_req->type & arch_register_req_type_limited) ||
5250 new_req->cls != req->cls ||
5251 *new_req->limited != *req->limited)
5260 res = new_rd_Proj(dbgi, block, new_call, mode, proj);
5262 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
/* pin fixed registers on the well-known outputs */
5264 case pn_ia32_Call_stack:
5265 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5268 case pn_ia32_Call_fpcw:
5269 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5277 * Transform the Projs from a Cmp.
/* Cmp results must already have been lowered to control flow / Set
 * nodes before the backend transformation; reaching this function is a
 * hard error. */
5279 static ir_node *gen_Proj_Cmp(ir_node *node)
5281 /* this probably means not all mode_b nodes were lowered... */
5282 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5287 * Transform the Projs from a Bound.
5289 static ir_node *gen_Proj_Bound(ir_node *node)
5291 ir_node *new_node, *block;
5292 ir_node *pred = get_Proj_pred(node);
5294 switch (get_Proj_proj(node)) {
/* memory proj: forward to the transformed Bound memory input */
5296 return be_transform_node(get_Bound_mem(pred));
/* The bounds check became a conditional jump: the regular continuation
 * maps to the Jcc true projection, the exception path to false. */
5297 case pn_Bound_X_regular:
5298 new_node = be_transform_node(pred);
5299 block = get_nodes_block(new_node);
5300 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_true);
5301 case pn_Bound_X_except:
5302 new_node = be_transform_node(pred);
5303 block = get_nodes_block(new_node);
5304 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_false);
/* result proj: the checked index is passed through unchanged */
5306 return be_transform_node(get_Bound_index(pred));
5308 panic("unsupported Proj from Bound");
/* Transform a Proj of an ASM node: map the firm proj number/mode onto
 * the transformed ASM node's output slots. */
5312 static ir_node *gen_Proj_ASM(ir_node *node)
5314 ir_mode *mode = get_irn_mode(node);
5315 ir_node *pred = get_Proj_pred(node);
5316 ir_node *new_pred = be_transform_node(pred);
5317 ir_node *block = get_nodes_block(new_pred);
5318 long pos = get_Proj_proj(node);
/* the memory output of an ASM node is always its last out slot */
5320 if (mode == mode_M) {
5321 pos = arch_irn_get_n_outs(new_pred)-1;
/* NOTE(review): the mode/pos adjustment for the int/reference and float
 * branches happens in statements not visible here -- verify before
 * changing this function. */
5322 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5324 } else if (mode_is_float(mode)) {
5327 panic("unexpected proj mode at ASM");
5330 return new_r_Proj(block, new_pred, mode, pos);
5334 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: forwards to the per-predecessor-opcode
 * gen_Proj_* helpers, with special handling for Store, Start and
 * already-transformed/foreign predecessors. */
5336 static ir_node *gen_Proj(ir_node *node)
5338 ir_node *pred = get_Proj_pred(node);
5341 switch (get_irn_opcode(pred)) {
/* a Store only has a memory result: the transformed Store itself
 * stands in for its M proj */
5343 proj = get_Proj_proj(node);
5344 if (proj == pn_Store_M) {
5345 return be_transform_node(pred);
5347 panic("No idea how to transform proj->Store");
5350 return gen_Proj_Load(node);
5352 return gen_Proj_ASM(node);
5354 return gen_Proj_Builtin(node);
5358 return gen_Proj_DivMod(node);
5360 return gen_Proj_CopyB(node);
5362 return gen_Proj_Quot(node);
5364 return gen_Proj_be_SubSP(node);
5366 return gen_Proj_be_AddSP(node);
5368 return gen_Proj_be_Call(node);
5370 return gen_Proj_Cmp(node);
5372 return gen_Proj_Bound(node);
5374 proj = get_Proj_proj(node);
5376 case pn_Start_X_initial_exec: {
5377 ir_node *block = get_nodes_block(pred);
5378 ir_node *new_block = be_transform_node(block);
5379 dbg_info *dbgi = get_irn_dbg_info(node);
5380 /* we exchange the ProjX with a jump */
5381 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5386 case pn_Start_P_tls:
5387 return gen_Proj_tls(node);
5392 if (is_ia32_l_FloattoLL(pred)) {
5393 return gen_Proj_l_FloattoLL(node);
5395 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5399 ir_mode *mode = get_irn_mode(node);
/* GP-register values are renumbered onto the transformed predecessor
 * with mode_Iu */
5400 if (ia32_mode_needs_gp_reg(mode)) {
5401 ir_node *new_pred = be_transform_node(pred);
5402 ir_node *block = be_transform_node(get_nodes_block(node));
5403 ir_node *new_proj = new_r_Proj(block, new_pred,
5404 mode_Iu, get_Proj_proj(node));
/* keep the original node number (debugging aid) */
5405 new_proj->node_nr = node->node_nr;
/* fallback: copy the Proj unchanged */
5410 return be_duplicate_node(node);
5414 * Enters all transform functions into the generic pointer
/* GEN(op) installs gen_<op> as the transform callback of <op>; BAD(op)
 * installs bad_transform for opcodes that must not reach the backend. */
5416 static void register_transformers(void)
5418 /* first clear the generic function pointer for all ops */
5419 clear_irp_opcodes_generic_func();
5421 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5422 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5461 /* transform ops from intrinsic lowering */
5473 GEN(ia32_l_LLtoFloat);
5474 GEN(ia32_l_FloattoLL);
5480 /* we should never see these nodes */
5495 /* handle builtins */
5498 /* handle generic backend nodes */
5512 * Pre-transform all unknown and noreg nodes.
5514 static void ia32_pretransform_node(void)
5516 ia32_code_gen_t *cg = env_cg;
/* Transform the per-register-class Unknown/NoReg placeholder nodes up
 * front so later transform functions can reference them directly. */
5518 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5519 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5520 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5521 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5522 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5523 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
/* cache the frequently used NoMem/NoReg nodes in file-scope globals */
5525 nomem = get_irg_no_mem(current_ir_graph);
5526 noreg_GP = ia32_new_NoReg_gp(cg);
5532 * Walker, checks if all ia32 nodes producing more than one result have their
5533 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
5535 static void add_missing_keep_walker(ir_node *node, void *data)
5538 unsigned found_projs = 0;
5539 const ir_edge_t *edge;
5540 ir_mode *mode = get_irn_mode(node);
/* only multi-output ia32 nodes are of interest */
5545 if (!is_ia32_irn(node))
5548 n_outs = arch_irn_get_n_outs(node);
5551 if (is_ia32_SwitchJmp(node))
/* found_projs is a bitmask, so the number of outputs must fit */
5554 assert(n_outs < (int) sizeof(unsigned) * 8)(
5555 foreach_out_edge(node, edge) {
5556 ir_node *proj = get_edge_src_irn(edge);
5559 /* The node could be kept */
/* memory projs need no keep */
5563 if (get_irn_mode(proj) == mode_M)
/* record which output positions already have a Proj */
5566 pn = get_Proj_proj(proj);
5567 assert(pn < n_outs);
5568 found_projs |= 1 << pn;
5572 /* are keeps missing? */
5574 for (i = 0; i < n_outs; ++i) {
5577 const arch_register_req_t *req;
5578 const arch_register_class_t *cls;
/* output already used -> nothing to do */
5580 if (found_projs & (1 << i)) {
5584 req = get_ia32_out_req(node, i);
/* flag results never need a keep */
5589 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
/* create the missing Proj and attach it to a (shared) Keep */
5593 block = get_nodes_block(node);
5594 in[0] = new_r_Proj(block, node, arch_register_class_mode(cls), i);
5595 if (last_keep != NULL) {
5596 be_Keep_add_node(last_keep, cls, in[0]);
5598 last_keep = be_new_Keep(block, 1, in);
5599 if (sched_is_scheduled(node)) {
5600 sched_add_after(node, last_keep);
5607 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
5610 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5612 ir_graph *irg = be_get_birg_irg(cg->birg);
5613 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5617 * Post-process all calls if we are in SSE mode.
5618 * The ABI requires that the results are in st0, copy them
5619 * to a xmm register.
5621 static void postprocess_fp_call_results(void) {
/* call_list / call_types were collected during the transform phase */
5624 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5625 ir_node *call = call_list[i];
5626 ir_type *mtp = call_types[i];
5629 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5630 ir_type *res_tp = get_method_res_type(mtp, j);
5631 ir_node *res, *new_res;
5632 const ir_edge_t *edge, *next;
5635 if (! is_atomic_type(res_tp)) {
5636 /* no floating point return */
5639 mode = get_type_mode(res_tp);
5640 if (! mode_is_float(mode)) {
5641 /* no floating point return */
/* the x87 result proj of the call */
5645 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5648 /* now patch the users */
5649 foreach_out_edge_safe(res, edge, next) {
5650 ir_node *succ = get_edge_src_irn(edge);
/* Keeps need no rewrite */
5653 if (be_is_Keep(succ))
5656 if (is_ia32_xStore(succ)) {
5657 /* an xStore can be patched into an vfst */
5658 dbg_info *db = get_irn_dbg_info(succ);
5659 ir_node *block = get_nodes_block(succ);
5660 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5661 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5662 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5663 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5664 ir_mode *mode = get_ia32_ls_mode(succ);
/* build an x87 store with the same address/attributes */
5666 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5667 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5668 if (is_ia32_use_frame(succ))
5669 set_ia32_use_frame(st);
5670 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5671 set_irn_pinned(st, get_irn_pinned(succ));
5672 set_ia32_op_type(st, ia32_AddrModeD);
/* otherwise: lazily build one vfst/xLoad pair per call result that
 * bounces the value through the stack frame into an xmm register */
5676 if (new_res == NULL) {
5677 dbg_info *db = get_irn_dbg_info(call);
5678 ir_node *block = get_nodes_block(call);
5679 ir_node *frame = get_irg_frame(current_ir_graph);
5680 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5681 ir_node *call_mem = new_r_Proj(block, call, mode_M, pn_ia32_Call_M);
5682 ir_node *vfst, *xld, *new_mem;
5684 /* store st(0) on stack */
5685 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5686 set_ia32_op_type(vfst, ia32_AddrModeD);
5687 set_ia32_use_frame(vfst);
5689 /* load into SSE register */
5690 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5691 set_ia32_op_type(xld, ia32_AddrModeS);
5692 set_ia32_use_frame(xld);
5694 new_res = new_r_Proj(block, xld, mode, pn_ia32_xLoad_res);
5695 new_mem = new_r_Proj(block, xld, mode_M, pn_ia32_xLoad_M);
/* reroute memory users behind the new load */
5697 if (old_mem != NULL) {
5698 edges_reroute(old_mem, new_mem, current_ir_graph);
/* finally let the user consume the xmm value */
5702 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5709 /* do the transformation */
/* Entry point of the ia32 transform phase: registers the transform
 * callbacks, prepares auxiliary data (heights, address-mode analysis),
 * runs be_transform_graph() and post-processes SSE call results. */
5710 void ia32_transform_graph(ia32_code_gen_t *cg)
5714 register_transformers();
5716 initial_fpcw = NULL;
/* height information is needed by the address-mode matching */
5719 BE_TIMER_PUSH(t_heights);
5720 heights = heights_new(cg->irg);
5721 BE_TIMER_POP(t_heights);
5722 ia32_calculate_non_address_mode_nodes(cg->birg);
5724 /* the transform phase is not safe for CSE (yet) because several nodes get
5725 * attributes set after their creation */
5726 cse_last = get_opt_cse();
/* collect calls (and their types) for postprocess_fp_call_results() */
5729 call_list = NEW_ARR_F(ir_node *, 0);
5730 call_types = NEW_ARR_F(ir_type *, 0);
5731 be_transform_graph(cg->birg, ia32_pretransform_node);
/* in SSE mode the x87 call results must be copied into xmm registers */
5733 if (ia32_cg_config.use_sse2)
5734 postprocess_fp_call_results();
5735 DEL_ARR_F(call_types);
5736 DEL_ARR_F(call_list);
/* restore the CSE setting saved above */
5738 set_opt_cse(cse_last);
5740 ia32_free_non_address_mode_nodes();
5741 heights_free(heights);
5745 void ia32_init_transform(void)
5747 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");