2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
95 static ir_node *initial_fpcw = NULL;
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* its enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
/*
 * Return true if the constant can be materialized with a simple SSE
 * instruction (e.g. xorps for zero) instead of a load from memory.
 * NOTE(review): this block is incomplete in the extraction — several lines
 * (returns, closing braces, parts of the #ifdef paths) are missing; the
 * text below is preserved verbatim.
 */
167 * returns true if constant can be created with a simple float command
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
/* assemble the low 32 bits of the double from its byte representation */
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
/*
 * Transform a firm Const node into ia32 code.
 * Float constants become SSE loads/constructions (use_sse2) or x87
 * fldz/fld1/vfld; integer constants become ia32_Const immediates.
 * NOTE(review): extraction has dropped declarations (load, res, floatent,
 * cnst, val, ls_mode), some else-branches and closing braces — text kept
 * verbatim below.
 */
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
210 if (mode_is_float(mode)) {
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
217 if (tarval_is_null(tv)) {
/* +0.0: cheap xZero (xorps) instead of a memory load */
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
222 } else if (tarval_is_one(tv)) {
/* construct 1.0 by shifting an all-ones pattern; shift amounts differ
 * for single (26) vs. double (55) precision */
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
/* shift the upper 32 bits into place to rebuild the 64bit double */
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the float from a constant-pool entity */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0/1.0, otherwise load from entity */
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
292 floatent = create_float_const_entity(node);
293 /* create_float_const_ent is smart and sometimes creates
295 ls_mode = get_type_mode(get_entity_type(floatent));
297 load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
299 set_ia32_op_type(load, ia32_AddrModeS);
300 set_ia32_am_sc(load, floatent);
301 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
302 res = new_r_Proj(block, load, mode_vfp, pn_ia32_vfld_res);
305 #ifdef CONSTRUCT_SSE_CONST
307 #endif /* CONSTRUCT_SSE_CONST */
308 SET_IA32_ORIG_NODE(load, node);
310 be_dep_on_frame(load);
312 } else { /* non-float mode */
314 tarval *tv = get_Const_tarval(node);
/* normalize to 32bit unsigned so it fits an ia32 immediate */
317 tv = tarval_convert_to(tv, mode_Iu);
319 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
321 panic("couldn't convert constant tarval (%+F)", node);
323 val = get_tarval_long(tv);
325 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
326 SET_IA32_ORIG_NODE(cnst, node);
328 be_dep_on_frame(cnst);
/*
 * Transform a firm SymConst (address of an entity) into ia32 code.
 * Float mode: an xLoad (SSE) or vfld (x87) addressed by the entity;
 * integer mode: an ia32_Const carrying the entity as symconst.
 * NOTE(review): else-branches, declarations and returns are missing from
 * this extraction; text preserved verbatim.
 */
334 * Transforms a SymConst.
336 static ir_node *gen_SymConst(ir_node *node)
338 ir_node *old_block = get_nodes_block(node);
339 ir_node *block = be_transform_node(old_block);
340 dbg_info *dbgi = get_irn_dbg_info(node);
341 ir_mode *mode = get_irn_mode(node);
344 if (mode_is_float(mode)) {
345 if (ia32_cg_config.use_sse2)
346 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
348 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
/* only entity addresses are supported by this backend */
354 if (get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
361 SET_IA32_ORIG_NODE(cnst, node);
363 be_dep_on_frame(cnst);
/*
 * Create (and cache per alignment) a primitive type for the given mode.
 * One static 16-entry cache per supported mode, indexed by alignment.
 * NOTE(review): the `char buf[...]`/`ir_type *tp` declarations and the
 * closing braces are missing from this extraction; text kept verbatim.
 */
368 * Create a float type for the given mode and cache it.
370 * @param mode the mode for the float type (might be integer mode for SSE2 types)
371 * @param align alignment
373 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
379 if (mode == mode_Iu) {
380 static ir_type *int_Iu[16] = {NULL, };
382 if (int_Iu[align] == NULL) {
383 snprintf(buf, sizeof(buf), "int_Iu_%u", align);
384 int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
385 /* set the specified alignment */
386 set_type_alignment_bytes(tp, align);
388 return int_Iu[align];
389 } else if (mode == mode_Lu) {
390 static ir_type *int_Lu[16] = {NULL, };
392 if (int_Lu[align] == NULL) {
393 snprintf(buf, sizeof(buf), "int_Lu_%u", align);
394 int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
395 /* set the specified alignment */
396 set_type_alignment_bytes(tp, align);
398 return int_Lu[align];
399 } else if (mode == mode_F) {
400 static ir_type *float_F[16] = {NULL, };
402 if (float_F[align] == NULL) {
403 snprintf(buf, sizeof(buf), "float_F_%u", align);
404 float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
405 /* set the specified alignment */
406 set_type_alignment_bytes(tp, align);
408 return float_F[align];
409 } else if (mode == mode_D) {
410 static ir_type *float_D[16] = {NULL, };
412 if (float_D[align] == NULL) {
413 snprintf(buf, sizeof(buf), "float_D_%u", align);
414 float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return float_D[align];
/* fallback: extended precision (mode_E) */
420 static ir_type *float_E[16] = {NULL, };
422 if (float_E[align] == NULL) {
423 snprintf(buf, sizeof(buf), "float_E_%u", align);
424 float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
425 /* set the specified alignment */
426 set_type_alignment_bytes(tp, align);
428 return float_E[align];
/*
 * Create (and cache per alignment) a float[2] array type over the given
 * atomic type; size is fixed to twice the element size.
 * NOTE(review): local declarations (`buf`, `arr`) and the trailing
 * `return arr;` are missing from this extraction; text kept verbatim.
 */
433 * Create a float[2] array type for the given atomic type.
435 * @param tp the atomic type
437 static ir_type *ia32_create_float_array(ir_type *tp) {
439 ir_mode *mode = get_type_mode(tp);
440 unsigned align = get_type_alignment_bytes(tp);
445 if (mode == mode_F) {
446 static ir_type *float_F[16] = {NULL, };
448 if (float_F[align] != NULL)
449 return float_F[align];
450 snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
451 arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
452 } else if (mode == mode_D) {
453 static ir_type *float_D[16] = {NULL, };
455 if (float_D[align] != NULL)
456 return float_D[align];
457 snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
458 arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
/* fallback: extended precision elements */
460 static ir_type *float_E[16] = {NULL, };
462 if (float_E[align] != NULL)
463 return float_E[align];
464 snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
465 arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
467 set_type_alignment_bytes(arr, align);
468 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
469 set_type_state(arr, layout_fixed);
/*
 * Generate (and cache) a global, constant entity holding one of the
 * well-known FP bit patterns (sign/abs masks, ULL bias) used to implement
 * float Neg/Abs and unsigned-long-long conversion.
 * The ULL bias entity is a 2-element array { 0, bias } so the second slot
 * carries the bias value.
 * NOTE(review): several declarations (mode, tv, tp, ent) and braces are
 * missing from this extraction; text kept verbatim.
 */
473 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
474 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
476 static const struct {
477 const char *ent_name;
478 const char *cnst_str;
481 } names [ia32_known_const_max] = {
482 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
483 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
484 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
485 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
486 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
488 static ir_entity *ent_cache[ia32_known_const_max];
490 const char *ent_name, *cnst_str;
496 ent_name = names[kct].ent_name;
497 if (! ent_cache[kct]) {
498 cnst_str = names[kct].cnst_str;
/* the `mode` table field selects the tarval mode for the constant */
500 switch (names[kct].mode) {
501 case 0: mode = mode_Iu; break;
502 case 1: mode = mode_Lu; break;
503 default: mode = mode_F; break;
505 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
506 tp = ia32_create_float_type(mode, names[kct].align);
508 if (kct == ia32_ULLBIAS)
509 tp = ia32_create_float_array(tp);
510 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
512 set_entity_ld_ident(ent, get_entity_ident(ent));
513 set_entity_visibility(ent, visibility_local);
514 set_entity_variability(ent, variability_constant);
515 set_entity_allocation(ent, allocation_static);
517 if (kct == ia32_ULLBIAS) {
518 ir_initializer_t *initializer = create_initializer_compound(2);
520 set_initializer_compound_value(initializer, 0,
521 create_initializer_tarval(get_tarval_null(mode)));
522 set_initializer_compound_value(initializer, 1,
523 create_initializer_tarval(tv));
525 set_entity_initializer(ent, initializer);
527 set_entity_initializer(ent, create_initializer_tarval(tv));
530 /* cache the entry */
531 ent_cache[kct] = ent;
534 return ent_cache[kct];
/*
 * Decide whether `node` (a Proj(Load) or float Const) may be folded into
 * another instruction as a source-address-mode operand.
 * NOTE(review): several return statements, an is_Proj check and braces are
 * missing from this extraction; text kept verbatim.
 */
538 * return true if the node is a Proj(Load) and could be used in source address
539 * mode for another node. Will return only true if the @p other node is not
540 * dependent on the memory of the Load (for binary operations use the other
541 * input here, for unary operations use NULL).
543 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
544 ir_node *other, ir_node *other2, match_flags_t flags)
549 /* float constants are always available */
550 if (is_Const(node)) {
551 ir_mode *mode = get_irn_mode(node);
552 if (mode_is_float(mode)) {
553 if (ia32_cg_config.use_sse2) {
554 if (is_simple_sse_Const(node))
557 if (is_simple_x87_Const(node))
/* constants with multiple users stay materialized once */
560 if (get_irn_n_edges(node) > 1)
568 load = get_Proj_pred(node);
569 pn = get_Proj_proj(node);
570 if (!is_Load(load) || pn != pn_Load_res)
572 if (get_nodes_block(load) != block)
574 /* we only use address mode if we're the only user of the load */
575 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
577 /* in some edge cases with address mode we might reach the load normally
578 * and through some AM sequence, if it is already materialized then we
579 * can't create an AM node from it */
580 if (be_is_transformed(node))
583 /* don't do AM if other node inputs depend on the load (via mem-proj) */
584 if (other != NULL && prevents_AM(block, load, other))
587 if (other2 != NULL && prevents_AM(block, load, other2))
/*
 * Matched operand description produced by match_arguments(): the address,
 * operand kind and operand flags of an ia32 instruction under construction.
 * NOTE(review): several fields (addr, new_op1/new_op2, ls_mode, mem_proj,
 * pinned) are missing from this extraction; text kept verbatim.
 */
593 typedef struct ia32_address_mode_t ia32_address_mode_t;
594 struct ia32_address_mode_t {
599 ia32_op_type_t op_type;
603 unsigned commutative : 1;
604 unsigned ins_permuted : 1;
607 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
609 /* construct load address */
610 memset(addr, 0, sizeof(addr[0]));
611 ia32_create_address_mode(addr, ptr, 0);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
615 addr->mem = be_transform_node(mem);
/*
 * Build the address-mode part of @p am for operand @p node: either a float
 * Const (folded via a constant-pool entity) or a Proj(Load) whose
 * ptr/mem/mode are extracted.
 * NOTE(review): local declarations (load, ptr, mem, new_mem) and an early
 * return are missing from this extraction; text kept verbatim.
 */
618 static void build_address(ia32_address_mode_t *am, ir_node *node,
619 ia32_create_am_flags_t flags)
621 ia32_address_t *addr = &am->addr;
627 if (is_Const(node)) {
628 ir_entity *entity = create_float_const_entity(node);
629 addr->base = noreg_GP;
630 addr->index = noreg_GP;
632 addr->symconst_ent = entity;
634 am->ls_mode = get_type_mode(get_entity_type(entity));
635 am->pinned = op_pin_state_floats;
/* otherwise node is a Proj(Load): take over the load's address */
639 load = get_Proj_pred(node);
640 ptr = get_Load_ptr(load);
641 mem = get_Load_mem(load);
642 new_mem = be_transform_node(mem);
643 am->pinned = get_irn_pinned(load);
644 am->ls_mode = get_Load_mode(load);
645 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
648 /* construct load address */
649 ia32_create_address_mode(addr, ptr, flags);
651 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
652 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/*
 * Copy the matched address components of @p addr into the ia32 attributes
 * of @p node (scale, symconst, offset, sign, frame usage).
 * NOTE(review): the guard before the use_frame/frame_ent lines is missing
 * from this extraction; text kept verbatim.
 */
656 static void set_address(ir_node *node, const ia32_address_t *addr)
658 set_ia32_am_scale(node, addr->scale);
659 set_ia32_am_sc(node, addr->symconst_ent);
660 set_ia32_am_offs_int(node, addr->offset);
661 if (addr->symconst_sign)
662 set_ia32_am_sc_sign(node);
664 set_ia32_use_frame(node);
665 set_ia32_frame_ent(node, addr->frame_entity);
/*
 * Apply a full address-mode description to a node: address, op type,
 * load/store mode, pinned state, commutativity.
 * NOTE(review): the guard before set_ia32_commutative and braces are
 * missing from this extraction; text kept verbatim.
 */
669 * Apply attributes of a given address mode to a node.
671 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
673 set_address(node, &am->addr);
675 set_ia32_op_type(node, am->op_type);
676 set_ia32_ls_mode(node, am->ls_mode);
677 if (am->pinned == op_pin_state_pinned) {
678 /* beware: some nodes are already pinned and did not allow to change the state */
679 if (get_irn_pinned(node) != op_pin_state_pinned)
680 set_irn_pinned(node, op_pin_state_pinned);
683 set_ia32_commutative(node);
/*
 * Test whether a node is a single-user integer Conv to a mode with at most
 * as many bits as its operand (a "down-conv" that may be skipped).
 * NOTE(review): the is_Conv() guard, local declarations and the return of
 * the final conjunction are missing from this extraction; text verbatim.
 */
687 * Check, if a given node is a Down-Conv, ie. a integer Conv
688 * from a mode with a mode with more bits to a mode with lesser bits.
689 * Moreover, we return only true if the node has not more than 1 user.
691 * @param node the node
692 * @return non-zero if node is a Down-Conv
694 static int is_downconv(const ir_node *node)
702 /* we only want to skip the conv when we're the only user
703 * (not optimal but for now...)
705 if (get_irn_n_edges(node) > 1)
708 src_mode = get_irn_mode(get_Conv_op(node));
709 dest_mode = get_irn_mode(node);
711 ia32_mode_needs_gp_reg(src_mode) &&
712 ia32_mode_needs_gp_reg(dest_mode) &&
713 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
716 /* Skip all Down-Conv's on a given node and return the resulting node. */
717 ir_node *ia32_skip_downconv(ir_node *node)
719 while (is_downconv(node))
720 node = get_Conv_op(node);
/*
 * Widen @p node to 32 bit with an I2I Conv, choosing a signed or unsigned
 * target mode based on the source mode's signedness.
 * NOTE(review): the tgt_mode assignments and local declarations are missing
 * from this extraction; text kept verbatim.
 */
725 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
727 ir_mode *mode = get_irn_mode(node);
732 if (mode_is_signed(mode)) {
737 block = get_nodes_block(node);
738 dbgi = get_irn_dbg_info(node);
740 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/*
 * Core operand matcher: fills @p am with immediates, source address mode or
 * plain register operands for op1/op2 according to @p flags.
 * NOTE(review): this function is heavily decimated in the extraction
 * (missing locals, else-branches, assignments and braces); the logic order
 * below is preserved verbatim and should not be rewritten from this view.
 */
744 * matches operands of a node into ia32 addressing/operand modes. This covers
745 * usage of source address mode, immediates, operations with non 32-bit modes,
747 * The resulting data is filled into the @p am struct. block is the block
748 * of the node whose arguments are matched. op1, op2 are the first and second
749 * input that are matched (op1 may be NULL). other_op is another unrelated
750 * input that is not matched! but which is needed sometimes to check if AM
751 * for op1/op2 is legal.
752 * @p flags describes the supported modes of the operation in detail.
754 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
755 ir_node *op1, ir_node *op2, ir_node *other_op,
758 ia32_address_t *addr = &am->addr;
759 ir_mode *mode = get_irn_mode(op2);
760 int mode_bits = get_mode_size_bits(mode);
761 ir_node *new_op1, *new_op2;
763 unsigned commutative;
764 int use_am_and_immediates;
767 memset(am, 0, sizeof(am[0]));
769 commutative = (flags & match_commutative) != 0;
770 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
771 use_am = (flags & match_am) != 0;
772 use_immediate = (flags & match_immediate) != 0;
773 assert(!use_am_and_immediates || use_immediate);
776 assert(!commutative || op1 != NULL);
777 assert(use_am || !(flags & match_8bit_am));
778 assert(use_am || !(flags & match_16bit_am));
/* small modes are only AM-capable when the matching flag allows it */
780 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
781 (mode_bits == 16 && !(flags & match_16bit_am))) {
785 /* we can simply skip downconvs for mode neutral nodes: the upper bits
786 * can be random for these operations */
787 if (flags & match_mode_neutral) {
788 op2 = ia32_skip_downconv(op2);
790 op1 = ia32_skip_downconv(op1);
794 /* match immediates. firm nodes are normalized: constants are always on the
797 if (!(flags & match_try_am) && use_immediate) {
798 new_op2 = try_create_Immediate(op2, 0);
/* try source AM on op2 first, then (if commutative) on op1 */
801 if (new_op2 == NULL &&
802 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
803 build_address(am, op2, 0);
804 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
805 if (mode_is_float(mode)) {
806 new_op2 = ia32_new_NoReg_vfp(env_cg);
810 am->op_type = ia32_AddrModeS;
811 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
813 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
815 build_address(am, op1, 0);
817 if (mode_is_float(mode)) {
818 noreg = ia32_new_NoReg_vfp(env_cg);
823 if (new_op2 != NULL) {
/* operands are swapped when AM is taken on op1 */
826 new_op1 = be_transform_node(op2);
828 am->ins_permuted = 1;
830 am->op_type = ia32_AddrModeS;
832 am->op_type = ia32_Normal;
834 if (flags & match_try_am) {
840 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
842 new_op2 = be_transform_node(op2);
844 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
846 if (addr->base == NULL)
847 addr->base = noreg_GP;
848 if (addr->index == NULL)
849 addr->index = noreg_GP;
850 if (addr->mem == NULL)
853 am->new_op1 = new_op1;
854 am->new_op2 = new_op2;
855 am->commutative = commutative;
/*
 * After building an AM node, reattach the original Load's memory Proj:
 * the node is turned into mode_T and a result Proj is returned.
 * NOTE(review): local declarations, the early `return node`, and braces
 * are missing from this extraction; text kept verbatim.
 */
859 * "Fixes" a node that uses address mode by turning it into mode_T
860 * and returning a pn_ia32_res Proj.
862 * @param node the node
863 * @param am its address mode
865 * @return a Proj(pn_ia32_res) if a memory address mode is used,
868 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
873 if (am->mem_proj == NULL)
876 /* we have to create a mode_T so the old MemProj can attach to us */
877 mode = get_irn_mode(node);
878 load = get_Proj_pred(am->mem_proj);
880 be_set_transformed_node(load, node);
882 if (mode != mode_T) {
883 set_irn_mode(node, mode_T);
884 return new_rd_Proj(NULL, get_nodes_block(node), node, mode, pn_ia32_res);
/*
 * Generic binary-op construction: match operands (AM/immediate), call the
 * node constructor, apply AM attributes and fix up the memory Proj.
 * NOTE(review): the dbg_info declaration, return and braces are missing
 * from this extraction; text kept verbatim.
 */
891 * Construct a standard binary operation, set AM and immediate if required.
893 * @param node The original node for which the binop is created
894 * @param op1 The first operand
895 * @param op2 The second operand
896 * @param func The node constructor function
897 * @return The constructed ia32 node.
899 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
900 construct_binop_func *func, match_flags_t flags)
903 ir_node *block, *new_block, *new_node;
904 ia32_address_mode_t am;
905 ia32_address_t *addr = &am.addr;
907 block = get_nodes_block(node);
908 match_arguments(&am, block, op1, op2, NULL, flags);
910 dbgi = get_irn_dbg_info(node);
911 new_block = be_transform_node(block);
912 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
913 am.new_op1, am.new_op2);
914 set_am_attributes(new_node, &am);
915 /* we can't use source address mode anymore when using immediates */
916 if (!(flags & match_am_and_immediates) &&
917 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
918 set_ia32_am_support(new_node, ia32_am_none);
919 SET_IA32_ORIG_NODE(new_node, node);
921 new_node = fix_mem_proj(new_node, &am);
/*
 * Canonical input positions for lowered ia32 binops with an eflags input;
 * the compile-time asserts verify that Adc and Sbb share this layout so
 * gen_binop_flags() can read inputs generically.
 * NOTE(review): the `enum {` opener and closing `};` are missing from this
 * extraction; text kept verbatim.
 */
927 * Generic names for the inputs of an ia32 binary op.
930 n_ia32_l_binop_left, /**< ia32 left input */
931 n_ia32_l_binop_right, /**< ia32 right input */
932 n_ia32_l_binop_eflags /**< ia32 eflags input */
934 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
935 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
936 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
937 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
938 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
939 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/*
 * Like gen_binop() but for lowered nodes that additionally consume eflags
 * (Adc/Sbb): the eflags input is transformed and passed to the constructor.
 * NOTE(review): the dbg_info declaration, return and braces are missing
 * from this extraction; text kept verbatim.
 */
942 * Construct a binary operation which also consumes the eflags.
944 * @param node The node to transform
945 * @param func The node constructor function
946 * @param flags The match flags
947 * @return The constructor ia32 node
949 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
952 ir_node *src_block = get_nodes_block(node);
953 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
954 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
955 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
957 ir_node *block, *new_node, *new_eflags;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
961 match_arguments(&am, src_block, op1, op2, eflags, flags);
963 dbgi = get_irn_dbg_info(node);
964 block = be_transform_node(src_block);
965 new_eflags = be_transform_node(eflags);
966 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2, new_eflags);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
/*
 * Return the (lazily transformed) initial x87 FP control word node, cached
 * in the file-level `initial_fpcw`.
 * NOTE(review): the early return's value, the fpcw declaration and the
 * final return are missing from this extraction; text kept verbatim.
 */
980 static ir_node *get_fpcw(void)
983 if (initial_fpcw != NULL)
986 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
987 &ia32_fp_cw_regs[REG_FPCW]);
988 initial_fpcw = be_transform_node(fpcw);
/*
 * Construct an x87 float binop: all ops are treated as commutative because
 * reverse variants exist; AM is only allowed for <= 64 bit operands (no
 * long-double memory operands on x87). The FPCW is passed as extra input.
 * NOTE(review): dbg_info declaration, the flags |= match_am line, return
 * and braces are missing from this extraction; text kept verbatim.
 */
994 * Construct a standard binary operation, set AM and immediate if required.
996 * @param op1 The first operand
997 * @param op2 The second operand
998 * @param func The node constructor function
999 * @return The constructed ia32 node.
1001 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1002 construct_binop_float_func *func)
1004 ir_mode *mode = get_irn_mode(node);
1006 ir_node *block, *new_block, *new_node;
1007 ia32_address_mode_t am;
1008 ia32_address_t *addr = &am.addr;
1009 ia32_x87_attr_t *attr;
1010 /* All operations are considered commutative, because there are reverse
1012 match_flags_t flags = match_commutative;
1014 /* happens for div nodes... */
1016 mode = get_divop_resmod(node);
1018 /* cannot use address mode with long double on x87 */
1019 if (get_mode_size_bits(mode) <= 64)
1022 block = get_nodes_block(node);
1023 match_arguments(&am, block, op1, op2, NULL, flags);
1025 dbgi = get_irn_dbg_info(node);
1026 new_block = be_transform_node(block);
1027 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1028 am.new_op1, am.new_op2, get_fpcw());
1029 set_am_attributes(new_node, &am);
/* record operand permutation so the x87 simulator emits the reverse op */
1031 attr = get_ia32_x87_attr(new_node);
1032 attr->attr.data.ins_permuted = am.ins_permuted;
1034 SET_IA32_ORIG_NODE(new_node, node);
1036 new_node = fix_mem_proj(new_node, &am);
/*
 * Construct a shift/rotate: the value operand is up-converted or
 * downconv-skipped to 32 bit, the shift amount may be an immediate and
 * Convs on it are skipped (only the low 5 bits matter on ia32).
 * NOTE(review): dbg_info declaration, the op2 reassignment inside the Conv
 * loop, return and braces are missing from this extraction; text verbatim.
 */
1042 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1044 * @param op1 The first operand
1045 * @param op2 The second operand
1046 * @param func The node constructor function
1047 * @return The constructed ia32 node.
1049 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1050 construct_shift_func *func,
1051 match_flags_t flags)
1054 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1056 assert(! mode_is_float(get_irn_mode(node)));
1057 assert(flags & match_immediate);
1058 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1060 if (flags & match_mode_neutral) {
1061 op1 = ia32_skip_downconv(op1);
1062 new_op1 = be_transform_node(op1);
1063 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1064 new_op1 = create_upconv(op1, node);
1066 new_op1 = be_transform_node(op1);
1069 /* the shift amount can be any mode that is bigger than 5 bits, since all
1070 * other bits are ignored anyway */
1071 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1072 ir_node *const op = get_Conv_op(op2);
1073 if (mode_is_float(get_irn_mode(op)))
1076 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1078 new_op2 = create_immediate_or_transform(op2, 0);
1080 dbgi = get_irn_dbg_info(node);
1081 block = get_nodes_block(node);
1082 new_block = be_transform_node(block);
1083 new_node = func(dbgi, new_block, new_op1, new_op2);
1084 SET_IA32_ORIG_NODE(new_node, node);
1086 /* lowered shift instruction may have a dependency operand, handle it here */
1087 if (get_irn_arity(node) == 3) {
1088 /* we have a dependency */
1089 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1090 add_irn_dep(new_node, new_dep);
/*
 * Construct a simple unary ia32 op; only match_mode_neutral is supported
 * (allows skipping down-convs on the operand).
 * NOTE(review): dbg_info declaration, return and braces are missing from
 * this extraction; text kept verbatim.
 */
1098 * Construct a standard unary operation, set AM and immediate if required.
1100 * @param op The operand
1101 * @param func The node constructor function
1102 * @return The constructed ia32 node.
1104 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1105 match_flags_t flags)
1108 ir_node *block, *new_block, *new_op, *new_node;
1110 assert(flags == 0 || flags == match_mode_neutral);
1111 if (flags & match_mode_neutral) {
1112 op = ia32_skip_downconv(op);
1115 new_op = be_transform_node(op);
1116 dbgi = get_irn_dbg_info(node);
1117 block = get_nodes_block(node);
1118 new_block = be_transform_node(block);
1119 new_node = func(dbgi, new_block, new_op);
1121 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Build an ia32 Lea from a matched address: transform base/index (noreg_GP
 * when absent — those branches are missing here) and apply the address.
 * NOTE(review): the base extraction/null-check, else-branches, return and
 * braces are missing from this extraction; text kept verbatim.
 */
1126 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1127 ia32_address_t *addr)
1129 ir_node *base, *index, *res;
1135 base = be_transform_node(base);
1138 index = addr->index;
1139 if (index == NULL) {
1142 index = be_transform_node(index);
1145 res = new_bd_ia32_Lea(dbgi, block, base, index);
1146 set_address(res, addr);
1152 * Returns non-zero if a given address mode has a symbolic or
1153 * numerical offset != 0.
1155 static int am_has_immediates(const ia32_address_t *addr)
1157 return addr->offset != 0 || addr->symconst_ent != NULL
1158 || addr->frame_entity || addr->use_frame;
/*
 * Transform a firm Add. Floats go through the SSE/x87 binop helpers;
 * integer Adds are matched as: pure immediate tree -> Const, immediate
 * add -> Lea, source-AM add -> ia32_Add, otherwise -> Lea.
 * NOTE(review): dbg_info declaration, mode_is_flags branches, several
 * returns and braces are missing from this extraction; text verbatim.
 */
1162 * Creates an ia32 Add.
1164 * @return the created ia32 Add node
1166 static ir_node *gen_Add(ir_node *node)
1168 ir_mode *mode = get_irn_mode(node);
1169 ir_node *op1 = get_Add_left(node);
1170 ir_node *op2 = get_Add_right(node);
1172 ir_node *block, *new_block, *new_node, *add_immediate_op;
1173 ia32_address_t addr;
1174 ia32_address_mode_t am;
1176 if (mode_is_float(mode)) {
1177 if (ia32_cg_config.use_sse2)
1178 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1179 match_commutative | match_am);
1181 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1184 ia32_mark_non_am(node);
1186 op2 = ia32_skip_downconv(op2);
1187 op1 = ia32_skip_downconv(op1);
1191 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1192 * 1. Add with immediate -> Lea
1193 * 2. Add with possible source address mode -> Add
1194 * 3. Otherwise -> Lea
1196 memset(&addr, 0, sizeof(addr));
1197 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1198 add_immediate_op = NULL;
1200 dbgi = get_irn_dbg_info(node);
1201 block = get_nodes_block(node);
1202 new_block = be_transform_node(block);
/* case 0: everything folded into the immediate parts -> plain Const */
1205 if (addr.base == NULL && addr.index == NULL) {
1206 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1207 addr.symconst_sign, 0, addr.offset);
1208 be_dep_on_frame(new_node);
1209 SET_IA32_ORIG_NODE(new_node, node);
1212 /* add with immediate? */
1213 if (addr.index == NULL) {
1214 add_immediate_op = addr.base;
1215 } else if (addr.base == NULL && addr.scale == 0) {
1216 add_immediate_op = addr.index;
1219 if (add_immediate_op != NULL) {
1220 if (!am_has_immediates(&addr)) {
1221 #ifdef DEBUG_libfirm
1222 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1225 return be_transform_node(add_immediate_op);
1228 new_node = create_lea_from_address(dbgi, new_block, &addr);
1229 SET_IA32_ORIG_NODE(new_node, node);
1233 /* test if we can use source address mode */
1234 match_arguments(&am, block, op1, op2, NULL, match_commutative
1235 | match_mode_neutral | match_am | match_immediate | match_try_am);
1237 /* construct an Add with source address mode */
1238 if (am.op_type == ia32_AddrModeS) {
1239 ia32_address_t *am_addr = &am.addr;
1240 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1241 am_addr->index, am_addr->mem, am.new_op1,
1243 set_am_attributes(new_node, &am);
1244 SET_IA32_ORIG_NODE(new_node, node);
1246 new_node = fix_mem_proj(new_node, &am);
1251 /* otherwise construct a lea */
1252 new_node = create_lea_from_address(dbgi, new_block, &addr);
1253 SET_IA32_ORIG_NODE(new_node, node);
1258 * Creates an ia32 Mul.
1260 * @return the created ia32 Mul node
1262 static ir_node *gen_Mul(ir_node *node)
1264 ir_node *op1 = get_Mul_left(node);
1265 ir_node *op2 = get_Mul_right(node);
1266 ir_mode *mode = get_irn_mode(node);
1268 if (mode_is_float(mode)) {
1269 if (ia32_cg_config.use_sse2)
1270 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1271 match_commutative | match_am);
1273 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1275 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1276 match_commutative | match_am | match_mode_neutral |
1277 match_immediate | match_am_and_immediates);
1281 * Creates an ia32 Mulh.
1282 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1283 * this result while Mul returns the lower 32 bit.
1285 * @return the created ia32 Mulh node
/* Transform a firm Mulh (high 32 bits of a 32x32->64 multiply):
 * signed uses IMul1OP, unsigned uses Mul; the result is the Proj
 * selecting the high half of the product. */
1287 static ir_node *gen_Mulh(ir_node *node)
1289 ir_node *block = get_nodes_block(node);
1290 ir_node *new_block = be_transform_node(block);
1291 dbg_info *dbgi = get_irn_dbg_info(node);
1292 ir_node *op1 = get_Mulh_left(node);
1293 ir_node *op2 = get_Mulh_right(node);
1294 ir_mode *mode = get_irn_mode(node);
1296 ir_node *proj_res_high;
1298 if (mode_is_signed(mode)) {
1299 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1300 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1302 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1303 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_Mul_res_high);
1305 return proj_res_high;
1309 * Creates an ia32 And.
1311 * @return The created ia32 And node
/* Transform a firm And. And with mask 0xFF / 0xFFFF is recognized as a
 * zero extension and turned into a conv (the mask-to-mode selection is
 * in elided lines; presumably src_mode is mode_Bu/mode_Hu — TODO confirm). */
1313 static ir_node *gen_And(ir_node *node)
1315 ir_node *op1 = get_And_left(node);
1316 ir_node *op2 = get_And_right(node);
1317 assert(! mode_is_float(get_irn_mode(node)));
1319 /* is it a zero extension? */
1320 if (is_Const(op2)) {
1321 tarval *tv = get_Const_tarval(op2);
1322 long v = get_tarval_long(tv);
1324 if (v == 0xFF || v == 0xFFFF) {
1325 dbg_info *dbgi = get_irn_dbg_info(node);
1326 ir_node *block = get_nodes_block(node);
1333 assert(v == 0xFFFF);
1336 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* general case: plain ia32 And */
1341 return gen_binop(node, op1, op2, new_bd_ia32_And,
1342 match_commutative | match_mode_neutral | match_am | match_immediate);
1348 * Creates an ia32 Or.
1350 * @return The created ia32 Or node
/* Transform a firm Or into an ia32 Or (integer only). */
1352 static ir_node *gen_Or(ir_node *node)
1354 ir_node *op1 = get_Or_left(node);
1355 ir_node *op2 = get_Or_right(node);
1357 assert (! mode_is_float(get_irn_mode(node)));
1358 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1359 | match_mode_neutral | match_am | match_immediate);
1365 * Creates an ia32 Eor.
1367 * @return The created ia32 Eor node
/* Transform a firm Eor (exclusive or) into an ia32 Xor (integer only). */
1369 static ir_node *gen_Eor(ir_node *node)
1371 ir_node *op1 = get_Eor_left(node);
1372 ir_node *op2 = get_Eor_right(node);
1374 assert(! mode_is_float(get_irn_mode(node)));
1375 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1376 | match_mode_neutral | match_am | match_immediate);
1381 * Creates an ia32 Sub.
1383 * @return The created ia32 Sub node
/* Transform a firm Sub: SSE2 xSub or x87 vfsub for floats, ia32 Sub for
 * integers. Sub with a constant right operand should have been
 * normalized to an Add by the middleend — warn if one slips through. */
1385 static ir_node *gen_Sub(ir_node *node)
1387 ir_node *op1 = get_Sub_left(node);
1388 ir_node *op2 = get_Sub_right(node);
1389 ir_mode *mode = get_irn_mode(node);
1391 if (mode_is_float(mode)) {
1392 if (ia32_cg_config.use_sse2)
1393 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1395 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1398 if (is_Const(op2)) {
1399 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1403 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1404 | match_am | match_immediate);
/* Merge the memory of a consumed address-mode load (am_mem) with the
 * original memory input of the node (src_mem), taking care not to build
 * a memory cycle when src_mem is (or Syncs over) the memory Proj of the
 * very load that was folded into the address mode. */
1407 static ir_node *transform_AM_mem(ir_node *const block,
1408 ir_node *const src_val,
1409 ir_node *const src_mem,
1410 ir_node *const am_mem)
1412 if (is_NoMem(am_mem)) {
1413 return be_transform_node(src_mem);
1414 } else if (is_Proj(src_val) &&
1416 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1417 /* avoid memory loop */
1419 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1420 ir_node *const ptr_pred = get_Proj_pred(src_val);
1421 int const arity = get_Sync_n_preds(src_mem);
/* rebuild the Sync, dropping the folded load's mem and adding am_mem */
1426 NEW_ARR_A(ir_node*, ins, arity + 1);
1428 /* NOTE: This sometimes produces dead-code because the old sync in
1429 * src_mem might not be used anymore, we should detect this case
1430 * and kill the sync... */
1431 for (i = arity - 1; i >= 0; --i) {
1432 ir_node *const pred = get_Sync_pred(src_mem, i);
1434 /* avoid memory loop */
1435 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1438 ins[n++] = be_transform_node(pred);
1443 return new_r_Sync(block, n, ins);
/* default: Sync the transformed src_mem with the AM memory */
1447 ins[0] = be_transform_node(src_mem);
1449 return new_r_Sync(block, 2, ins);
1454 * Create a 32bit to 64bit signed extension.
1456 * @param dbgi debug info
1457 * @param block the block where node nodes should be placed
1458 * @param val the value to extend
1459 * @param orig the original node
/* Create a 32->64 bit sign extension: either a Cltd (cdq, sign-extends
 * eax into edx — ProduceVal supplies the implicit input) when the config
 * prefers it, or an arithmetic shift right by 31 otherwise. */
1461 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1462 ir_node *val, const ir_node *orig)
1467 if (ia32_cg_config.use_short_sex_eax) {
1468 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1469 be_dep_on_frame(pval);
1470 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1472 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1473 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1475 SET_IA32_ORIG_NODE(res, orig);
1480 * Generates an ia32 DivMod with additional infrastructure for the
1481 * register allocator if needed.
/* Common transformation for firm Div/Mod/DivMod nodes: builds an ia32
 * IDiv (signed, with a real sign extension of the dividend) or Div
 * (unsigned, upper half forced to constant 0). */
1483 static ir_node *create_Div(ir_node *node)
1485 dbg_info *dbgi = get_irn_dbg_info(node);
1486 ir_node *block = get_nodes_block(node);
1487 ir_node *new_block = be_transform_node(block);
1494 ir_node *sign_extension;
1495 ia32_address_mode_t am;
1496 ia32_address_t *addr = &am.addr;
1498 /* the upper bits have random contents for smaller modes */
1499 switch (get_irn_opcode(node)) {
1501 op1 = get_Div_left(node);
1502 op2 = get_Div_right(node);
1503 mem = get_Div_mem(node);
1504 mode = get_Div_resmode(node);
1507 op1 = get_Mod_left(node);
1508 op2 = get_Mod_right(node);
1509 mem = get_Mod_mem(node);
1510 mode = get_Mod_resmode(node);
1513 op1 = get_DivMod_left(node);
1514 op2 = get_DivMod_right(node);
1515 mem = get_DivMod_mem(node);
1516 mode = get_DivMod_resmode(node);
1519 panic("invalid divmod node %+F", node);
1522 match_arguments(&am, block, op1, op2, NULL, match_am);
1524 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1525 is the memory of the consumed address. We can have only the second op as address
1526 in Div nodes, so check only op2. */
1527 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1529 if (mode_is_signed(mode)) {
1530 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1531 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1532 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned: the "sign extension" (edx) is simply zero */
1534 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1535 be_dep_on_frame(sign_extension);
1537 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1538 addr->index, new_mem, am.new_op2,
1539 am.new_op1, sign_extension);
1542 set_irn_pinned(new_node, get_irn_pinned(node));
1544 set_am_attributes(new_node, &am);
1545 SET_IA32_ORIG_NODE(new_node, node);
1547 new_node = fix_mem_proj(new_node, &am);
1553 * Generates an ia32 Mod.
/* Transform a firm Mod — shares the Div/Mod/DivMod machinery. */
1555 static ir_node *gen_Mod(ir_node *node)
1557 return create_Div(node);
1561 * Generates an ia32 Div.
/* Transform a firm Div — shares the Div/Mod/DivMod machinery. */
1563 static ir_node *gen_Div(ir_node *node)
1565 return create_Div(node);
1569 * Generates an ia32 DivMod.
/* Transform a firm DivMod — shares the Div/Mod/DivMod machinery. */
1571 static ir_node *gen_DivMod(ir_node *node)
1573 return create_Div(node);
1579 * Creates an ia32 floating Div.
1581 * @return The created ia32 xDiv node
/* Transform a firm Quot (floating point division):
 * SSE2 xDiv or x87 vfdiv. */
1583 static ir_node *gen_Quot(ir_node *node)
1585 ir_node *op1 = get_Quot_left(node);
1586 ir_node *op2 = get_Quot_right(node);
1588 if (ia32_cg_config.use_sse2) {
1589 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1591 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1597 * Creates an ia32 Shl.
1599 * @return The created ia32 Shl node
/* Transform a firm Shl (shift left) into an ia32 Shl. */
1601 static ir_node *gen_Shl(ir_node *node)
1603 ir_node *left = get_Shl_left(node);
1604 ir_node *right = get_Shl_right(node);
1606 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1607 match_mode_neutral | match_immediate);
1611 * Creates an ia32 Shr.
1613 * @return The created ia32 Shr node
/* Transform a firm Shr (logical shift right) into an ia32 Shr. */
1615 static ir_node *gen_Shr(ir_node *node)
1617 ir_node *left = get_Shr_left(node);
1618 ir_node *right = get_Shr_right(node);
1620 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1626 * Creates an ia32 Sar.
1628 * @return The created ia32 Sar node
/* Transform a firm Shrs (arithmetic shift right) into an ia32 Sar.
 * Two special patterns are recognized first:
 *  - Shrs by a constant (presumably 31; the check is in elided lines —
 *    TODO confirm) is a full sign extension;
 *  - Shrs(Shl(x, C), C) with C == 16/24 is an 8/16 bit sign extension
 *    and becomes a conv. */
1630 static ir_node *gen_Shrs(ir_node *node)
1632 ir_node *left = get_Shrs_left(node);
1633 ir_node *right = get_Shrs_right(node);
1635 if (is_Const(right)) {
1636 tarval *tv = get_Const_tarval(right);
1637 long val = get_tarval_long(tv);
1639 /* this is a sign extension */
1640 dbg_info *dbgi = get_irn_dbg_info(node);
1641 ir_node *block = be_transform_node(get_nodes_block(node));
1642 ir_node *new_op = be_transform_node(left);
1644 return create_sex_32_64(dbgi, block, new_op, node);
1648 /* 8 or 16 bit sign extension? */
1649 if (is_Const(right) && is_Shl(left)) {
1650 ir_node *shl_left = get_Shl_left(left);
1651 ir_node *shl_right = get_Shl_right(left);
1652 if (is_Const(shl_right)) {
1653 tarval *tv1 = get_Const_tarval(right);
1654 tarval *tv2 = get_Const_tarval(shl_right);
1655 if (tv1 == tv2 && tarval_is_long(tv1)) {
1656 long val = get_tarval_long(tv1);
1657 if (val == 16 || val == 24) {
1658 dbg_info *dbgi = get_irn_dbg_info(node);
1659 ir_node *block = get_nodes_block(node);
1669 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* general case: plain arithmetic shift right */
1678 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1684 * Creates an ia32 Rol.
1686 * @param op1 The first operator
1687 * @param op2 The second operator
1688 * @return The created ia32 RotL node
/* Create an ia32 Rol (rotate left) for the given operands. */
1690 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1692 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1698 * Creates an ia32 Ror.
1699 * NOTE: There is no RotR with immediate because this would always be a RotL
1700 * "imm-mode_size_bits" which can be pre-calculated.
1702 * @param op1 The first operator
1703 * @param op2 The second operator
1704 * @return The created ia32 RotR node
/* Create an ia32 Ror (rotate right) for the given operands. */
1706 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1708 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1714 * Creates an ia32 RotR or RotL (depending on the found pattern).
1716 * @return The created ia32 RotL or RotR node
/* Transform a firm Rotl. Firm only has RotL; the pattern
 * Rotl(x, Add(Minus(e), bits)) — i.e. a rotate by "bits - e" — is
 * rewritten into a RotR by e, avoiding the extra Add/Minus. */
1718 static ir_node *gen_Rotl(ir_node *node)
1720 ir_node *rotate = NULL;
1721 ir_node *op1 = get_Rotl_left(node);
1722 ir_node *op2 = get_Rotl_right(node);
1724 /* Firm has only RotL, so we are looking for a right (op2)
1725 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1726 that means we can create a RotR instead of an Add and a RotL */
1730 ir_node *left = get_Add_left(add);
1731 ir_node *right = get_Add_right(add);
1732 if (is_Const(right)) {
1733 tarval *tv = get_Const_tarval(right);
1734 ir_mode *mode = get_irn_mode(node);
1735 long bits = get_mode_size_bits(mode);
1737 if (is_Minus(left) &&
1738 tarval_is_long(tv) &&
1739 get_tarval_long(tv) == bits &&
1742 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1743 rotate = gen_Ror(node, op1, get_Minus_op(left));
1748 if (rotate == NULL) {
1749 rotate = gen_Rol(node, op1, op2);
1758 * Transforms a Minus node.
1760 * @return The created ia32 Minus node
/* Transform a firm Minus (negation).
 * SSE2 floats: Xor with a sign-bit constant from the constant pool;
 * x87 floats: vfchs (fchs); integers: ia32 Neg. */
1762 static ir_node *gen_Minus(ir_node *node)
1764 ir_node *op = get_Minus_op(node);
1765 ir_node *block = be_transform_node(get_nodes_block(node));
1766 dbg_info *dbgi = get_irn_dbg_info(node);
1767 ir_mode *mode = get_irn_mode(node);
1772 if (mode_is_float(mode)) {
1773 ir_node *new_op = be_transform_node(op);
1774 if (ia32_cg_config.use_sse2) {
1775 /* TODO: non-optimal... if we have many xXors, then we should
1776 * rather create a load for the const and use that instead of
1777 * several AM nodes... */
1778 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1780 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1781 nomem, new_op, noreg_xmm);
1783 size = get_mode_size_bits(mode);
1784 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1786 set_ia32_am_sc(new_node, ent);
1787 set_ia32_op_type(new_node, ia32_AddrModeS);
1788 set_ia32_ls_mode(new_node, mode);
1790 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1793 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1796 SET_IA32_ORIG_NODE(new_node, node);
1802 * Transforms a Not node.
1804 * @return The created ia32 Not node
/* Transform a firm Not into an ia32 Not (integer only; mode_b Nots
 * must have been lowered earlier). */
1806 static ir_node *gen_Not(ir_node *node)
1808 ir_node *op = get_Not_op(node);
1810 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1811 assert (! mode_is_float(get_irn_mode(node)));
1813 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1819 * Transforms an Abs node.
1821 * @return The created ia32 Abs node
/* Transform a firm Abs.
 * SSE2 floats: And with an abs-mask constant (clears the sign bit);
 * x87 floats: vfabs; integers: the classic branch-free sequence
 * s = x >> 31; result = (x ^ s) - s. */
1823 static ir_node *gen_Abs(ir_node *node)
1825 ir_node *block = get_nodes_block(node);
1826 ir_node *new_block = be_transform_node(block);
1827 ir_node *op = get_Abs_op(node);
1828 dbg_info *dbgi = get_irn_dbg_info(node);
1829 ir_mode *mode = get_irn_mode(node);
1835 if (mode_is_float(mode)) {
1836 new_op = be_transform_node(op);
1838 if (ia32_cg_config.use_sse2) {
1839 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1840 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1841 nomem, new_op, noreg_fp);
1843 size = get_mode_size_bits(mode);
1844 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1846 set_ia32_am_sc(new_node, ent);
1848 SET_IA32_ORIG_NODE(new_node, node);
1850 set_ia32_op_type(new_node, ia32_AddrModeS);
1851 set_ia32_ls_mode(new_node, mode);
1853 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1854 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs via sign mask */
1857 ir_node *xor, *sign_extension;
1859 if (get_mode_size_bits(mode) == 32) {
1860 new_op = be_transform_node(op);
1862 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1865 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1867 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1868 nomem, new_op, sign_extension);
1869 SET_IA32_ORIG_NODE(xor, node);
1871 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1872 nomem, xor, sign_extension);
1873 SET_IA32_ORIG_NODE(new_node, node);
1880 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* Create a Bt (bit test) instruction for the pattern x & (1 << n),
 * placed into the block of the original Cmp. */
1882 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1884 dbg_info *dbgi = get_irn_dbg_info(cmp);
1885 ir_node *block = get_nodes_block(cmp);
1886 ir_node *new_block = be_transform_node(block);
1887 ir_node *op1 = be_transform_node(x);
1888 ir_node *op2 = be_transform_node(n);
1890 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1894 * Transform a node returning a "flag" result.
1896 * @param node the node to transform
1897 * @param pnc_out the compare mode to use
/* Produce a flags-mode value for a mode_b input and report which
 * pn_Cmp condition to test on it.
 * A Proj of a Cmp is matched specially: the ==/!= against
 * "(1 << n) & x" pattern becomes a Bt, whose result must be tested via
 * the carry flag — hence the pnc rewrite to Lt/Ge with unsigned bit set.
 * Any other mode_b value is compared against 0 with a Test. */
1899 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1906 /* we have a Cmp as input */
1907 if (is_Proj(node)) {
1908 ir_node *pred = get_Proj_pred(node);
1910 pn_Cmp pnc = get_Proj_proj(node);
1911 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1912 ir_node *l = get_Cmp_left(pred);
1913 ir_node *r = get_Cmp_right(pred);
1915 ir_node *la = get_And_left(l);
1916 ir_node *ra = get_And_right(l);
1918 ir_node *c = get_Shl_left(la);
1919 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1920 /* (1 << n) & ra) */
1921 ir_node *n = get_Shl_right(la);
1922 flags = gen_bt(pred, ra, n);
1923 /* we must generate a Jc/Jnc jump */
1924 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1927 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* symmetric case: shift on the right operand of the And */
1932 ir_node *c = get_Shl_left(ra);
1933 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1934 /* la & (1 << n)) */
1935 ir_node *n = get_Shl_right(ra);
1936 flags = gen_bt(pred, la, n);
1937 /* we must generate a Jc/Jnc jump */
1938 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1941 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1947 flags = be_transform_node(pred);
1953 /* a mode_b value, we have to compare it against 0 */
1954 dbgi = get_irn_dbg_info(node);
1955 new_block = be_transform_node(get_nodes_block(node));
1956 new_op = be_transform_node(node);
1957 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1958 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1959 *pnc_out = pn_Cmp_Lg;
1964 * Transforms a Load.
1966 * @return the created ia32 Load node
/* Transform a firm Load into xLoad (SSE2 float), vfld (x87 float),
 * Conv_I2I with address mode (small integer modes, load + extension in
 * one instruction) or a plain Load. Floating loads are marked
 * rematerializable when the original load was not pinned. */
1968 static ir_node *gen_Load(ir_node *node)
1970 ir_node *old_block = get_nodes_block(node);
1971 ir_node *block = be_transform_node(old_block);
1972 ir_node *ptr = get_Load_ptr(node);
1973 ir_node *mem = get_Load_mem(node);
1974 ir_node *new_mem = be_transform_node(mem);
1977 dbg_info *dbgi = get_irn_dbg_info(node);
1978 ir_mode *mode = get_Load_mode(node);
1981 ia32_address_t addr;
1983 /* construct load address */
1984 memset(&addr, 0, sizeof(addr));
1985 ia32_create_address_mode(&addr, ptr, 0);
1992 base = be_transform_node(base);
1995 if (index == NULL) {
1998 index = be_transform_node(index);
2001 if (mode_is_float(mode)) {
2002 if (ia32_cg_config.use_sse2) {
2003 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2005 res_mode = mode_xmm;
2007 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2009 res_mode = mode_vfp;
2012 assert(mode != mode_b);
2014 /* create a conv node with address mode for smaller modes */
2015 if (get_mode_size_bits(mode) < 32) {
2016 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2017 new_mem, noreg_GP, mode);
2019 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2024 set_irn_pinned(new_node, get_irn_pinned(node));
2025 set_ia32_op_type(new_node, ia32_AddrModeS);
2026 set_ia32_ls_mode(new_node, mode);
2027 set_address(new_node, &addr);
2029 if (get_irn_pinned(node) == op_pin_state_floats) {
2030 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2031 && pn_ia32_vfld_res == pn_ia32_Load_res
2032 && pn_ia32_Load_res == pn_ia32_res);
2033 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2036 SET_IA32_ORIG_NODE(new_node, node);
2038 be_dep_on_frame(new_node);
/* Decide whether a Store(op(Load(ptr))) chain may be folded into a
 * destination-address-mode instruction: the load result must have a
 * single user, live in the same block, use the same pointer as the
 * store, and no other operand may (transitively) depend on the load. */
2042 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2043 ir_node *ptr, ir_node *other)
2050 /* we only use address mode if we're the only user of the load */
2051 if (get_irn_n_edges(node) > 1)
2054 load = get_Proj_pred(node);
2057 if (get_nodes_block(load) != block)
2060 /* store should have the same pointer as the load */
2061 if (get_Load_ptr(load) != ptr)
2064 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2065 if (other != NULL &&
2066 get_nodes_block(other) == block &&
2067 heights_reachable_in_block(heights, other, load)) {
2071 if (prevents_AM(block, load, mem))
2073 /* Store should be attached to the load via mem */
2074 assert(heights_reachable_in_block(heights, mem, load));
/* Build a destination-address-mode binary operation (e.g. AddMem):
 * one operand is the folded load/store location, the other becomes an
 * immediate or register. For commutative ops both operand orders are
 * tried. Returns NULL (in elided lines — TODO confirm) when the fold
 * is not applicable. */
2079 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2080 ir_node *mem, ir_node *ptr, ir_mode *mode,
2081 construct_binop_dest_func *func,
2082 construct_binop_dest_func *func8bit,
2083 match_flags_t flags)
2085 ir_node *src_block = get_nodes_block(node);
2093 ia32_address_mode_t am;
2094 ia32_address_t *addr = &am.addr;
2095 memset(&am, 0, sizeof(am));
2097 assert(flags & match_immediate); /* there is no destam node without... */
2098 commutative = (flags & match_commutative) != 0;
2100 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2101 build_address(&am, op1, ia32_create_am_double_use);
2102 new_op = create_immediate_or_transform(op2, 0);
2103 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2104 build_address(&am, op2, ia32_create_am_double_use);
2105 new_op = create_immediate_or_transform(op1, 0);
2110 if (addr->base == NULL)
2111 addr->base = noreg_GP;
2112 if (addr->index == NULL)
2113 addr->index = noreg_GP;
2114 if (addr->mem == NULL)
2117 dbgi = get_irn_dbg_info(node);
2118 block = be_transform_node(src_block);
2119 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2121 if (get_mode_size_bits(mode) == 8) {
2122 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2124 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2126 set_address(new_node, addr);
2127 set_ia32_op_type(new_node, ia32_AddrModeD);
2128 set_ia32_ls_mode(new_node, mode);
2129 SET_IA32_ORIG_NODE(new_node, node);
/* route the consumed load's memory users to the new node */
2131 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2132 mem_proj = be_transform_node(am.mem_proj);
2133 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a destination-address-mode unary operation (e.g. NotMem,
 * NegMem, IncMem): folds a Load+op+Store triple into one instruction
 * when use_dest_am allows it; otherwise returns NULL (presumably, in
 * the elided early-out — TODO confirm). */
2138 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2139 ir_node *ptr, ir_mode *mode,
2140 construct_unop_dest_func *func)
2142 ir_node *src_block = get_nodes_block(node);
2148 ia32_address_mode_t am;
2149 ia32_address_t *addr = &am.addr;
2151 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2154 memset(&am, 0, sizeof(am));
2155 build_address(&am, op, ia32_create_am_double_use);
2157 dbgi = get_irn_dbg_info(node);
2158 block = be_transform_node(src_block);
2159 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2160 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2161 set_address(new_node, addr);
2162 set_ia32_op_type(new_node, ia32_AddrModeD);
2163 set_ia32_ls_mode(new_node, mode);
2164 SET_IA32_ORIG_NODE(new_node, node);
/* route the consumed load's memory users to the new node */
2166 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2167 mem_proj = be_transform_node(am.mem_proj);
2168 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to turn a Store(Mux(sel, 1, 0)) (or the negated variant) of an
 * 8-bit value into a single SetMem (setcc to memory). Returns NULL
 * (presumably, in elided early-outs — TODO confirm) when the pattern
 * does not match. */
2173 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2175 ir_mode *mode = get_irn_mode(node);
2176 ir_node *mux_true = get_Mux_true(node);
2177 ir_node *mux_false = get_Mux_false(node);
2187 ia32_address_t addr;
/* setcc only writes a single byte */
2189 if (get_mode_size_bits(mode) != 8)
2192 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2194 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2200 build_address_ptr(&addr, ptr, mem);
2202 dbgi = get_irn_dbg_info(node);
2203 block = get_nodes_block(node);
2204 new_block = be_transform_node(block);
2205 cond = get_Mux_sel(node);
2206 flags = get_flags_node(cond, &pnc);
2207 new_mem = be_transform_node(mem);
2208 new_node = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2209 addr.index, addr.mem, flags, pnc, negated);
2210 set_address(new_node, &addr);
2211 set_ia32_op_type(new_node, ia32_AddrModeD);
2212 set_ia32_ls_mode(new_node, mode);
2213 SET_IA32_ORIG_NODE(new_node, node);
/* Try to fold a Store and the operation computing its value into a
 * single destination-address-mode instruction (AddMem, SubMem, …,
 * SetMem, NegMem, NotMem). Returns the new node or NULL when folding
 * is not possible. Only GP modes are handled; the value must be
 * single-use and in the store's block. */
2218 static ir_node *try_create_dest_am(ir_node *node)
2220 ir_node *val = get_Store_value(node);
2221 ir_node *mem = get_Store_mem(node);
2222 ir_node *ptr = get_Store_ptr(node);
2223 ir_mode *mode = get_irn_mode(val);
2224 unsigned bits = get_mode_size_bits(mode);
2229 /* handle only GP modes for now... */
2230 if (!ia32_mode_needs_gp_reg(mode))
2234 /* store must be the only user of the val node */
2235 if (get_irn_n_edges(val) > 1)
2237 /* skip pointless convs */
2239 ir_node *conv_op = get_Conv_op(val);
2240 ir_mode *pred_mode = get_irn_mode(conv_op);
2241 if (!ia32_mode_needs_gp_reg(pred_mode))
2243 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2251 /* value must be in the same block */
2252 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the opcode of the stored value */
2255 switch (get_irn_opcode(val)) {
2257 op1 = get_Add_left(val);
2258 op2 = get_Add_right(val);
2259 if (ia32_cg_config.use_incdec) {
2260 if (is_Const_1(op2)) {
2261 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2263 } else if (is_Const_Minus_1(op2)) {
2264 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2268 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2269 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2270 match_commutative | match_immediate);
2273 op1 = get_Sub_left(val);
2274 op2 = get_Sub_right(val);
2275 if (is_Const(op2)) {
2276 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2278 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2279 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2283 op1 = get_And_left(val);
2284 op2 = get_And_right(val);
2285 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2286 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2287 match_commutative | match_immediate);
2290 op1 = get_Or_left(val);
2291 op2 = get_Or_right(val);
2292 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2293 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2294 match_commutative | match_immediate);
2297 op1 = get_Eor_left(val);
2298 op2 = get_Eor_right(val);
2299 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2300 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2301 match_commutative | match_immediate);
2304 op1 = get_Shl_left(val);
2305 op2 = get_Shl_right(val);
2306 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2307 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2311 op1 = get_Shr_left(val);
2312 op2 = get_Shr_right(val);
2313 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2314 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2318 op1 = get_Shrs_left(val);
2319 op2 = get_Shrs_right(val);
2320 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2321 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2325 op1 = get_Rotl_left(val);
2326 op2 = get_Rotl_right(val);
2327 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2328 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2331 /* TODO: match ROR patterns... */
2333 new_node = try_create_SetMem(val, ptr, mem);
2336 op1 = get_Minus_op(val);
2337 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2340 /* should be lowered already */
2341 assert(mode != mode_b);
2342 op1 = get_Not_op(val);
2343 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* keep the original store's pinned state on the fused node */
2349 if (new_node != NULL) {
2350 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2351 get_irn_pinned(node) == op_pin_state_pinned) {
2352 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns true if an integer mode can be the target of an x87
 * float->int store (fist): signed and 16 or 32 bits wide. */
2359 static bool possible_int_mode_for_fp(ir_mode *mode)
2363 if (!mode_is_signed(mode))
2365 size = get_mode_size_bits(mode);
2366 if (size != 16 && size != 32)
/* Returns non-zero if the node is a Conv from a float mode to an
 * integer mode that a fist instruction can store directly. */
2371 static int is_float_to_int_conv(const ir_node *node)
2373 ir_mode *mode = get_irn_mode(node);
2377 if (!possible_int_mode_for_fp(mode))
2382 conv_op = get_Conv_op(node);
2383 conv_mode = get_irn_mode(conv_op);
2385 if (!mode_is_float(conv_mode))
2392 * Transform a Store(floatConst) into a sequence of
2395 * @return the created ia32 Store node
/* Transform a Store of a float constant into one or more 32-bit
 * integer immediate stores (the constant's bytes are assembled
 * little-endian, 4 at a time). Multiple stores are joined by a Sync;
 * the per-iteration offset/size bookkeeping is in elided lines. */
2397 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2399 ir_mode *mode = get_irn_mode(cns);
2400 unsigned size = get_mode_size_bytes(mode);
2401 tarval *tv = get_Const_tarval(cns);
2402 ir_node *block = get_nodes_block(node);
2403 ir_node *new_block = be_transform_node(block);
2404 ir_node *ptr = get_Store_ptr(node);
2405 ir_node *mem = get_Store_mem(node);
2406 dbg_info *dbgi = get_irn_dbg_info(node);
2410 ia32_address_t addr;
2412 assert(size % 4 == 0);
2415 build_address_ptr(&addr, ptr, mem);
/* assemble 4 constant bytes into one little-endian 32-bit immediate */
2419 get_tarval_sub_bits(tv, ofs) |
2420 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2421 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2422 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2423 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2425 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2426 addr.index, addr.mem, imm);
2428 set_irn_pinned(new_node, get_irn_pinned(node));
2429 set_ia32_op_type(new_node, ia32_AddrModeD);
2430 set_ia32_ls_mode(new_node, mode_Iu);
2431 set_address(new_node, &addr);
2432 SET_IA32_ORIG_NODE(new_node, node);
2435 ins[i++] = new_node;
2440 } while (size != 0);
/* join the partial stores' memories */
2443 return new_rd_Sync(dbgi, new_block, i, ins);
2450 * Generate a vfist or vfisttp instruction.
/* Generate a vfist or vfisttp instruction for a float->int store.
 * fisttp always pops the x87 top-of-stack, so a Keep on the result
 * Proj ensures the value is copied if it has other users; the classic
 * fist path needs an explicit truncating FPU control word instead. */
2452 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2453 ir_node *mem, ir_node *val, ir_node **fist)
2457 if (ia32_cg_config.use_fisttp) {
2458 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2459 if other users exists */
2460 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2461 ir_node *value = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2462 be_new_Keep(block, 1, &value);
2464 new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
2467 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2470 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2476 * Transforms a general (no special case) Store.
2478 * @return the created ia32 Store node
/* Transform a general (non-special-case) Store.
 * First tries destination address mode; otherwise emits xStore (SSE2
 * float), vfst (x87 float), a fist for float->int conversions directly
 * before the store, or a plain (8-bit or regular) integer Store.
 * Redundant float Convs before the store are skipped. */
2480 static ir_node *gen_general_Store(ir_node *node)
2482 ir_node *val = get_Store_value(node);
2483 ir_mode *mode = get_irn_mode(val);
2484 ir_node *block = get_nodes_block(node);
2485 ir_node *new_block = be_transform_node(block);
2486 ir_node *ptr = get_Store_ptr(node);
2487 ir_node *mem = get_Store_mem(node);
2488 dbg_info *dbgi = get_irn_dbg_info(node);
2489 ir_node *new_val, *new_node, *store;
2490 ia32_address_t addr;
2492 /* check for destination address mode */
2493 new_node = try_create_dest_am(node);
2494 if (new_node != NULL)
2497 /* construct store address */
2498 memset(&addr, 0, sizeof(addr));
2499 ia32_create_address_mode(&addr, ptr, 0);
2501 if (addr.base == NULL) {
2502 addr.base = noreg_GP;
2504 addr.base = be_transform_node(addr.base);
2507 if (addr.index == NULL) {
2508 addr.index = noreg_GP;
2510 addr.index = be_transform_node(addr.index);
2512 addr.mem = be_transform_node(mem);
2514 if (mode_is_float(mode)) {
2515 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2517 while (is_Conv(val) && mode == get_irn_mode(val)) {
2518 ir_node *op = get_Conv_op(val);
2519 if (!mode_is_float(get_irn_mode(op)))
2523 new_val = be_transform_node(val);
2524 if (ia32_cg_config.use_sse2) {
2525 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2526 addr.index, addr.mem, new_val);
2528 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2529 addr.index, addr.mem, new_val, mode);
/* x87 path: store the float value directly as int via fist */
2532 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2533 val = get_Conv_op(val);
2535 /* TODO: is this optimisation still necessary at all (middleend)? */
2536 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2537 while (is_Conv(val)) {
2538 ir_node *op = get_Conv_op(val);
2539 if (!mode_is_float(get_irn_mode(op)))
2541 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2545 new_val = be_transform_node(val);
2546 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
2548 new_val = create_immediate_or_transform(val, 0);
2549 assert(mode != mode_b);
2551 if (get_mode_size_bits(mode) == 8) {
2552 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2553 addr.index, addr.mem, new_val);
2555 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2556 addr.index, addr.mem, new_val);
2561 set_irn_pinned(store, get_irn_pinned(node));
2562 set_ia32_op_type(store, ia32_AddrModeD);
2563 set_ia32_ls_mode(store, mode);
2565 set_address(store, &addr);
2566 SET_IA32_ORIG_NODE(store, node);
2572 * Transforms a Store.
2574 * @return the created ia32 Store node
2576 static ir_node *gen_Store(ir_node *node)
2578 ir_node *val = get_Store_value(node);
2579 ir_mode *mode = get_irn_mode(val);
/* Dispatch: a Store of a floating-point constant gets a dedicated
 * integer-store sequence; everything else goes through the general
 * Store transformation. */
2581 if (mode_is_float(mode) && is_Const(val)) {
2582 /* We can transform every floating const store
2583 into a sequence of integer stores.
2584 If the constant is already in a register,
2585 it would be better to use it, but we don't
2586 have this information here. */
2587 return gen_float_const_Store(node, val);
2589 return gen_general_Store(node);
2593 * Transforms a Switch.
2595 * @return the created ia32 SwitchJmp node
2597 static ir_node *create_Switch(ir_node *node)
2599 dbg_info *dbgi = get_irn_dbg_info(node);
2600 ir_node *block = be_transform_node(get_nodes_block(node));
2601 ir_node *sel = get_Cond_selector(node);
2602 ir_node *new_sel = be_transform_node(sel);
/* min/max start at the extremes so the first real case value updates them */
2603 long switch_min = LONG_MAX;
2604 long switch_max = LONG_MIN;
2605 long default_pn = get_Cond_default_proj(node);
2607 const ir_edge_t *edge;
/* a SwitchJmp is only generated for 32bit selectors */
2609 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2611 /* determine the smallest switch case value */
2612 foreach_out_edge(node, edge) {
2613 ir_node *proj = get_edge_src_irn(edge);
2614 long pn = get_Proj_proj(proj);
/* the default proj does not contribute to the jump-table range */
2615 if (pn == default_pn)
2618 if (pn < switch_min)
2620 if (pn > switch_max)
/* refuse absurdly large jump tables (range, not number of cases) */
2624 if ((unsigned long) (switch_max - switch_min) > 128000) {
2625 panic("Size of switch %+F bigger than 128000", node);
2628 if (switch_min != 0) {
2629 /* if smallest switch case is not 0 we need an additional sub */
/* a Lea with a negative AM offset rebases the selector to start at 0 */
2630 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2631 add_ia32_am_offs_int(new_sel, -switch_min);
2632 set_ia32_op_type(new_sel, ia32_AddrModeS);
2634 SET_IA32_ORIG_NODE(new_sel, node);
2637 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2638 SET_IA32_ORIG_NODE(new_node, node);
2644 * Transform a Cond node.
2646 static ir_node *gen_Cond(ir_node *node)
2648 ir_node *block = get_nodes_block(node);
2649 ir_node *new_block = be_transform_node(block);
2650 dbg_info *dbgi = get_irn_dbg_info(node);
2651 ir_node *sel = get_Cond_selector(node);
2652 ir_mode *sel_mode = get_irn_mode(sel);
2653 ir_node *flags = NULL;
/* a non-mode_b selector means this is a switch, not an if */
2657 if (sel_mode != mode_b) {
2658 return create_Switch(node);
2661 /* we get flags from a Cmp */
2662 flags = get_flags_node(sel, &pnc);
/* conditional jump on the flags produced above */
2664 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2665 SET_IA32_ORIG_NODE(new_node, node);
2671 * Transform a be_Copy.
2673 static ir_node *gen_be_Copy(ir_node *node)
2675 ir_node *new_node = be_duplicate_node(node);
2676 ir_mode *mode = get_irn_mode(new_node);
/* all gp operations run on 32bit registers, so normalize the mode */
2678 if (ia32_mode_needs_gp_reg(mode)) {
2679 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare. Uses fucomi when available, otherwise
 * ftst/fucom + fnstsw followed by sahf to move the fpu status word
 * into the cpu flags. */
2685 static ir_node *create_Fucom(ir_node *node)
2687 dbg_info *dbgi = get_irn_dbg_info(node);
2688 ir_node *block = get_nodes_block(node);
2689 ir_node *new_block = be_transform_node(block);
2690 ir_node *left = get_Cmp_left(node);
2691 ir_node *new_left = be_transform_node(left);
2692 ir_node *right = get_Cmp_right(node);
2696 if (ia32_cg_config.use_fucomi) {
2697 new_right = be_transform_node(right);
2698 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2700 set_ia32_commutative(new_node);
2701 SET_IA32_ORIG_NODE(new_node, node);
/* comparing against 0 can use the shorter ftst */
2703 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2704 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2706 new_right = be_transform_node(right);
2707 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2710 set_ia32_commutative(new_node);
2712 SET_IA32_ORIG_NODE(new_node, node);
/* sahf copies ah (the fnstsw result) into the eflags register */
2714 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2715 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE float compare (ucomis[sd]); may fold one operand as a
 * memory operand via address-mode matching. */
2721 static ir_node *create_Ucomi(ir_node *node)
2723 dbg_info *dbgi = get_irn_dbg_info(node);
2724 ir_node *src_block = get_nodes_block(node);
2725 ir_node *new_block = be_transform_node(src_block);
2726 ir_node *left = get_Cmp_left(node);
2727 ir_node *right = get_Cmp_right(node);
2729 ia32_address_mode_t am;
2730 ia32_address_t *addr = &am.addr;
2732 match_arguments(&am, src_block, left, right, NULL,
2733 match_commutative | match_am);
2735 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2736 addr->mem, am.new_op1, am.new_op2,
2738 set_am_attributes(new_node, &am);
2740 SET_IA32_ORIG_NODE(new_node, node);
/* if a memory operand was folded, reroute the mem Proj */
2742 new_node = fix_mem_proj(new_node, &am);
2748 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2749 * to fold an and into a test node
2751 static bool can_fold_test_and(ir_node *node)
2753 const ir_edge_t *edge;
2755 /* we can only have eq and lg projs */
2756 foreach_out_edge(node, edge) {
2757 ir_node *proj = get_edge_src_irn(edge);
2758 pn_Cmp pnc = get_Proj_proj(proj);
/* test only sets ZF meaningfully for ==/!=; any other relation forbids folding */
2759 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2767 * returns true if it is assured, that the upper bits of a node are "clean"
2768 * which means for a 16 or 8 bit value, that the upper bits in the register
2769 * are 0 for unsigned and a copy of the most significant bit for signed
2772 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2774 assert(ia32_mode_needs_gp_reg(mode));
/* full-width values have no "upper bits" to worry about */
2775 if (get_mode_size_bits(mode) >= 32)
/* look through Projs at their producer */
2778 if (is_Proj(transformed_node))
2779 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2781 switch (get_ia32_irn_opcode(transformed_node)) {
2782 case iro_ia32_Conv_I2I:
2783 case iro_ia32_Conv_I2I8Bit: {
/* a conversion cleans the upper bits iff it extends with the same
 * signedness to at most the queried width */
2784 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2785 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2787 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2794 if (mode_is_signed(mode)) {
2795 return false; /* TODO handle signed modes */
/* unsigned Shr: a constant shift amount >= (32 - bits) guarantees zeros
 * in the upper bits */
2797 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2798 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2799 const ia32_immediate_attr_t *attr
2800 = get_ia32_immediate_attr_const(right);
2801 if (attr->symconst == 0 &&
2802 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2806 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2810 /* TODO too conservative if shift amount is constant */
2811 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned it suffices if one operand has clean upper bits
 * (zeros propagate through the and) */
2814 if (!mode_is_signed(mode)) {
2816 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2817 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2819 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary op: both operands must be clean */
2824 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2825 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2827 case iro_ia32_Const:
2828 case iro_ia32_Immediate: {
2829 const ia32_immediate_attr_t *attr =
2830 get_ia32_immediate_attr_const(transformed_node);
2831 if (mode_is_signed(mode)) {
/* signed: upper bits must all equal the sign bit (all 0 or all 1) */
2832 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2833 return shifted == 0 || shifted == -1;
/* unsigned: upper bits must be zero */
2835 unsigned long shifted = (unsigned long)attr->offset;
2836 shifted >>= get_mode_size_bits(mode);
2837 return shifted == 0;
2847 * Generate code for a Cmp.
2849 static ir_node *gen_Cmp(ir_node *node)
2851 dbg_info *dbgi = get_irn_dbg_info(node);
2852 ir_node *block = get_nodes_block(node);
2853 ir_node *new_block = be_transform_node(block);
2854 ir_node *left = get_Cmp_left(node);
2855 ir_node *right = get_Cmp_right(node);
2856 ir_mode *cmp_mode = get_irn_mode(left);
2858 ia32_address_mode_t am;
2859 ia32_address_t *addr = &am.addr;
/* float compares have their own SSE/x87 paths */
2862 if (mode_is_float(cmp_mode)) {
2863 if (ia32_cg_config.use_sse2) {
2864 return create_Ucomi(node);
2866 return create_Fucom(node);
2870 assert(ia32_mode_needs_gp_reg(cmp_mode));
2872 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2873 cmp_unsigned = !mode_is_signed(cmp_mode);
2874 if (is_Const_0(right) &&
2876 get_irn_n_edges(left) == 1 &&
2877 can_fold_test_and(node)) {
2878 /* Test(and_left, and_right) */
2879 ir_node *and_left = get_And_left(left);
2880 ir_node *and_right = get_And_right(left);
2882 /* matze: code here used mode instead of cmd_mode, I think it is always
2883 * the same as cmp_mode, but I leave this here to see if this is really
2886 assert(get_irn_mode(and_left) == cmp_mode);
2888 match_arguments(&am, block, and_left, and_right, NULL,
2890 match_am | match_8bit_am | match_16bit_am |
2891 match_am_and_immediates | match_immediate);
2893 /* use 32bit compare mode if possible since the opcode is smaller */
2894 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2895 upper_bits_clean(am.new_op2, cmp_mode)) {
2896 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2899 if (get_mode_size_bits(cmp_mode) == 8) {
2900 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2901 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2904 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2905 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2908 /* Cmp(left, right) */
2909 match_arguments(&am, block, left, right, NULL,
2910 match_commutative | match_am | match_8bit_am |
2911 match_16bit_am | match_am_and_immediates |
2913 /* use 32bit compare mode if possible since the opcode is smaller */
2914 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2915 upper_bits_clean(am.new_op2, cmp_mode)) {
2916 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2919 if (get_mode_size_bits(cmp_mode) == 8) {
2920 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2921 addr->index, addr->mem, am.new_op1,
2922 am.new_op2, am.ins_permuted,
2925 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2926 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2929 set_am_attributes(new_node, &am);
2930 set_ia32_ls_mode(new_node, cmp_mode);
2932 SET_IA32_ORIG_NODE(new_node, node);
/* if a memory operand was folded, reroute the mem Proj */
2934 new_node = fix_mem_proj(new_node, &am);
/* Create a conditional move (cmov) selecting between the Mux's true and
 * false values based on already-computed flags. Requires cmov support. */
2939 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2942 dbg_info *dbgi = get_irn_dbg_info(node);
2943 ir_node *block = get_nodes_block(node);
2944 ir_node *new_block = be_transform_node(block);
2945 ir_node *val_true = get_Mux_true(node);
2946 ir_node *val_false = get_Mux_false(node);
2948 ia32_address_mode_t am;
2949 ia32_address_t *addr;
2951 assert(ia32_cg_config.use_cmov);
2952 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
/* flags is passed as the "other" dependency so matching keeps it alive */
2956 match_arguments(&am, block, val_false, val_true, flags,
2957 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2959 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2960 addr->mem, am.new_op1, am.new_op2, new_flags,
2961 am.ins_permuted, pnc);
2962 set_am_attributes(new_node, &am);
2964 SET_IA32_ORIG_NODE(new_node, node);
2966 new_node = fix_mem_proj(new_node, &am);
2972 * Creates a ia32 Setcc instruction.
2974 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2975 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2978 ir_mode *mode = get_irn_mode(orig_node);
/* setcc only writes an 8bit register */
2981 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2982 SET_IA32_ORIG_NODE(new_node, orig_node);
2984 /* we might need to conv the result up */
2985 if (get_mode_size_bits(mode) > 8) {
/* zero-extend the 8bit set result (mode_Bu) to the full width */
2986 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
2987 nomem, new_node, mode_Bu);
2988 SET_IA32_ORIG_NODE(new_node, orig_node);
2995 * Create instruction for an unsigned Difference or Zero.
2997 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
2999 ir_mode *mode = get_irn_mode(psi);
3000 ir_node *new_node, *sub, *sbb, *eflags, *block;
/* compute a - b; result and carry flag are both needed below */
3004 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3005 match_mode_neutral | match_am | match_immediate | match_two_users);
3007 block = get_nodes_block(new_node);
3009 if (is_Proj(new_node)) {
3010 sub = get_Proj_pred(new_node);
3011 assert(is_ia32_Sub(sub));
/* turn the Sub into mode_T so we can also take its flags output */
3014 set_irn_mode(sub, mode_T);
3015 new_node = new_rd_Proj(NULL, block, sub, mode, pn_ia32_res);
3017 eflags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3019 dbgi = get_irn_dbg_info(psi);
/* sbb0 produces 0 if no borrow occurred, all-ones otherwise; the and
 * therefore yields (a-b) or 0 — the "difference or zero" result */
3020 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3022 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3023 set_ia32_commutative(new_node);
3028 * Create an const array of two float consts.
3030 * @param c0 the first constant
3031 * @param c1 the second constant
3032 * @param new_mode IN/OUT for the mode of the constants, if NULL
3033 * smallest possible mode will be used
3035 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3037 ir_mode *mode = *new_mode;
3039 ir_initializer_t *initializer;
3040 tarval *tv0 = get_Const_tarval(c0);
3041 tarval *tv1 = get_Const_tarval(c1);
3044 /* detect the best mode for the constants */
3045 mode = get_tarval_mode(tv0);
/* try to narrow to single (then double) precision when both constants
 * convert losslessly, to keep the constant array small */
3047 if (mode != mode_F) {
3048 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3049 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3051 tv0 = tarval_convert_to(tv0, mode);
3052 tv1 = tarval_convert_to(tv1, mode);
3053 } else if (mode != mode_D) {
3054 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3055 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3057 tv0 = tarval_convert_to(tv0, mode);
3058 tv1 = tarval_convert_to(tv1, mode);
/* build a static, local, constant global entity holding both values */
3065 tp = ia32_create_float_type(mode, 4);
3066 tp = ia32_create_float_array(tp);
3068 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3070 set_entity_ld_ident(ent, get_entity_ident(ent));
3071 set_entity_visibility(ent, visibility_local);
3072 set_entity_variability(ent, variability_constant);
3073 set_entity_allocation(ent, allocation_static);
3075 initializer = create_initializer_compound(2);
3077 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3078 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3080 set_entity_initializer(ent, initializer);
3087 * Transforms a Mux node into some code sequence.
3089 * @return The transformed node.
3091 static ir_node *gen_Mux(ir_node *node)
3093 dbg_info *dbgi = get_irn_dbg_info(node);
3094 ir_node *block = get_nodes_block(node);
3095 ir_node *new_block = be_transform_node(block);
3096 ir_node *mux_true = get_Mux_true(node);
3097 ir_node *mux_false = get_Mux_false(node);
3098 ir_node *cond = get_Mux_sel(node);
3099 ir_mode *mode = get_irn_mode(node);
3104 assert(get_irn_mode(cond) == mode_b);
3106 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3107 if (mode_is_float(mode)) {
3108 ir_node *cmp = get_Proj_pred(cond);
3109 ir_node *cmp_left = get_Cmp_left(cmp);
3110 ir_node *cmp_right = get_Cmp_right(cmp);
3111 pn_Cmp pnc = get_Proj_proj(cond);
3113 if (ia32_cg_config.use_sse2) {
/* recognize min/max patterns and map them to SSE min/max */
3114 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3115 if (cmp_left == mux_true && cmp_right == mux_false) {
3116 /* Mux(a <= b, a, b) => MIN */
3117 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3118 match_commutative | match_am | match_two_users);
3119 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3120 /* Mux(a <= b, b, a) => MAX */
3121 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3122 match_commutative | match_am | match_two_users);
3124 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3125 if (cmp_left == mux_true && cmp_right == mux_false) {
3126 /* Mux(a >= b, a, b) => MAX */
3127 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3128 match_commutative | match_am | match_two_users);
3129 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3130 /* Mux(a >= b, b, a) => MIN */
3131 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3132 match_commutative | match_am | match_two_users);
/* Mux of two float constants: materialize a 2-element constant array
 * and load entry 0 or 1 indexed by the setcc result */
3136 if (is_Const(mux_true) && is_Const(mux_false)) {
3137 ia32_address_mode_t am;
3142 flags = get_flags_node(cond, &pnc);
3143 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_permuted=*/0);
3145 if (ia32_cg_config.use_sse2) {
3146 /* cannot load from different mode on SSE */
3149 /* x87 can load any mode */
3153 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (AM scale maxes out at 8,
 * hence the extra Lea/Add cascades for larger elements) */
3155 switch (get_mode_size_bytes(new_mode)) {
3165 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3166 set_ia32_am_scale(new_node, 2);
3171 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3172 set_ia32_am_scale(new_node, 1);
3175 /* arg, shift 16 NOT supported */
3177 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3180 panic("Unsupported constant size");
/* build the source address mode: base = array entity, index = scaled setcc */
3183 am.ls_mode = new_mode;
3184 am.addr.base = noreg_GP;
3185 am.addr.index = new_node;
3186 am.addr.mem = nomem;
3188 am.addr.scale = scale;
3189 am.addr.use_frame = 0;
3190 am.addr.frame_entity = NULL;
3191 am.addr.symconst_sign = 0;
3192 am.mem_proj = am.addr.mem;
3193 am.op_type = ia32_AddrModeS;
3196 am.pinned = op_pin_state_floats;
3198 am.ins_permuted = 0;
3200 if (ia32_cg_config.use_sse2)
3201 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3203 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3204 set_am_attributes(load, &am);
3206 return new_rd_Proj(NULL, block, load, mode_vfp, pn_ia32_res);
3208 panic("cannot transform floating point Mux");
3211 assert(ia32_mode_needs_gp_reg(mode));
3213 if (is_Proj(cond)) {
3214 ir_node *cmp = get_Proj_pred(cond);
3216 ir_node *cmp_left = get_Cmp_left(cmp);
3217 ir_node *cmp_right = get_Cmp_right(cmp);
3218 pn_Cmp pnc = get_Proj_proj(cond);
3220 /* check for unsigned Doz first */
3221 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3222 is_Const_0(mux_false) && is_Sub(mux_true) &&
3223 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3224 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3225 return create_Doz(node, cmp_left, cmp_right);
3226 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3227 is_Const_0(mux_true) && is_Sub(mux_false) &&
3228 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3229 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3230 return create_Doz(node, cmp_left, cmp_right);
3235 flags = get_flags_node(cond, &pnc);
3237 if (is_Const(mux_true) && is_Const(mux_false)) {
3238 /* both are const, good */
/* 0/1 constants map directly to a setcc (possibly with inverted condition) */
3239 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3240 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_permuted=*/0);
3241 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3242 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_permuted=*/1);
3244 /* Not that simple. */
/* general case: conditional move */
3249 new_node = create_CMov(node, cond, flags, pnc);
3257 * Create a conversion from x87 state register to general purpose.
3259 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3261 ir_node *block = be_transform_node(get_nodes_block(node));
3262 ir_node *op = get_Conv_op(node);
3263 ir_node *new_op = be_transform_node(op);
3264 ir_graph *irg = current_ir_graph;
3265 dbg_info *dbgi = get_irn_dbg_info(node);
3266 ir_mode *mode = get_irn_mode(node);
3267 ir_node *fist, *load, *mem;
/* fist stores the x87 value as an integer into a frame slot */
3269 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3270 set_irn_pinned(fist, op_pin_state_floats);
3271 set_ia32_use_frame(fist);
3272 set_ia32_op_type(fist, ia32_AddrModeD);
3274 assert(get_mode_size_bits(mode) <= 32);
3275 /* exception we can only store signed 32 bit integers, so for unsigned
3276 we store a 64bit (signed) integer and load the lower bits */
3277 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3278 set_ia32_ls_mode(fist, mode_Ls);
3280 set_ia32_ls_mode(fist, mode_Is);
3282 SET_IA32_ORIG_NODE(fist, node);
/* read the (low 32 bits of the) stored integer back into a gp register */
3285 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3287 set_irn_pinned(load, op_pin_state_floats);
3288 set_ia32_use_frame(load);
3289 set_ia32_op_type(load, ia32_AddrModeS);
3290 set_ia32_ls_mode(load, mode_Is);
/* tell the spill-slot allocator how large the shared frame slot must be */
3291 if (get_ia32_ls_mode(fist) == mode_Ls) {
3292 ia32_attr_t *attr = get_ia32_attr(load);
3293 attr->data.need_64bit_stackent = 1;
3295 ia32_attr_t *attr = get_ia32_attr(load);
3296 attr->data.need_32bit_stackent = 1;
3298 SET_IA32_ORIG_NODE(load, node);
3300 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
3304 * Creates a x87 strict Conv by placing a Store and a Load
3306 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3308 ir_node *block = get_nodes_block(node);
3309 ir_graph *irg = get_Block_irg(block);
3310 dbg_info *dbgi = get_irn_dbg_info(node);
3311 ir_node *frame = get_irg_frame(irg);
3312 ir_node *store, *load;
/* the store/load round trip through memory truncates the x87 80bit
 * value to exactly tgt_mode precision, implementing strict semantics */
3315 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3316 set_ia32_use_frame(store);
3317 set_ia32_op_type(store, ia32_AddrModeD);
3318 SET_IA32_ORIG_NODE(store, node);
3320 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3321 set_ia32_use_frame(load);
3322 set_ia32_op_type(load, ia32_AddrModeS);
3323 SET_IA32_ORIG_NODE(load, node);
3325 new_node = new_r_Proj(block, load, mode_E, pn_ia32_vfld_res);
/* Create an integer-to-integer conversion, selecting the 8bit variant
 * when the target mode is 8 bits wide. */
3329 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3330 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3332 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3334 func = get_mode_size_bits(mode) == 8 ?
3335 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3336 return func(dbgi, block, base, index, mem, val, mode);
3340 * Create a conversion from general purpose to x87 register
3342 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3344 ir_node *src_block = get_nodes_block(node);
3345 ir_node *block = be_transform_node(src_block);
3346 ir_graph *irg = get_Block_irg(block);
3347 dbg_info *dbgi = get_irn_dbg_info(node);
3348 ir_node *op = get_Conv_op(node);
3349 ir_node *new_op = NULL;
3351 ir_mode *store_mode;
3356 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3357 if (possible_int_mode_for_fp(src_mode)) {
3358 ia32_address_mode_t am;
3360 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3361 if (am.op_type == ia32_AddrModeS) {
/* operand already lives in memory: fild can load it directly */
3362 ia32_address_t *addr = &am.addr;
3364 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3365 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3367 set_am_attributes(fild, &am);
3368 SET_IA32_ORIG_NODE(fild, node);
3370 fix_mem_proj(fild, &am);
/* fallback: spill the gp value to the frame and fild from there */
3375 if (new_op == NULL) {
3376 new_op = be_transform_node(op);
3379 mode = get_irn_mode(op);
3381 /* first convert to 32 bit signed if necessary */
3382 if (get_mode_size_bits(src_mode) < 32) {
3383 if (!upper_bits_clean(new_op, src_mode)) {
3384 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3385 SET_IA32_ORIG_NODE(new_op, node);
3390 assert(get_mode_size_bits(mode) == 32);
3393 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3395 set_ia32_use_frame(store);
3396 set_ia32_op_type(store, ia32_AddrModeD);
3397 set_ia32_ls_mode(store, mode_Iu);
3399 /* exception for 32bit unsigned, do a 64bit spill+load */
3400 if (!mode_is_signed(mode)) {
/* store a zero into the upper half so fild reads a non-negative
 * 64bit value (fild only understands signed integers) */
3403 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3405 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3406 noreg_GP, nomem, zero_const);
3408 set_ia32_use_frame(zero_store);
3409 set_ia32_op_type(zero_store, ia32_AddrModeD);
3410 add_ia32_am_offs_int(zero_store, 4);
3411 set_ia32_ls_mode(zero_store, mode_Iu);
/* the fild must see both stores complete: combine via Sync */
3416 store = new_rd_Sync(dbgi, block, 2, in);
3417 store_mode = mode_Ls;
3419 store_mode = mode_Is;
3423 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3425 set_ia32_use_frame(fild);
3426 set_ia32_op_type(fild, ia32_AddrModeS);
3427 set_ia32_ls_mode(fild, store_mode);
3429 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3435 * Create a conversion from one integer mode into another one
3437 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3438 dbg_info *dbgi, ir_node *block, ir_node *op,
3441 ir_node *new_block = be_transform_node(block);
3443 ir_mode *smaller_mode;
3444 ia32_address_mode_t am;
3445 ia32_address_t *addr = &am.addr;
/* the conversion only has to honour the narrower of the two modes */
3448 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3449 smaller_mode = src_mode;
3451 smaller_mode = tgt_mode;
3454 #ifdef DEBUG_libfirm
3456 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3461 match_arguments(&am, block, NULL, op, NULL,
3462 match_am | match_8bit_am | match_16bit_am);
3464 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3465 /* unnecessary conv. in theory it shouldn't have been AM */
3466 assert(is_ia32_NoReg_GP(addr->base));
3467 assert(is_ia32_NoReg_GP(addr->index));
3468 assert(is_NoMem(addr->mem));
3469 assert(am.addr.offset == 0);
3470 assert(am.addr.symconst_ent == NULL);
3474 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3475 addr->mem, am.new_op2, smaller_mode);
3476 set_am_attributes(new_node, &am);
3477 /* match_arguments assume that out-mode = in-mode, this isn't true here
3479 set_ia32_ls_mode(new_node, smaller_mode);
3480 SET_IA32_ORIG_NODE(new_node, node);
3481 new_node = fix_mem_proj(new_node, &am);
3486 * Transforms a Conv node.
3488 * @return The created ia32 Conv node
3490 static ir_node *gen_Conv(ir_node *node)
3492 ir_node *block = get_nodes_block(node);
3493 ir_node *new_block = be_transform_node(block);
3494 ir_node *op = get_Conv_op(node);
3495 ir_node *new_op = NULL;
3496 dbg_info *dbgi = get_irn_dbg_info(node);
3497 ir_mode *src_mode = get_irn_mode(op);
3498 ir_mode *tgt_mode = get_irn_mode(node);
3499 int src_bits = get_mode_size_bits(src_mode);
3500 int tgt_bits = get_mode_size_bits(tgt_mode);
3501 ir_node *res = NULL;
/* 64bit operations were lowered earlier; only <= 32bit ints remain */
3503 assert(!mode_is_int(src_mode) || src_bits <= 32);
3504 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3506 if (src_mode == mode_b) {
3507 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3508 /* nothing to do, we already model bools as 0/1 ints */
3509 return be_transform_node(op);
3512 if (src_mode == tgt_mode) {
3513 if (get_Conv_strict(node)) {
3514 if (ia32_cg_config.use_sse2) {
3515 /* when we are in SSE mode, we can kill all strict no-op conversion */
3516 return be_transform_node(op);
3519 /* this should be optimized already, but who knows... */
3520 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3521 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3522 return be_transform_node(op);
3526 if (mode_is_float(src_mode)) {
3527 new_op = be_transform_node(op);
3528 /* we convert from float ... */
3529 if (mode_is_float(tgt_mode)) {
3531 /* Matze: I'm a bit unsure what the following is for? seems wrong
3533 if (src_mode == mode_E && tgt_mode == mode_D
3534 && !get_Conv_strict(node)) {
3535 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3541 if (ia32_cg_config.use_sse2) {
3542 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3543 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3545 set_ia32_ls_mode(res, tgt_mode);
3547 if (get_Conv_strict(node)) {
3548 /* if fp_no_float_fold is not set then we assume that we
3549 * don't have any float operations in a non
3550 * mode_float_arithmetic mode and can skip strict upconvs */
3551 if (src_bits < tgt_bits
3552 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3553 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* strict x87 down-conv: force truncation via store/load */
3556 res = gen_x87_strict_conv(tgt_mode, new_op);
3557 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3561 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3566 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3567 if (ia32_cg_config.use_sse2) {
3568 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3570 set_ia32_ls_mode(res, src_mode);
3572 return gen_x87_fp_to_gp(node);
3576 /* we convert from int ... */
3577 if (mode_is_float(tgt_mode)) {
3579 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3580 if (ia32_cg_config.use_sse2) {
3581 new_op = be_transform_node(op);
3582 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3584 set_ia32_ls_mode(res, tgt_mode);
/* x87 path: every int fits into the 80bit format, but a strict
 * conversion is needed when the float mantissa cannot represent
 * every value of the integer source exactly */
3586 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3587 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3588 res = gen_x87_gp_to_fp(node, src_mode);
3590 /* we need a strict-Conv, if the int mode has more bits than the
3592 if (float_mantissa < int_mantissa) {
3593 res = gen_x87_strict_conv(tgt_mode, res);
3594 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3598 } else if (tgt_mode == mode_b) {
3599 /* mode_b lowering already took care that we only have 0/1 values */
3600 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3601 src_mode, tgt_mode));
3602 return be_transform_node(op);
3605 if (src_bits == tgt_bits) {
3606 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3607 src_mode, tgt_mode));
3608 return be_transform_node(op);
/* genuine int -> int width change */
3611 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode the node as an ia32 Immediate (subject to the given
 * constraint type); otherwise transform it normally. */
3619 static ir_node *create_immediate_or_transform(ir_node *node,
3620 char immediate_constraint_type)
3622 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3623 if (new_node == NULL) {
3624 new_node = be_transform_node(node);
3630 * Transforms a FrameAddr into an ia32 Add.
3632 static ir_node *gen_be_FrameAddr(ir_node *node)
3634 ir_node *block = be_transform_node(get_nodes_block(node));
3635 ir_node *op = be_get_FrameAddr_frame(node);
3636 ir_node *new_op = be_transform_node(op);
3637 dbg_info *dbgi = get_irn_dbg_info(node);
/* a Lea with the frame entity computes frame-pointer + entity offset */
3640 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3641 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3642 set_ia32_use_frame(new_node);
3644 SET_IA32_ORIG_NODE(new_node, node);
3650 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3652 static ir_node *gen_be_Return(ir_node *node)
3654 ir_graph *irg = current_ir_graph;
3655 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3656 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3657 ir_entity *ent = get_irg_entity(irg);
3658 ir_type *tp = get_entity_type(ent);
3663 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3664 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3666 int pn_ret_val, pn_ret_mem, arity, i;
/* only the SSE + float-return case needs special treatment; everything
 * else is duplicated unchanged */
3668 assert(ret_val != NULL);
3669 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3670 return be_duplicate_node(node);
3673 res_type = get_method_res_type(tp, 0);
3675 if (! is_Primitive_type(res_type)) {
3676 return be_duplicate_node(node);
3679 mode = get_type_mode(res_type);
3680 if (! mode_is_float(mode)) {
3681 return be_duplicate_node(node);
3684 assert(get_method_n_ress(tp) == 1);
3686 pn_ret_val = get_Proj_proj(ret_val);
3687 pn_ret_mem = get_Proj_proj(ret_mem);
3689 /* get the Barrier */
3690 barrier = get_Proj_pred(ret_val);
3692 /* get result input of the Barrier */
3693 ret_val = get_irn_n(barrier, pn_ret_val);
3694 new_ret_val = be_transform_node(ret_val);
3696 /* get memory input of the Barrier */
3697 ret_mem = get_irn_n(barrier, pn_ret_mem);
3698 new_ret_mem = be_transform_node(ret_mem);
3700 frame = get_irg_frame(irg);
3702 dbgi = get_irn_dbg_info(barrier);
3703 block = be_transform_node(get_nodes_block(barrier));
3705 /* store xmm0 onto stack */
3706 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3707 new_ret_mem, new_ret_val);
3708 set_ia32_ls_mode(sse_store, mode);
3709 set_ia32_op_type(sse_store, ia32_AddrModeD);
3710 set_ia32_use_frame(sse_store);
3712 /* load into x87 register */
3713 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3714 set_ia32_op_type(fld, ia32_AddrModeS);
3715 set_ia32_use_frame(fld);
3717 mproj = new_r_Proj(block, fld, mode_M, pn_ia32_vfld_M);
3718 fld = new_r_Proj(block, fld, mode_vfp, pn_ia32_vfld_res);
3720 /* create a new barrier */
/* rebuild the Barrier with the x87 value / new memory substituted for
 * the original return value / memory inputs */
3721 arity = get_irn_arity(barrier);
3722 in = ALLOCAN(ir_node*, arity);
3723 for (i = 0; i < arity; ++i) {
3726 if (i == pn_ret_val) {
3728 } else if (i == pn_ret_mem) {
3731 ir_node *in = get_irn_n(barrier, i);
3732 new_in = be_transform_node(in);
3737 new_barrier = new_ir_node(dbgi, irg, block,
3738 get_irn_op(barrier), get_irn_mode(barrier),
3740 copy_node_attr(barrier, new_barrier);
3741 be_duplicate_deps(barrier, new_barrier);
3742 be_set_transformed_node(barrier, new_barrier);
3744 /* transform normally */
3745 return be_duplicate_node(node);
3749 * Transform a be_AddSP into an ia32_SubSP.
/* Allocating stack space subtracts from ESP (the ia32 stack grows towards
 * lower addresses), so be_AddSP maps to an ia32 SubSP. */
3751 static ir_node *gen_be_AddSP(ir_node *node)
3753 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3754 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3756 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3757 match_am | match_immediate);
3761 * Transform a be_SubSP into an ia32_AddSP
/* Mirror of gen_be_AddSP: freeing stack space adds to ESP. */
3763 static ir_node *gen_be_SubSP(ir_node *node)
3765 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3766 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3768 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3769 match_am | match_immediate);
3773 * Change some phi modes
/* gen_Phi: duplicate a Phi and attach the output register requirement that
 * matches its mode: gp for integer/pointer modes, xmm (SSE2) or vfp (x87)
 * for float modes, no requirement otherwise. The predecessors are kept as
 * the old (untransformed) nodes and fixed up later, since Phis may sit in
 * loops. */
3775 static ir_node *gen_Phi(ir_node *node)
3777 const arch_register_req_t *req;
3778 ir_node *block = be_transform_node(get_nodes_block(node));
3779 ir_graph *irg = current_ir_graph;
3780 dbg_info *dbgi = get_irn_dbg_info(node);
3781 ir_mode *mode = get_irn_mode(node);
3784 if (ia32_mode_needs_gp_reg(mode)) {
3785 /* we shouldn't have any 64bit stuff around anymore */
3786 assert(get_mode_size_bits(mode) <= 32);
3787 /* all integer operations are on 32bit registers now */
3789 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
3790 } else if (mode_is_float(mode)) {
3791 if (ia32_cg_config.use_sse2) {
3793 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
3796 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
3799 req = arch_no_register_req;
3802 /* phi nodes allow loops, so we use the old arguments for now
3803 * and fix this later */
3804 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3805 get_irn_in(node) + 1);
3806 copy_node_attr(node, phi);
3807 be_duplicate_deps(node, phi);
3809 arch_set_out_register_req(phi, 0, req);
/* make sure the (still old) predecessors get transformed eventually */
3811 be_enqueue_preds(node);
/* gen_Jmp: transform an unconditional Jmp into an ia32 Jmp in the
 * transformed block. */
3816 static ir_node *gen_Jmp(ir_node *node)
3818 ir_node *block = get_nodes_block(node);
3819 ir_node *new_block = be_transform_node(block);
3820 dbg_info *dbgi = get_irn_dbg_info(node);
3823 new_node = new_bd_ia32_Jmp(dbgi, new_block);
3824 SET_IA32_ORIG_NODE(new_node, node);
/* gen_IJmp: transform an indirect jump. The jump target (a mode_P value)
 * may be folded into an address mode or an immediate via match_arguments. */
3832 static ir_node *gen_IJmp(ir_node *node)
3834 ir_node *block = get_nodes_block(node);
3835 ir_node *new_block = be_transform_node(block);
3836 dbg_info *dbgi = get_irn_dbg_info(node);
3837 ir_node *op = get_IJmp_target(node);
3839 ia32_address_mode_t am;
3840 ia32_address_t *addr = &am.addr;
3842 assert(get_irn_mode(op) == mode_P);
3844 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3846 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3847 addr->mem, am.new_op2);
3848 set_am_attributes(new_node, &am);
3849 SET_IA32_ORIG_NODE(new_node, node);
/* if a load was folded in, reroute its memory Proj */
3851 new_node = fix_mem_proj(new_node, &am);
3857 * Transform a Bound node.
/* Only the lower==0 case is supported: index < upper is checked with an
 * unsigned Sub + Jcc(Lt, unsigned), which also catches negative indices.
 * Any other Bound shape panics. */
3859 static ir_node *gen_Bound(ir_node *node)
3862 ir_node *lower = get_Bound_lower(node);
3863 dbg_info *dbgi = get_irn_dbg_info(node);
3865 if (is_Const_0(lower)) {
3866 /* typical case for Java */
3867 ir_node *sub, *res, *flags, *block;
3869 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3870 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3872 block = get_nodes_block(res);
3873 if (! is_Proj(res)) {
/* force mode_T so both result and flags Projs can hang off the Sub */
3875 set_irn_mode(sub, mode_T);
3876 res = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_res);
3878 sub = get_Proj_pred(res);
3880 flags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3881 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3882 SET_IA32_ORIG_NODE(new_node, node);
3884 panic("generic Bound not supported in ia32 Backend");
/* gen_ia32_l_ShlDep: lowered left shift with an explicit dependency on the
 * count input; maps straight to ia32 Shl. */
3890 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3892 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3893 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3895 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3896 match_immediate | match_mode_neutral);
/* gen_ia32_l_ShrDep: lowered logical right shift, maps to ia32 Shr. */
3899 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3901 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3902 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3903 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* gen_ia32_l_SarDep: lowered arithmetic right shift, maps to ia32 Sar. */
3907 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3909 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3910 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3911 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* gen_ia32_l_Add: lowered Add used by 64-bit arithmetic. The resulting ia32
 * Add is forced to mode_T — presumably so a flags (carry) Proj can be
 * attached for a following l_Adc; confirm against the full source. */
3915 static ir_node *gen_ia32_l_Add(ir_node *node)
3917 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3918 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3919 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3920 match_commutative | match_am | match_immediate |
3921 match_mode_neutral);
3923 if (is_Proj(lowered)) {
3924 lowered = get_Proj_pred(lowered);
3926 assert(is_ia32_Add(lowered));
3927 set_irn_mode(lowered, mode_T);
/* gen_ia32_l_Adc: lowered add-with-carry, maps to ia32 Adc via the
 * flags-consuming binop helper. */
3933 static ir_node *gen_ia32_l_Adc(ir_node *node)
3935 return gen_binop_flags(node, new_bd_ia32_Adc,
3936 match_commutative | match_am | match_immediate |
3937 match_mode_neutral);
3941 * Transforms a l_MulS into a "real" MulS node.
3943 * @return the created ia32 Mul node
/* Unsigned widening multiply used by 64-bit lowering. */
3945 static ir_node *gen_ia32_l_Mul(ir_node *node)
3947 ir_node *left = get_binop_left(node);
3948 ir_node *right = get_binop_right(node);
3950 return gen_binop(node, left, right, new_bd_ia32_Mul,
3951 match_commutative | match_am | match_mode_neutral);
3955 * Transforms a l_IMulS into a "real" IMul1OPS node.
3957 * @return the created ia32 IMul1OP node
/* Signed one-operand multiply used by 64-bit lowering. */
3959 static ir_node *gen_ia32_l_IMul(ir_node *node)
3961 ir_node *left = get_binop_left(node);
3962 ir_node *right = get_binop_right(node);
3964 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3965 match_commutative | match_am | match_mode_neutral);
/* gen_ia32_l_Sub: lowered Sub used by 64-bit arithmetic; forced to mode_T —
 * presumably so a borrow/flags Proj can feed a following l_Sbb; confirm
 * against the full source. */
3968 static ir_node *gen_ia32_l_Sub(ir_node *node)
3970 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3971 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3972 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3973 match_am | match_immediate | match_mode_neutral);
3975 if (is_Proj(lowered)) {
3976 lowered = get_Proj_pred(lowered);
3978 assert(is_ia32_Sub(lowered));
3979 set_irn_mode(lowered, mode_T);
/* gen_ia32_l_Sbb: lowered subtract-with-borrow, maps to ia32 Sbb. */
3985 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3987 return gen_binop_flags(node, new_bd_ia32_Sbb,
3988 match_am | match_immediate | match_mode_neutral);
3992 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3993 * op1 - target to be shifted
3994 * op2 - contains bits to be shifted into target
3996 * Only op3 can be an immediate.
/* Common helper for the 64-bit double shifts. Convs on the count are skipped
 * (the count has a single user and at least 5 significant bits) because the
 * hardware only looks at the low shift-count bits anyway. */
3998 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3999 ir_node *low, ir_node *count)
4001 ir_node *block = get_nodes_block(node);
4002 ir_node *new_block = be_transform_node(block);
4003 dbg_info *dbgi = get_irn_dbg_info(node);
4004 ir_node *new_high = be_transform_node(high);
4005 ir_node *new_low = be_transform_node(low);
4009 /* the shift amount can be any mode that is bigger than 5 bits, since all
4010 * other bits are ignored anyway */
4011 while (is_Conv(count) &&
4012 get_irn_n_edges(count) == 1 &&
4013 mode_is_int(get_irn_mode(count))) {
4014 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4015 count = get_Conv_op(count);
4017 new_count = create_immediate_or_transform(count, 0);
4019 if (is_ia32_l_ShlD(node)) {
4020 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4023 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4026 SET_IA32_ORIG_NODE(new_node, node);
/* gen_ia32_l_ShlD: 64-bit left double-shift, delegates to the common helper. */
4031 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4033 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4034 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4035 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4036 return gen_lowered_64bit_shifts(node, high, low, count);
/* gen_ia32_l_ShrD: 64-bit right double-shift, delegates to the common helper. */
4039 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4041 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4042 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4043 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4044 return gen_lowered_64bit_shifts(node, high, low, count);
/* gen_ia32_l_LLtoFloat: convert a 64-bit integer (given as low/high 32-bit
 * halves) to a float via the x87 unit: store both halves to the frame, Sync
 * the two stores, and fild the 64-bit slot. For unsigned sources the result
 * is corrected by adding a bias constant (ULL_BIAS) selected through the
 * sign bit of the high word (Shr by 31 used as address index). Not
 * implemented for SSE2. */
4047 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4049 ir_node *src_block = get_nodes_block(node);
4050 ir_node *block = be_transform_node(src_block);
4051 ir_graph *irg = current_ir_graph;
4052 dbg_info *dbgi = get_irn_dbg_info(node);
4053 ir_node *frame = get_irg_frame(irg);
4054 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4055 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4056 ir_node *new_val_low = be_transform_node(val_low);
4057 ir_node *new_val_high = be_transform_node(val_high);
4059 ir_node *sync, *fild, *res;
4060 ir_node *store_low, *store_high;
4062 if (ia32_cg_config.use_sse2) {
4063 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both halves of the 64-bit value into one frame slot */
4067 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4069 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4071 SET_IA32_ORIG_NODE(store_low, node);
4072 SET_IA32_ORIG_NODE(store_high, node);
4074 set_ia32_use_frame(store_low);
4075 set_ia32_use_frame(store_high);
4076 set_ia32_op_type(store_low, ia32_AddrModeD);
4077 set_ia32_op_type(store_high, ia32_AddrModeD);
4078 set_ia32_ls_mode(store_low, mode_Iu);
4079 set_ia32_ls_mode(store_high, mode_Is);
/* the high word lives 4 bytes above the low word (little endian) */
4080 add_ia32_am_offs_int(store_high, 4);
4084 sync = new_rd_Sync(dbgi, block, 2, in);
/* load the combined 64-bit slot into an x87 register */
4087 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4089 set_ia32_use_frame(fild);
4090 set_ia32_op_type(fild, ia32_AddrModeS);
4091 set_ia32_ls_mode(fild, mode_Ls);
4093 SET_IA32_ORIG_NODE(fild, node);
4095 res = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned fixup: fild always interprets the slot as signed, so when the
 * sign bit of the high word was set, add the bias constant */
4097 if (! mode_is_signed(get_irn_mode(val_high))) {
4098 ia32_address_mode_t am;
4100 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4103 am.addr.base = noreg_GP;
4104 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4105 am.addr.mem = nomem;
4108 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4109 am.addr.use_frame = 0;
4110 am.addr.frame_entity = NULL;
4111 am.addr.symconst_sign = 0;
4112 am.ls_mode = mode_F;
4113 am.mem_proj = nomem;
4114 am.op_type = ia32_AddrModeS;
4116 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4117 am.pinned = op_pin_state_floats;
4119 am.ins_permuted = 0;
4121 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4122 am.new_op1, am.new_op2, get_fpcw());
4123 set_am_attributes(fadd, &am);
4125 set_irn_mode(fadd, mode_T);
4126 res = new_rd_Proj(NULL, block, fadd, mode_vfp, pn_ia32_res);
/* gen_ia32_l_FloattoLL: convert a float to a 64-bit integer by fist-storing
 * the x87 value into a 64-bit frame slot; the two 32-bit halves are read
 * back later by gen_Proj_l_FloattoLL. */
4131 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4133 ir_node *src_block = get_nodes_block(node);
4134 ir_node *block = be_transform_node(src_block);
4135 ir_graph *irg = get_Block_irg(block);
4136 dbg_info *dbgi = get_irn_dbg_info(node);
4137 ir_node *frame = get_irg_frame(irg);
4138 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4139 ir_node *new_val = be_transform_node(val);
4140 ir_node *fist, *mem;
4142 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4143 SET_IA32_ORIG_NODE(fist, node);
4144 set_ia32_use_frame(fist);
4145 set_ia32_op_type(fist, ia32_AddrModeD);
4146 set_ia32_ls_mode(fist, mode_Ls);
4152 * the BAD transformer.
/* Fallback registered for node kinds that must never reach the transformer. */
4154 static ir_node *bad_transform(ir_node *node)
4156 panic("No transform function for %+F available.", node);
/* gen_Proj_l_FloattoLL: read one 32-bit half of the 64-bit result that
 * gen_ia32_l_FloattoLL fist-stored into the frame. The high half is at
 * offset +4; the frame slot must stay 64 bits wide even though each load
 * only reads 32 bits of it. */
4160 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4162 ir_node *block = be_transform_node(get_nodes_block(node));
4163 ir_graph *irg = get_Block_irg(block);
4164 ir_node *pred = get_Proj_pred(node);
4165 ir_node *new_pred = be_transform_node(pred);
4166 ir_node *frame = get_irg_frame(irg);
4167 dbg_info *dbgi = get_irn_dbg_info(node);
4168 long pn = get_Proj_proj(node);
4173 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4174 SET_IA32_ORIG_NODE(load, node);
4175 set_ia32_use_frame(load);
4176 set_ia32_op_type(load, ia32_AddrModeS);
4177 set_ia32_ls_mode(load, mode_Iu);
4178 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4179 * 32 bit from it with this particular load */
4180 attr = get_ia32_attr(load);
4181 attr->data.need_64bit_stackent = 1;
4183 if (pn == pn_ia32_l_FloattoLL_res_high) {
4184 add_ia32_am_offs_int(load, 4);
4186 assert(pn == pn_ia32_l_FloattoLL_res_low);
4189 proj = new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4195 * Transform the Projs of an AddSP.
/* be_AddSP was turned into an ia32 SubSP (see gen_be_AddSP), so the Proj
 * numbers are remapped onto the SubSP outputs; the sp result is pinned to
 * ESP. */
4197 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4199 ir_node *block = be_transform_node(get_nodes_block(node));
4200 ir_node *pred = get_Proj_pred(node);
4201 ir_node *new_pred = be_transform_node(pred);
4202 dbg_info *dbgi = get_irn_dbg_info(node);
4203 long proj = get_Proj_proj(node);
4205 if (proj == pn_be_AddSP_sp) {
4206 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4207 pn_ia32_SubSP_stack);
4208 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4210 } else if (proj == pn_be_AddSP_res) {
4211 return new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4212 pn_ia32_SubSP_addr);
4213 } else if (proj == pn_be_AddSP_M) {
4214 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_SubSP_M);
4217 panic("No idea how to transform proj->AddSP");
4221 * Transform the Projs of a SubSP.
/* Mirror of gen_Proj_be_AddSP: be_SubSP became an ia32 AddSP, so remap the
 * Proj numbers accordingly; the sp result is pinned to ESP. */
4223 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4225 ir_node *block = be_transform_node(get_nodes_block(node));
4226 ir_node *pred = get_Proj_pred(node);
4227 ir_node *new_pred = be_transform_node(pred);
4228 dbg_info *dbgi = get_irn_dbg_info(node);
4229 long proj = get_Proj_proj(node);
4231 if (proj == pn_be_SubSP_sp) {
4232 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4233 pn_ia32_AddSP_stack);
4234 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4236 } else if (proj == pn_be_SubSP_M) {
4237 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_AddSP_M);
4240 panic("No idea how to transform proj->SubSP");
4244 * Transform and renumber the Projs from a Load.
/* The Load may have been matched into another node's address mode, in which
 * case the transformed predecessor is not a load at all; only its memory
 * Proj is legal then. Multi-user ProjMs are deliberately left untransformed
 * so address-mode matching can still fold the load later. */
4246 static ir_node *gen_Proj_Load(ir_node *node)
4249 ir_node *block = be_transform_node(get_nodes_block(node));
4250 ir_node *pred = get_Proj_pred(node);
4251 dbg_info *dbgi = get_irn_dbg_info(node);
4252 long proj = get_Proj_proj(node);
4254 /* loads might be part of source address mode matches, so we don't
4255 * transform the ProjMs yet (with the exception of loads whose result is
4258 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4260 ir_node *old_block = get_nodes_block(node);
4262 /* this is needed, because sometimes we have loops that are only
4263 reachable through the ProjM */
4264 be_enqueue_preds(node);
4265 /* do it in 2 steps, to silence firm verifier */
4266 res = new_rd_Proj(dbgi, old_block, pred, mode_M, pn_Load_M);
4267 set_Proj_proj(res, pn_ia32_mem);
4271 /* renumber the proj */
4272 new_pred = be_transform_node(pred);
4273 if (is_ia32_Load(new_pred)) {
4276 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Load_res);
4278 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Load_M);
4279 case pn_Load_X_regular:
4280 return new_rd_Jmp(dbgi, block);
4281 case pn_Load_X_except:
4282 /* This Load might raise an exception. Mark it. */
4283 set_ia32_exc_label(new_pred, 1);
4284 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the load was folded into a Conv: switch it to mode_T and reroute */
4288 } else if (is_ia32_Conv_I2I(new_pred) ||
4289 is_ia32_Conv_I2I8Bit(new_pred)) {
4290 set_irn_mode(new_pred, mode_T);
4291 if (proj == pn_Load_res) {
4292 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_res);
4293 } else if (proj == pn_Load_M) {
4294 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_mem);
4296 } else if (is_ia32_xLoad(new_pred)) {
4299 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4301 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xLoad_M);
4302 case pn_Load_X_regular:
4303 return new_rd_Jmp(dbgi, block);
4304 case pn_Load_X_except:
4305 /* This Load might raise an exception. Mark it. */
4306 set_ia32_exc_label(new_pred, 1);
4307 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4311 } else if (is_ia32_vfld(new_pred)) {
4314 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4316 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfld_M);
4317 case pn_Load_X_regular:
4318 return new_rd_Jmp(dbgi, block);
4319 case pn_Load_X_except:
4320 /* This Load might raise an exception. Mark it. */
4321 set_ia32_exc_label(new_pred, 1);
4322 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4327 /* can happen for ProJMs when source address mode happened for the
4330 /* however it should not be the result proj, as that would mean the
4331 load had multiple users and should not have been used for
4333 if (proj != pn_Load_M) {
4334 panic("internal error: transformed node not a Load");
4336 return new_rd_Proj(dbgi, block, new_pred, mode_M, 1);
4339 panic("No idea how to transform proj");
4343 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod all map to a single ia32 Div/IDiv node, so the Proj
 * numbers of each opcode are remapped onto the Div outputs (div result,
 * mod result, memory, control flow). */
4345 static ir_node *gen_Proj_DivMod(ir_node *node)
4347 ir_node *block = be_transform_node(get_nodes_block(node));
4348 ir_node *pred = get_Proj_pred(node);
4349 ir_node *new_pred = be_transform_node(pred);
4350 dbg_info *dbgi = get_irn_dbg_info(node);
4351 long proj = get_Proj_proj(node);
4353 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4355 switch (get_irn_opcode(pred)) {
4359 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4361 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4362 case pn_Div_X_regular:
4363 return new_rd_Jmp(dbgi, block);
4364 case pn_Div_X_except:
4365 set_ia32_exc_label(new_pred, 1);
4366 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4374 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4376 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4377 case pn_Mod_X_except:
4378 set_ia32_exc_label(new_pred, 1);
4379 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4387 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4388 case pn_DivMod_res_div:
4389 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4390 case pn_DivMod_res_mod:
4391 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4392 case pn_DivMod_X_regular:
4393 return new_rd_Jmp(dbgi, block);
4394 case pn_DivMod_X_except:
4395 set_ia32_exc_label(new_pred, 1);
4396 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4405 panic("No idea how to transform proj->DivMod");
4409 * Transform and renumber the Projs from a CopyB.
/* The CopyB may have been lowered to either the immediate-size (CopyB_i) or
 * the generic variant; remap the memory Proj to whichever was built. */
4411 static ir_node *gen_Proj_CopyB(ir_node *node)
4413 ir_node *block = be_transform_node(get_nodes_block(node));
4414 ir_node *pred = get_Proj_pred(node);
4415 ir_node *new_pred = be_transform_node(pred);
4416 dbg_info *dbgi = get_irn_dbg_info(node);
4417 long proj = get_Proj_proj(node);
4420 case pn_CopyB_M_regular:
4421 if (is_ia32_CopyB_i(new_pred)) {
4422 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4423 } else if (is_ia32_CopyB(new_pred)) {
4424 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_M);
4431 panic("No idea how to transform proj->CopyB");
4435 * Transform and renumber the Projs from a Quot.
/* A float division became either an SSE xDiv or an x87 vfdiv; remap the
 * memory and result Projs to match (xmm mode for SSE, vfp mode for x87). */
4437 static ir_node *gen_Proj_Quot(ir_node *node)
4439 ir_node *block = be_transform_node(get_nodes_block(node));
4440 ir_node *pred = get_Proj_pred(node);
4441 ir_node *new_pred = be_transform_node(pred);
4442 dbg_info *dbgi = get_irn_dbg_info(node);
4443 long proj = get_Proj_proj(node);
4447 if (is_ia32_xDiv(new_pred)) {
4448 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xDiv_M);
4449 } else if (is_ia32_vfdiv(new_pred)) {
4450 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4454 if (is_ia32_xDiv(new_pred)) {
4455 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4456 } else if (is_ia32_vfdiv(new_pred)) {
4457 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4460 case pn_Quot_X_regular:
4461 case pn_Quot_X_except:
4466 panic("No idea how to transform proj->Quot");
/* gen_be_Call: transform an indirect be_Call into an ia32 Call.
 * The call target may be folded into an address mode or immediate; register
 * arguments are routed to EAX/ECX/EDX according to their "limited" register
 * requirements; the last input is the fpcw. Float-returning calls enable
 * the x87 simulator; SSE2 calls are collected for post-processing. */
4469 static ir_node *gen_be_Call(ir_node *node)
4471 dbg_info *const dbgi = get_irn_dbg_info(node);
4472 ir_node *const src_block = get_nodes_block(node);
4473 ir_node *const block = be_transform_node(src_block);
4474 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4475 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4476 ir_node *const sp = be_transform_node(src_sp);
4477 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4478 ia32_address_mode_t am;
4479 ia32_address_t *const addr = &am.addr;
4484 ir_node * eax = noreg_GP;
4485 ir_node * ecx = noreg_GP;
4486 ir_node * edx = noreg_GP;
4487 unsigned const pop = be_Call_get_pop(node);
4488 ir_type *const call_tp = be_Call_get_type(node);
4489 int old_no_pic_adjust;
4491 /* Run the x87 simulator if the call returns a float value */
4492 if (get_method_n_ress(call_tp) > 0) {
4493 ir_type *const res_type = get_method_res_type(call_tp, 0);
4494 ir_mode *const res_mode = get_type_mode(res_type);
4496 if (res_mode != NULL && mode_is_float(res_mode)) {
4497 env_cg->do_x87_sim = 1;
4501 /* We do not want be_Call direct calls */
4502 assert(be_Call_get_entity(node) == NULL);
4504 /* special case for PIC trampoline calls */
4505 old_no_pic_adjust = no_pic_adjust;
4506 no_pic_adjust = env_cg->birg->main_env->options->pic;
4508 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4509 match_am | match_immediate);
4511 no_pic_adjust = old_no_pic_adjust;
/* walk the inputs backwards: fpcw is last, then the GP register params */
4513 i = get_irn_arity(node) - 1;
4514 fpcw = be_transform_node(get_irn_n(node, i--));
4515 for (; i >= be_pos_Call_first_arg; --i) {
4516 arch_register_req_t const *const req = arch_get_register_req(node, i);
4517 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4519 assert(req->type == arch_register_req_type_limited);
4520 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4522 switch (*req->limited) {
4523 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4524 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4525 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4526 default: panic("Invalid GP register for register parameter");
4530 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4531 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4532 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4533 set_am_attributes(call, &am);
4534 call = fix_mem_proj(call, &am);
4536 if (get_irn_pinned(node) == op_pin_state_pinned)
4537 set_irn_pinned(call, op_pin_state_pinned);
4539 SET_IA32_ORIG_NODE(call, node);
4541 if (ia32_cg_config.use_sse2) {
4542 /* remember this call for post-processing */
4543 ARR_APP1(ir_node *, call_list, call);
4544 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4551 * Transform Builtin trap
/* Lowered to the ia32 UD2 instruction, which raises an invalid-opcode fault. */
4553 static ir_node *gen_trap(ir_node *node) {
4554 dbg_info *dbgi = get_irn_dbg_info(node);
4555 ir_node *block = be_transform_node(get_nodes_block(node));
4556 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4558 return new_bd_ia32_UD2(dbgi, block, mem);
4562 * Transform Builtin debugbreak
/* Lowered to the ia32 Breakpoint node (debug trap). */
4564 static ir_node *gen_debugbreak(ir_node *node) {
4565 dbg_info *dbgi = get_irn_dbg_info(node);
4566 ir_node *block = be_transform_node(get_nodes_block(node));
4567 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4569 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4573 * Transform Builtin return_address
/* param 0 is a constant frame level; for non-zero levels the frame chain is
 * walked with a ClimbFrame pseudo-op (its helper ProduceVal inputs only
 * reserve registers), then the return address slot of the selected frame is
 * loaded. Floating (unpinned) loads are marked rematerializable. */
4575 static ir_node *gen_return_address(ir_node *node) {
4576 ir_node *param = get_Builtin_param(node, 0);
4577 ir_node *frame = get_Builtin_param(node, 1);
4578 dbg_info *dbgi = get_irn_dbg_info(node);
4579 tarval *tv = get_Const_tarval(param);
4580 unsigned long value = get_tarval_long(tv);
4582 ir_node *block = be_transform_node(get_nodes_block(node));
4583 ir_node *ptr = be_transform_node(frame);
4587 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4588 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4589 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4592 /* load the return address from this frame */
4593 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4595 set_irn_pinned(load, get_irn_pinned(node));
4596 set_ia32_op_type(load, ia32_AddrModeS);
4597 set_ia32_ls_mode(load, mode_Iu);
4599 set_ia32_am_offs_int(load, 0);
4600 set_ia32_use_frame(load);
4601 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4603 if (get_irn_pinned(node) == op_pin_state_floats) {
4604 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4605 && pn_ia32_vfld_res == pn_ia32_Load_res
4606 && pn_ia32_Load_res == pn_ia32_res);
4607 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4610 SET_IA32_ORIG_NODE(load, node);
4611 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4615 * Transform Builtin frame_address
/* Same structure as gen_return_address, but loads the saved frame pointer
 * slot of the selected frame instead of the return address. */
4617 static ir_node *gen_frame_address(ir_node *node) {
4618 ir_node *param = get_Builtin_param(node, 0);
4619 ir_node *frame = get_Builtin_param(node, 1);
4620 dbg_info *dbgi = get_irn_dbg_info(node);
4621 tarval *tv = get_Const_tarval(param);
4622 unsigned long value = get_tarval_long(tv);
4624 ir_node *block = be_transform_node(get_nodes_block(node));
4625 ir_node *ptr = be_transform_node(frame);
4630 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4631 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4632 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4635 /* load the frame address from this frame */
4636 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4638 set_irn_pinned(load, get_irn_pinned(node));
4639 set_ia32_op_type(load, ia32_AddrModeS);
4640 set_ia32_ls_mode(load, mode_Iu);
4642 ent = ia32_get_frame_address_entity();
4644 set_ia32_am_offs_int(load, 0);
4645 set_ia32_use_frame(load);
4646 set_ia32_frame_ent(load, ent);
4648 /* will fail anyway, but gcc does this: */
4649 set_ia32_am_offs_int(load, 0);
4652 if (get_irn_pinned(node) == op_pin_state_floats) {
4653 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4654 && pn_ia32_vfld_res == pn_ia32_Load_res
4655 && pn_ia32_Load_res == pn_ia32_res);
4656 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4659 SET_IA32_ORIG_NODE(load, node);
4660 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4664 * Transform Builtin prefetch
/* (comment fixed: the original said "frame_address", a copy-paste error.)
 * param 0 = address, param 1 = rw flag, param 2 = locality hint.
 * Without SSE or 3DNow! prefetch support only the memory edge is routed.
 * rw==1 with 3DNow! selects PrefetchW; SSE selects PrefetchNTA/2/1/0 by
 * locality (rw ignored); otherwise the plain 3DNow! Prefetch is used. */
4666 static ir_node *gen_prefetch(ir_node *node) {
4668 ir_node *ptr, *block, *mem, *base, *index;
4669 ir_node *param, *new_node;
4672 ia32_address_t addr;
4674 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4675 /* no prefetch at all, route memory */
4676 return be_transform_node(get_Builtin_mem(node));
4679 param = get_Builtin_param(node, 1);
4680 tv = get_Const_tarval(param);
4681 rw = get_tarval_long(tv);
4683 /* construct load address */
4684 memset(&addr, 0, sizeof(addr));
4685 ptr = get_Builtin_param(node, 0);
4686 ia32_create_address_mode(&addr, ptr, 0);
4693 base = be_transform_node(base);
4696 if (index == NULL) {
4699 index = be_transform_node(index);
4702 dbgi = get_irn_dbg_info(node);
4703 block = be_transform_node(get_nodes_block(node));
4704 mem = be_transform_node(get_Builtin_mem(node));
4706 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4707 /* we have 3DNow!, this was already checked above */
4708 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4709 } else if (ia32_cg_config.use_sse_prefetch) {
4710 /* note: rw == 1 is IGNORED in that case */
4711 param = get_Builtin_param(node, 2);
4712 tv = get_Const_tarval(param);
4713 locality = get_tarval_long(tv);
4715 /* SSE style prefetch */
4718 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4721 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4724 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4727 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4731 assert(ia32_cg_config.use_3dnow_prefetch);
4732 /* 3DNow! style prefetch */
4733 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4736 set_irn_pinned(new_node, get_irn_pinned(node));
4737 set_ia32_op_type(new_node, ia32_AddrModeS);
4738 set_ia32_ls_mode(new_node, mode_Bu);
4739 set_address(new_node, &addr);
4741 SET_IA32_ORIG_NODE(new_node, node);
4743 be_dep_on_frame(new_node);
4744 return new_r_Proj(block, new_node, mode_M, pn_ia32_Prefetch_M);
4748 * Transform bsf like node
/* Shared helper for unary builtins (Bsf/Bsr/Popcnt): match the single
 * parameter with address-mode folding and build the node via the supplied
 * constructor. */
4750 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4752 ir_node *param = get_Builtin_param(node, 0);
4753 dbg_info *dbgi = get_irn_dbg_info(node);
4755 ir_node *block = get_nodes_block(node);
4756 ir_node *new_block = be_transform_node(block);
4758 ia32_address_mode_t am;
4759 ia32_address_t *addr = &am.addr;
4762 match_arguments(&am, block, NULL, param, NULL, match_am);
4764 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4765 set_am_attributes(cnt, &am);
4766 set_ia32_ls_mode(cnt, get_irn_mode(param));
4768 SET_IA32_ORIG_NODE(cnt, node);
4769 return fix_mem_proj(cnt, &am);
4773 * Transform builtin ffs.
/* ffs(x) is built branch-free from Bsf:
 *   result = (bsf(x) | -(x == 0)) + 1
 * The flags Proj of the Bsf feeds a Set(Eq) (x was zero), which is widened
 * to 32 bit, negated to 0 or -1, ORed over the Bsf result, and incremented
 * — yielding 0 for x == 0 and bit-index+1 otherwise. */
4775 static ir_node *gen_ffs(ir_node *node)
4777 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4778 ir_node *real = skip_Proj(bsf);
4779 dbg_info *dbgi = get_irn_dbg_info(real);
4780 ir_node *block = get_nodes_block(real);
4781 ir_node *flag, *set, *conv, *neg, *or;
4784 if (get_irn_mode(real) != mode_T) {
4785 set_irn_mode(real, mode_T);
4786 bsf = new_r_Proj(block, real, mode_Iu, pn_ia32_res);
4789 flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);
4792 set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
4793 SET_IA32_ORIG_NODE(set, node);
4796 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4797 SET_IA32_ORIG_NODE(conv, node);
4800 neg = new_bd_ia32_Neg(dbgi, block, conv);
4803 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4804 set_ia32_commutative(or);
4807 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4811 * Transform builtin clz.
/* clz(x) = 31 - bsr(x), computed as bsr(x) XOR 31 (equivalent for the
 * 5-bit result range of a 32-bit Bsr). */
4813 static ir_node *gen_clz(ir_node *node)
4815 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4816 ir_node *real = skip_Proj(bsr);
4817 dbg_info *dbgi = get_irn_dbg_info(real);
4818 ir_node *block = get_nodes_block(real);
4819 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4821 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
4825 * Transform builtin ctz.
/* ctz(x) is exactly the bit index returned by Bsf. */
4827 static ir_node *gen_ctz(ir_node *node)
4829 return gen_unop_AM(node, new_bd_ia32_Bsf);
4833 * Transform builtin parity.
/* Compares the value against an immediate 0 and materializes the parity
 * flag with Set(ia32_pn_Cmp_parity), widened to 32 bit.
 * NOTE(review): the ia32 parity flag only reflects the least-significant
 * byte of a result; lines missing from this listing may fold the upper
 * bytes first — verify against the full source. */
4835 static ir_node *gen_parity(ir_node *node)
4837 ir_node *param = get_Builtin_param(node, 0);
4838 dbg_info *dbgi = get_irn_dbg_info(node);
4840 ir_node *block = get_nodes_block(node);
4842 ir_node *new_block = be_transform_node(block);
4843 ir_node *imm, *cmp, *new_node;
4845 ia32_address_mode_t am;
4846 ia32_address_t *addr = &am.addr;
4850 match_arguments(&am, block, NULL, param, NULL, match_am);
4851 imm = ia32_create_Immediate(NULL, 0, 0);
4852 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4853 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4854 set_am_attributes(cmp, &am);
4855 set_ia32_ls_mode(cmp, mode_Iu);
4857 SET_IA32_ORIG_NODE(cmp, node);
4859 cmp = fix_mem_proj(cmp, &am);
4862 new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
4863 SET_IA32_ORIG_NODE(new_node, node);
4866 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4867 nomem, new_node, mode_Bu);
4868 SET_IA32_ORIG_NODE(new_node, node);
4873 * Transform builtin popcount
4875 static ir_node *gen_popcount(ir_node *node) {
4876 ir_node *param = get_Builtin_param(node, 0);
4877 dbg_info *dbgi = get_irn_dbg_info(node);
4879 ir_node *block = get_nodes_block(node);
4880 ir_node *new_block = be_transform_node(block);
4883 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4885 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4886 if (ia32_cg_config.use_popcnt) {
4887 ia32_address_mode_t am;
4888 ia32_address_t *addr = &am.addr;
4891 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4893 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4894 set_am_attributes(cnt, &am);
4895 set_ia32_ls_mode(cnt, get_irn_mode(param));
4897 SET_IA32_ORIG_NODE(cnt, node);
4898 return fix_mem_proj(cnt, &am);
4901 new_param = be_transform_node(param);
4903 /* do the standard popcount algo */
4905 /* m1 = x & 0x55555555 */
4906 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4907 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4910 simm = ia32_create_Immediate(NULL, 0, 1);
4911 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4913 /* m2 = s1 & 0x55555555 */
4914 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4917 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4919 /* m4 = m3 & 0x33333333 */
4920 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4921 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4924 simm = ia32_create_Immediate(NULL, 0, 2);
4925 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4927 /* m5 = s2 & 0x33333333 */
4928 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4931 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4933 /* m7 = m6 & 0x0F0F0F0F */
4934 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4935 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4938 simm = ia32_create_Immediate(NULL, 0, 4);
4939 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4941 /* m8 = s3 & 0x0F0F0F0F */
4942 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4945 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4947 /* m10 = m9 & 0x00FF00FF */
4948 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4949 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4952 simm = ia32_create_Immediate(NULL, 0, 8);
4953 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4955 /* m11 = s4 & 0x00FF00FF */
4956 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4958 /* m12 = m10 + m11 */
4959 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4961 /* m13 = m12 & 0x0000FFFF */
4962 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4963 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4965 /* s5 = m12 >> 16 */
4966 simm = ia32_create_Immediate(NULL, 0, 16);
4967 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4969 /* res = m13 + s5 */
4970 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4974 * Transform builtin byte swap.
4976 static ir_node *gen_bswap(ir_node *node) {
4977 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4978 dbg_info *dbgi = get_irn_dbg_info(node);
4980 ir_node *block = get_nodes_block(node);
4981 ir_node *new_block = be_transform_node(block);
4982 ir_mode *mode = get_irn_mode(param);
4983 unsigned size = get_mode_size_bits(mode);
4984 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
4988 if (ia32_cg_config.use_i486) {
4989 /* swap available */
4990 return new_bd_ia32_Bswap(dbgi, new_block, param);
4992 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4993 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4995 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
4996 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
4998 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5000 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5001 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5003 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5004 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5007 /* swap16 always available */
5008 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5011 panic("Invalid bswap size (%d)", size);
5016 * Transform builtin outport.
5018 static ir_node *gen_outport(ir_node *node) {
5019 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5020 ir_node *oldv = get_Builtin_param(node, 1);
5021 ir_mode *mode = get_irn_mode(oldv);
5022 ir_node *value = be_transform_node(oldv);
5023 ir_node *block = be_transform_node(get_nodes_block(node));
5024 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5025 dbg_info *dbgi = get_irn_dbg_info(node);
5027 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5028 set_ia32_ls_mode(res, mode);
5033 * Transform builtin inport.
5035 static ir_node *gen_inport(ir_node *node) {
5036 ir_type *tp = get_Builtin_type(node);
5037 ir_type *rstp = get_method_res_type(tp, 0);
5038 ir_mode *mode = get_type_mode(rstp);
5039 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5040 ir_node *block = be_transform_node(get_nodes_block(node));
5041 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5042 dbg_info *dbgi = get_irn_dbg_info(node);
5044 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5045 set_ia32_ls_mode(res, mode);
5047 /* check for missing Result Proj */
/**
 * Transform a builtin inner trampoline.
 *
 * Emits stores that write a small call stub into memory at the address
 * given by parameter 0 and yields a Tuple of (memory, stub address).
 * The stub is: "mov ecx, <env>" (opcode byte 0xB9 + imm32) followed by a
 * pc-relative "jmp <callee>" (opcode byte 0xE9 + rel32).
 */
static ir_node *gen_inner_trampoline(ir_node *node) {
	ir_node *ptr = get_Builtin_param(node, 0);     /* stub destination */
	ir_node *callee = get_Builtin_param(node, 1);  /* jump target */
	ir_node *env = be_transform_node(get_Builtin_param(node, 2));
	ir_node *mem = get_Builtin_mem(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *trampoline;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ia32_address_t addr;

	/* construct store address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, 0);

	if (addr.base == NULL) {
		addr.base = noreg_GP;
		addr.base = be_transform_node(addr.base);

	if (addr.index == NULL) {
		addr.index = noreg_GP;
		addr.index = be_transform_node(addr.index);
	addr.mem = be_transform_node(mem);

	/* mov ecx, <env> */
	/* 0xB9 is the one-byte opcode of "mov ecx, imm32" */
	val = ia32_create_Immediate(NULL, 0, 0xB9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD); /* store destination in memory */
	set_ia32_ls_mode(store, mode_Bu);        /* single opcode byte */
	set_address(store, &addr);

	/* store the 32bit environment value (the mov's immediate operand) */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, env);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);

	/* jmp rel <callee> */
	/* 0xE9 is the one-byte opcode of "jmp rel32" */
	val = ia32_create_Immediate(NULL, 0, 0xE9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);

	trampoline = be_transform_node(ptr);

	/* the callee is typically an immediate */
	if (is_SymConst(callee)) {
		/* -10: the rel32 displacement is relative to the end of the
		 * 10-byte stub, so subtract the stub length up front */
		rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
		rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
	/* displacement = callee - 10 - trampoline */
	rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);

	/* write the jump displacement into the stub */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, rel);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);

	/* in[0]: final memory, in[1]: trampoline address */
	return new_r_Tuple(new_block, 2, in);
5141 * Transform Builtin node.
5143 static ir_node *gen_Builtin(ir_node *node) {
5144 ir_builtin_kind kind = get_Builtin_kind(node);
5148 return gen_trap(node);
5149 case ir_bk_debugbreak:
5150 return gen_debugbreak(node);
5151 case ir_bk_return_address:
5152 return gen_return_address(node);
5153 case ir_bk_frame_address:
5154 return gen_frame_address(node);
5155 case ir_bk_prefetch:
5156 return gen_prefetch(node);
5158 return gen_ffs(node);
5160 return gen_clz(node);
5162 return gen_ctz(node);
5164 return gen_parity(node);
5165 case ir_bk_popcount:
5166 return gen_popcount(node);
5168 return gen_bswap(node);
5170 return gen_outport(node);
5172 return gen_inport(node);
5173 case ir_bk_inner_trampoline:
5174 return gen_inner_trampoline(node);
5176 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform Proj(Builtin) node.
 *
 * Maps each Builtin Proj onto the output of the already-transformed node:
 * value-only builtins and memory-only builtins pass the transformed node
 * through; inport splits into result/memory Projs; inner_trampoline
 * unpacks the Tuple built by gen_inner_trampoline.
 */
static ir_node *gen_Proj_Builtin(ir_node *proj) {
	ir_node *node = get_Proj_pred(proj);
	ir_node *new_node = be_transform_node(node);
	ir_builtin_kind kind = get_Builtin_kind(node);

	/* builtins whose transformed node is the value result itself */
	case ir_bk_return_address:
	case ir_bk_frame_address:
	case ir_bk_popcount:
		assert(get_Proj_proj(proj) == pn_Builtin_1_result);
	/* builtins that only produce memory */
	case ir_bk_debugbreak:
	case ir_bk_prefetch:
		assert(get_Proj_proj(proj) == pn_Builtin_M);
	/* inport: produces both a value and a memory output */
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			return new_r_Proj(get_nodes_block(new_node),
			                  new_node, get_irn_mode(proj), pn_ia32_Inport_res);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return new_r_Proj(get_nodes_block(new_node),
		                  new_node, mode_M, pn_ia32_Inport_M);
	case ir_bk_inner_trampoline:
		/* Tuple from gen_inner_trampoline: pred 1 = address, pred 0 = memory */
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			return get_Tuple_pred(new_node, 1);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return get_Tuple_pred(new_node, 0);

	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
}
5224 static ir_node *gen_be_IncSP(ir_node *node)
5226 ir_node *res = be_duplicate_node(node);
5227 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
/**
 * Transform the Projs from a be_Call.
 *
 * Maps be_Call proj numbers onto the matching ia32_Call proj numbers and
 * pins the stack pointer / fpcw outputs to their fixed registers.
 */
static ir_node *gen_Proj_be_Call(ir_node *node)
{
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *call = get_Proj_pred(node);
	ir_node *new_call = be_transform_node(call);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	ir_mode *mode = get_irn_mode(node);

	if (proj == pn_be_Call_M_regular) {
		return new_rd_Proj(dbgi, block, new_call, mode_M, n_ia32_Call_mem);
	/* transform call modes */
	if (mode_is_data(mode)) {
		/* data results are rebuilt in the mode of their register class */
		const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);

	/* Map from be_Call to ia32_Call proj number */
	if (proj == pn_be_Call_sp) {
		proj = pn_ia32_Call_stack;
	} else if (proj == pn_be_Call_M_regular) {
		proj = pn_ia32_Call_M;
		/* regular result: find the ia32_Call output carrying the same
		 * limited register constraint as this be_Call output */
		arch_register_req_t const *const req = arch_get_register_req_out(node);
		int const n_outs = arch_irn_get_n_outs(new_call);

		assert(proj >= pn_be_Call_first_res);
		assert(req->type & arch_register_req_type_limited);

		for (i = 0; i < n_outs; ++i) {
			arch_register_req_t const *const new_req
				= arch_get_out_register_req(new_call, i);

			/* skip outputs whose register constraint does not match */
			if (!(new_req->type & arch_register_req_type_limited) ||
			    new_req->cls != req->cls ||
			    *new_req->limited != *req->limited)

	res = new_rd_Proj(dbgi, block, new_call, mode, proj);

	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
	case pn_ia32_Call_stack:
		/* the stack output always lives in esp */
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);

	case pn_ia32_Call_fpcw:
		/* the fp control word output always lives in fpcw */
		arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5299 * Transform the Projs from a Cmp.
5301 static ir_node *gen_Proj_Cmp(ir_node *node)
5303 /* this probably means not all mode_b nodes were lowered... */
5304 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5309 * Transform the Projs from a Bound.
5311 static ir_node *gen_Proj_Bound(ir_node *node)
5313 ir_node *new_node, *block;
5314 ir_node *pred = get_Proj_pred(node);
5316 switch (get_Proj_proj(node)) {
5318 return be_transform_node(get_Bound_mem(pred));
5319 case pn_Bound_X_regular:
5320 new_node = be_transform_node(pred);
5321 block = get_nodes_block(new_node);
5322 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_true);
5323 case pn_Bound_X_except:
5324 new_node = be_transform_node(pred);
5325 block = get_nodes_block(new_node);
5326 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_false);
5328 return be_transform_node(get_Bound_index(pred));
5330 panic("unsupported Proj from Bound");
/**
 * Transform the Projs of an ASM node.
 *
 * Renumbers/retypes the Proj to match the outputs of the transformed ASM
 * node.
 */
static ir_node *gen_Proj_ASM(ir_node *node)
{
	ir_mode *mode = get_irn_mode(node);
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_node *block = get_nodes_block(new_pred);
	long pos = get_Proj_proj(node);

	if (mode == mode_M) {
		/* the memory output is always the last output of the new ASM node */
		pos = arch_irn_get_n_outs(new_pred)-1;
	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
		/* NOTE(review): the int/ref and float branches presumably normalise
		 * mode to the respective register-class mode — confirm against the
		 * full source */
	} else if (mode_is_float(mode)) {
		panic("unexpected proj mode at ASM");

	return new_r_Proj(block, new_pred, mode, pos);
/**
 * Transform and potentially renumber Proj nodes.
 *
 * Dispatches on the opcode of the Proj's predecessor to the specialised
 * Proj transformers; anything unhandled is simply duplicated.
 */
static ir_node *gen_Proj(ir_node *node)
{
	ir_node *pred = get_Proj_pred(node);

	switch (get_irn_opcode(pred)) {
		proj = get_Proj_proj(node);
		if (proj == pn_Store_M) {
			/* a Store only produces memory: it is its own transform */
			return be_transform_node(pred);
		panic("No idea how to transform proj->Store");
		return gen_Proj_Load(node);      /* Proj(Load) */
		return gen_Proj_ASM(node);       /* Proj(ASM) */
		return gen_Proj_Builtin(node);   /* Proj(Builtin) */
		return gen_Proj_DivMod(node);    /* Proj(Div/Mod/DivMod) */
		return gen_Proj_CopyB(node);     /* Proj(CopyB) */
		return gen_Proj_Quot(node);      /* Proj(Quot) */
		return gen_Proj_be_SubSP(node);  /* Proj(be_SubSP) */
		return gen_Proj_be_AddSP(node);  /* Proj(be_AddSP) */
		return gen_Proj_be_Call(node);   /* Proj(be_Call) */
		return gen_Proj_Cmp(node);       /* Proj(Cmp): always panics */
		return gen_Proj_Bound(node);     /* Proj(Bound) */
		proj = get_Proj_proj(node);
		case pn_Start_X_initial_exec: {
			ir_node *block = get_nodes_block(pred);
			ir_node *new_block = be_transform_node(block);
			dbg_info *dbgi = get_irn_dbg_info(node);
			/* we exchange the ProjX with a jump */
			ir_node *jump = new_rd_Jmp(dbgi, new_block);
		case pn_Start_P_tls:
			return gen_Proj_tls(node);
		if (is_ia32_l_FloattoLL(pred)) {
			return gen_Proj_l_FloattoLL(node);
		} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
			ir_mode *mode = get_irn_mode(node);
			if (ia32_mode_needs_gp_reg(mode)) {
				/* force gp results into mode_Iu; keep the node number stable
				 * so debug output stays comparable */
				ir_node *new_pred = be_transform_node(pred);
				ir_node *block = be_transform_node(get_nodes_block(node));
				ir_node *new_proj = new_r_Proj(block, new_pred,
				                               mode_Iu, get_Proj_proj(node));
				new_proj->node_nr = node->node_nr;
	/* default: keep the Proj as-is */
	return be_duplicate_node(node);
/**
 * Enters all transform functions into the generic pointer of each opcode,
 * so be_transform_graph can dispatch per node opcode.
 */
static void register_transformers(void)
{
	/* first clear the generic function pointer for all ops */
	clear_irp_opcodes_generic_func();

	/* GEN(op) installs gen_<op> as transformer for <op>;
	 * BAD(op) installs bad_transform for ops that must not reach the
	 * backend transformation anymore */
#define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
#define BAD(a) op_##a->ops.generic = (op_func)bad_transform
	/* transform ops from intrinsic lowering */
	GEN(ia32_l_LLtoFloat);
	GEN(ia32_l_FloattoLL);
	/* we should never see these nodes */
	/* handle builtins */
	/* handle generic backend nodes */
5535 * Pre-transform all unknown and noreg nodes.
5537 static void ia32_pretransform_node(void)
5539 ia32_code_gen_t *cg = env_cg;
5541 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5542 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5543 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5544 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5545 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5546 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5548 nomem = get_irg_no_mem(current_ir_graph);
5549 noreg_GP = ia32_new_NoReg_gp(cg);
/**
 * Walker, checks if all ia32 nodes producing more than one result have their
 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
 */
static void add_missing_keep_walker(ir_node *node, void *data)
{
	unsigned found_projs = 0;  /* bitset: bit i set <=> output i has a Proj */
	const ir_edge_t *edge;
	ir_mode *mode = get_irn_mode(node);

	/* only ia32 backend nodes are of interest */
	if (!is_ia32_irn(node))

	n_outs = arch_irn_get_n_outs(node);

	/* SwitchJmp outputs are control flow, nothing to keep alive */
	if (is_ia32_SwitchJmp(node))

	/* found_projs is an unsigned used as a bitset: all outputs must fit */
	assert(n_outs < (int) sizeof(unsigned) * 8);
	/* record which outputs already have a Proj */
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);

		/* The node could be kept */
		if (get_irn_mode(proj) == mode_M)

		pn = get_Proj_proj(proj);
		assert(pn < n_outs);
		found_projs |= 1 << pn;

	/* are keeps missing? */
	for (i = 0; i < n_outs; ++i) {
		const arch_register_req_t *req;
		const arch_register_class_t *cls;

		/* this output already has a user */
		if (found_projs & (1 << i)) {

		req = arch_get_out_register_req(node, i);

		/* flag outputs need no artificial keep */
		if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {

		/* create the missing Proj and hang it on a (possibly shared) Keep */
		block = get_nodes_block(node);
		in[0] = new_r_Proj(block, node, arch_register_class_mode(cls), i);
		if (last_keep != NULL) {
			be_Keep_add_node(last_keep, cls, in[0]);
			last_keep = be_new_Keep(block, 1, in);
			if (sched_is_scheduled(node)) {
				sched_add_after(node, last_keep);
5630 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
5633 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5635 ir_graph *irg = be_get_birg_irg(cg->birg);
5636 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
/**
 * Post-process all calls if we are in SSE mode.
 * The ABI requires that the results are in st0, copy them
 * to a xmm register.
 */
static void postprocess_fp_call_results(void) {
	/* walk all calls remembered during the transform phase */
	for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
		ir_node *call = call_list[i];
		ir_type *mtp = call_types[i];

		/* examine each declared result of the call's method type */
		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
			ir_type *res_tp = get_method_res_type(mtp, j);
			ir_node *res, *new_res;
			const ir_edge_t *edge, *next;

			if (! is_atomic_type(res_tp)) {
				/* no floating point return */

			mode = get_type_mode(res_tp);
			if (! mode_is_float(mode)) {
				/* no floating point return */

			/* the x87 result Proj of the call */
			res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);

			/* now patch the users */
			foreach_out_edge_safe(res, edge, next) {
				ir_node *succ = get_edge_src_irn(edge);

				/* Keeps need no rewriting */
				if (be_is_Keep(succ))

				if (is_ia32_xStore(succ)) {
					/* an xStore can be patched into an vfst */
					dbg_info *db = get_irn_dbg_info(succ);
					ir_node *block = get_nodes_block(succ);
					ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
					ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
					ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
					ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
					ir_mode *mode = get_ia32_ls_mode(succ);

					ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
					/* carry all address-mode attributes over to the vfst */
					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
					if (is_ia32_use_frame(succ))
						set_ia32_use_frame(st);
					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
					set_irn_pinned(st, get_irn_pinned(succ));
					set_ia32_op_type(st, ia32_AddrModeD);

				/* value not lifted to xmm yet: spill st0, reload via SSE */
				if (new_res == NULL) {
					dbg_info *db = get_irn_dbg_info(call);
					ir_node *block = get_nodes_block(call);
					ir_node *frame = get_irg_frame(current_ir_graph);
					ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
					ir_node *call_mem = new_r_Proj(block, call, mode_M, pn_ia32_Call_M);
					ir_node *vfst, *xld, *new_mem;

					/* store st(0) on stack */
					vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
					set_ia32_op_type(vfst, ia32_AddrModeD);
					set_ia32_use_frame(vfst);

					/* load into SSE register */
					xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
					set_ia32_op_type(xld, ia32_AddrModeS);
					set_ia32_use_frame(xld);

					new_res = new_r_Proj(block, xld, mode, pn_ia32_xLoad_res);
					new_mem = new_r_Proj(block, xld, mode_M, pn_ia32_xLoad_M);

					/* route all former memory users through the new load */
					if (old_mem != NULL) {
						edges_reroute(old_mem, new_mem, current_ir_graph);

				/* redirect this user to the SSE value */
				set_irn_n(succ, get_edge_src_pos(edge), new_res);
/* do the transformation */
void ia32_transform_graph(ia32_code_gen_t *cg)
{
	register_transformers();

	initial_fpcw = NULL;

	/* compute node heights (timed); presumably consumed by the address
	 * mode matching — confirm against ia32_address_mode */
	BE_TIMER_PUSH(t_heights);
	heights = heights_new(cg->irg);
	BE_TIMER_POP(t_heights);
	ia32_calculate_non_address_mode_nodes(cg->birg);

	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();

	/* collect calls so SSE mode can post-process their fp results below */
	call_list = NEW_ARR_F(ir_node *, 0);
	call_types = NEW_ARR_F(ir_type *, 0);
	be_transform_graph(cg->birg, ia32_pretransform_node);

	if (ia32_cg_config.use_sse2)
		postprocess_fp_call_results();
	DEL_ARR_F(call_types);
	DEL_ARR_F(call_list);

	/* restore the previous CSE setting */
	set_opt_cse(cse_last);

	ia32_free_non_address_mode_nodes();
	heights_free(heights);
5768 void ia32_init_transform(void)
5770 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");