2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into ia32-Firm.
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode_t.h"
52 #include "../besched.h"
54 #include "../beutil.h"
55 #include "../beirg_t.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
95 static ir_node *initial_fpcw = NULL;
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* its enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
167 * returns true if constant can be created with a simple float command
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
210 if (mode_is_float(mode)) {
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
217 if (tarval_is_null(tv)) {
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
222 } else if (tarval_is_one(tv)) {
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(current_ir_graph, block, load, mode_xmm, pn_ia32_xLoad_res);
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
292 floatent = create_float_const_entity(node);
293 /* create_float_const_ent is smart and sometimes creates
295 ls_mode = get_type_mode(get_entity_type(floatent));
297 load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
299 set_ia32_op_type(load, ia32_AddrModeS);
300 set_ia32_am_sc(load, floatent);
301 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
302 res = new_r_Proj(current_ir_graph, block, load, mode_vfp, pn_ia32_vfld_res);
305 #ifdef CONSTRUCT_SSE_CONST
307 #endif /* CONSTRUCT_SSE_CONST */
308 SET_IA32_ORIG_NODE(load, node);
310 be_dep_on_frame(load);
312 } else { /* non-float mode */
314 tarval *tv = get_Const_tarval(node);
317 tv = tarval_convert_to(tv, mode_Iu);
319 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
321 panic("couldn't convert constant tarval (%+F)", node);
323 val = get_tarval_long(tv);
325 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
326 SET_IA32_ORIG_NODE(cnst, node);
328 be_dep_on_frame(cnst);
334 * Transforms a SymConst.
336 static ir_node *gen_SymConst(ir_node *node)
338 ir_node *old_block = get_nodes_block(node);
339 ir_node *block = be_transform_node(old_block);
340 dbg_info *dbgi = get_irn_dbg_info(node);
341 ir_mode *mode = get_irn_mode(node);
344 if (mode_is_float(mode)) {
345 if (ia32_cg_config.use_sse2)
346 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
348 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
354 if (get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
361 SET_IA32_ORIG_NODE(cnst, node);
363 be_dep_on_frame(cnst);
368 * Create a float type for the given mode and cache it.
370 * @param mode the mode for the float type (might be integer mode for SSE2 types)
371 * @param align alignment
373 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
379 if (mode == mode_Iu) {
380 static ir_type *int_Iu[16] = {NULL, };
382 if (int_Iu[align] == NULL) {
383 snprintf(buf, sizeof(buf), "int_Iu_%u", align);
384 int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
385 /* set the specified alignment */
386 set_type_alignment_bytes(tp, align);
388 return int_Iu[align];
389 } else if (mode == mode_Lu) {
390 static ir_type *int_Lu[16] = {NULL, };
392 if (int_Lu[align] == NULL) {
393 snprintf(buf, sizeof(buf), "int_Lu_%u", align);
394 int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
395 /* set the specified alignment */
396 set_type_alignment_bytes(tp, align);
398 return int_Lu[align];
399 } else if (mode == mode_F) {
400 static ir_type *float_F[16] = {NULL, };
402 if (float_F[align] == NULL) {
403 snprintf(buf, sizeof(buf), "float_F_%u", align);
404 float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
405 /* set the specified alignment */
406 set_type_alignment_bytes(tp, align);
408 return float_F[align];
409 } else if (mode == mode_D) {
410 static ir_type *float_D[16] = {NULL, };
412 if (float_D[align] == NULL) {
413 snprintf(buf, sizeof(buf), "float_D_%u", align);
414 float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return float_D[align];
420 static ir_type *float_E[16] = {NULL, };
422 if (float_E[align] == NULL) {
423 snprintf(buf, sizeof(buf), "float_E_%u", align);
424 float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
425 /* set the specified alignment */
426 set_type_alignment_bytes(tp, align);
428 return float_E[align];
433 * Create a float[2] array type for the given atomic type.
435 * @param tp the atomic type
437 static ir_type *ia32_create_float_array(ir_type *tp) {
439 ir_mode *mode = get_type_mode(tp);
440 unsigned align = get_type_alignment_bytes(tp);
445 if (mode == mode_F) {
446 static ir_type *float_F[16] = {NULL, };
448 if (float_F[align] != NULL)
449 return float_F[align];
450 snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
451 arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
452 } else if (mode == mode_D) {
453 static ir_type *float_D[16] = {NULL, };
455 if (float_D[align] != NULL)
456 return float_D[align];
457 snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
458 arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
460 static ir_type *float_E[16] = {NULL, };
462 if (float_E[align] != NULL)
463 return float_E[align];
464 snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
465 arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
467 set_type_alignment_bytes(arr, align);
468 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
469 set_type_state(arr, layout_fixed);
473 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
474 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
476 static const struct {
477 const char *ent_name;
478 const char *cnst_str;
481 } names [ia32_known_const_max] = {
482 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
483 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
484 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
485 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
486 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
488 static ir_entity *ent_cache[ia32_known_const_max];
490 const char *ent_name, *cnst_str;
496 ent_name = names[kct].ent_name;
497 if (! ent_cache[kct]) {
498 cnst_str = names[kct].cnst_str;
500 switch (names[kct].mode) {
501 case 0: mode = mode_Iu; break;
502 case 1: mode = mode_Lu; break;
503 default: mode = mode_F; break;
505 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
506 tp = ia32_create_float_type(mode, names[kct].align);
508 if (kct == ia32_ULLBIAS)
509 tp = ia32_create_float_array(tp);
510 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
512 set_entity_ld_ident(ent, get_entity_ident(ent));
513 set_entity_visibility(ent, visibility_local);
514 set_entity_variability(ent, variability_constant);
515 set_entity_allocation(ent, allocation_static);
517 if (kct == ia32_ULLBIAS) {
518 ir_initializer_t *initializer = create_initializer_compound(2);
520 set_initializer_compound_value(initializer, 0,
521 create_initializer_tarval(get_tarval_null(mode)));
522 set_initializer_compound_value(initializer, 1,
523 create_initializer_tarval(tv));
525 set_entity_initializer(ent, initializer);
527 set_entity_initializer(ent, create_initializer_tarval(tv));
530 /* cache the entry */
531 ent_cache[kct] = ent;
534 return ent_cache[kct];
538 * return true if the node is a Proj(Load) and could be used in source address
539 * mode for another node. Will return only true if the @p other node is not
540 * dependent on the memory of the Load (for binary operations use the other
541 * input here, for unary operations use NULL).
543 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
544 ir_node *other, ir_node *other2, match_flags_t flags)
549 /* float constants are always available */
550 if (is_Const(node)) {
551 ir_mode *mode = get_irn_mode(node);
552 if (mode_is_float(mode)) {
553 if (ia32_cg_config.use_sse2) {
554 if (is_simple_sse_Const(node))
557 if (is_simple_x87_Const(node))
560 if (get_irn_n_edges(node) > 1)
568 load = get_Proj_pred(node);
569 pn = get_Proj_proj(node);
570 if (!is_Load(load) || pn != pn_Load_res)
572 if (get_nodes_block(load) != block)
574 /* we only use address mode if we're the only user of the load */
575 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
577 /* in some edge cases with address mode we might reach the load normally
578 * and through some AM sequence, if it is already materialized then we
579 * can't create an AM node from it */
580 if (be_is_transformed(node))
583 /* don't do AM if other node inputs depend on the load (via mem-proj) */
584 if (other != NULL && prevents_AM(block, load, other))
587 if (other2 != NULL && prevents_AM(block, load, other2))
593 typedef struct ia32_address_mode_t ia32_address_mode_t;
594 struct ia32_address_mode_t {
599 ia32_op_type_t op_type;
603 unsigned commutative : 1;
604 unsigned ins_permuted : 1;
607 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
609 /* construct load address */
610 memset(addr, 0, sizeof(addr[0]));
611 ia32_create_address_mode(addr, ptr, 0);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
615 addr->mem = be_transform_node(mem);
618 static void build_address(ia32_address_mode_t *am, ir_node *node,
619 ia32_create_am_flags_t flags)
621 ia32_address_t *addr = &am->addr;
627 if (is_Const(node)) {
628 ir_entity *entity = create_float_const_entity(node);
629 addr->base = noreg_GP;
630 addr->index = noreg_GP;
632 addr->symconst_ent = entity;
634 am->ls_mode = get_type_mode(get_entity_type(entity));
635 am->pinned = op_pin_state_floats;
639 load = get_Proj_pred(node);
640 ptr = get_Load_ptr(load);
641 mem = get_Load_mem(load);
642 new_mem = be_transform_node(mem);
643 am->pinned = get_irn_pinned(load);
644 am->ls_mode = get_Load_mode(load);
645 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
648 /* construct load address */
649 ia32_create_address_mode(addr, ptr, flags);
651 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
652 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
656 static void set_address(ir_node *node, const ia32_address_t *addr)
658 set_ia32_am_scale(node, addr->scale);
659 set_ia32_am_sc(node, addr->symconst_ent);
660 set_ia32_am_offs_int(node, addr->offset);
661 if (addr->symconst_sign)
662 set_ia32_am_sc_sign(node);
664 set_ia32_use_frame(node);
665 set_ia32_frame_ent(node, addr->frame_entity);
669 * Apply attributes of a given address mode to a node.
671 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
673 set_address(node, &am->addr);
675 set_ia32_op_type(node, am->op_type);
676 set_ia32_ls_mode(node, am->ls_mode);
677 if (am->pinned == op_pin_state_pinned) {
678 /* beware: some nodes are already pinned and did not allow to change the state */
679 if (get_irn_pinned(node) != op_pin_state_pinned)
680 set_irn_pinned(node, op_pin_state_pinned);
683 set_ia32_commutative(node);
687 * Check, if a given node is a Down-Conv, ie. a integer Conv
688 * from a mode with a mode with more bits to a mode with lesser bits.
689 * Moreover, we return only true if the node has not more than 1 user.
691 * @param node the node
692 * @return non-zero if node is a Down-Conv
694 static int is_downconv(const ir_node *node)
702 /* we only want to skip the conv when we're the only user
703 * (not optimal but for now...)
705 if (get_irn_n_edges(node) > 1)
708 src_mode = get_irn_mode(get_Conv_op(node));
709 dest_mode = get_irn_mode(node);
711 ia32_mode_needs_gp_reg(src_mode) &&
712 ia32_mode_needs_gp_reg(dest_mode) &&
713 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
716 /* Skip all Down-Conv's on a given node and return the resulting node. */
717 ir_node *ia32_skip_downconv(ir_node *node)
719 while (is_downconv(node))
720 node = get_Conv_op(node);
725 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
727 ir_mode *mode = get_irn_mode(node);
732 if (mode_is_signed(mode)) {
737 block = get_nodes_block(node);
738 dbgi = get_irn_dbg_info(node);
740 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
744 * matches operands of a node into ia32 addressing/operand modes. This covers
745 * usage of source address mode, immediates, operations with non 32-bit modes,
747 * The resulting data is filled into the @p am struct. block is the block
748 * of the node whose arguments are matched. op1, op2 are the first and second
749 * input that are matched (op1 may be NULL). other_op is another unrelated
750 * input that is not matched! but which is needed sometimes to check if AM
751 * for op1/op2 is legal.
752 * @p flags describes the supported modes of the operation in detail.
754 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
755 ir_node *op1, ir_node *op2, ir_node *other_op,
758 ia32_address_t *addr = &am->addr;
759 ir_mode *mode = get_irn_mode(op2);
760 int mode_bits = get_mode_size_bits(mode);
761 ir_node *new_op1, *new_op2;
763 unsigned commutative;
764 int use_am_and_immediates;
767 memset(am, 0, sizeof(am[0]));
769 commutative = (flags & match_commutative) != 0;
770 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
771 use_am = (flags & match_am) != 0;
772 use_immediate = (flags & match_immediate) != 0;
773 assert(!use_am_and_immediates || use_immediate);
776 assert(!commutative || op1 != NULL);
777 assert(use_am || !(flags & match_8bit_am));
778 assert(use_am || !(flags & match_16bit_am));
780 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
781 (mode_bits == 16 && !(flags & match_16bit_am))) {
785 /* we can simply skip downconvs for mode neutral nodes: the upper bits
786 * can be random for these operations */
787 if (flags & match_mode_neutral) {
788 op2 = ia32_skip_downconv(op2);
790 op1 = ia32_skip_downconv(op1);
794 /* match immediates. firm nodes are normalized: constants are always on the
797 if (!(flags & match_try_am) && use_immediate) {
798 new_op2 = try_create_Immediate(op2, 0);
801 if (new_op2 == NULL &&
802 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
803 build_address(am, op2, 0);
804 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
805 if (mode_is_float(mode)) {
806 new_op2 = ia32_new_NoReg_vfp(env_cg);
810 am->op_type = ia32_AddrModeS;
811 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
813 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
815 build_address(am, op1, 0);
817 if (mode_is_float(mode)) {
818 noreg = ia32_new_NoReg_vfp(env_cg);
823 if (new_op2 != NULL) {
826 new_op1 = be_transform_node(op2);
828 am->ins_permuted = 1;
830 am->op_type = ia32_AddrModeS;
832 am->op_type = ia32_Normal;
834 if (flags & match_try_am) {
840 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
842 new_op2 = be_transform_node(op2);
844 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
846 if (addr->base == NULL)
847 addr->base = noreg_GP;
848 if (addr->index == NULL)
849 addr->index = noreg_GP;
850 if (addr->mem == NULL)
853 am->new_op1 = new_op1;
854 am->new_op2 = new_op2;
855 am->commutative = commutative;
859 * "Fixes" a node that uses address mode by turning it into mode_T
860 * and returning a pn_ia32_res Proj.
862 * @param node the node
863 * @param am its address mode
865 * @return a Proj(pn_ia32_res) if a memory address mode is used,
868 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
873 if (am->mem_proj == NULL)
876 /* we have to create a mode_T so the old MemProj can attach to us */
877 mode = get_irn_mode(node);
878 load = get_Proj_pred(am->mem_proj);
880 be_set_transformed_node(load, node);
882 if (mode != mode_T) {
883 set_irn_mode(node, mode_T);
884 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
891 * Construct a standard binary operation, set AM and immediate if required.
893 * @param node The original node for which the binop is created
894 * @param op1 The first operand
895 * @param op2 The second operand
896 * @param func The node constructor function
897 * @return The constructed ia32 node.
899 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
900 construct_binop_func *func, match_flags_t flags)
903 ir_node *block, *new_block, *new_node;
904 ia32_address_mode_t am;
905 ia32_address_t *addr = &am.addr;
907 block = get_nodes_block(node);
908 match_arguments(&am, block, op1, op2, NULL, flags);
910 dbgi = get_irn_dbg_info(node);
911 new_block = be_transform_node(block);
912 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
913 am.new_op1, am.new_op2);
914 set_am_attributes(new_node, &am);
915 /* we can't use source address mode anymore when using immediates */
916 if (!(flags & match_am_and_immediates) &&
917 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
918 set_ia32_am_support(new_node, ia32_am_none);
919 SET_IA32_ORIG_NODE(new_node, node);
921 new_node = fix_mem_proj(new_node, &am);
927 * Generic names for the inputs of an ia32 binary op.
930 n_ia32_l_binop_left, /**< ia32 left input */
931 n_ia32_l_binop_right, /**< ia32 right input */
932 n_ia32_l_binop_eflags /**< ia32 eflags input */
934 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
935 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
936 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
937 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
938 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
939 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
942 * Construct a binary operation which also consumes the eflags.
944 * @param node The node to transform
945 * @param func The node constructor function
946 * @param flags The match flags
947 * @return The constructor ia32 node
949 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
952 ir_node *src_block = get_nodes_block(node);
953 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
954 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
955 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
957 ir_node *block, *new_node, *new_eflags;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
961 match_arguments(&am, src_block, op1, op2, eflags, flags);
963 dbgi = get_irn_dbg_info(node);
964 block = be_transform_node(src_block);
965 new_eflags = be_transform_node(eflags);
966 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2, new_eflags);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
980 static ir_node *get_fpcw(void)
983 if (initial_fpcw != NULL)
986 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
987 &ia32_fp_cw_regs[REG_FPCW]);
988 initial_fpcw = be_transform_node(fpcw);
994 * Construct a standard binary operation, set AM and immediate if required.
996 * @param op1 The first operand
997 * @param op2 The second operand
998 * @param func The node constructor function
999 * @return The constructed ia32 node.
1001 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1002 construct_binop_float_func *func)
1004 ir_mode *mode = get_irn_mode(node);
1006 ir_node *block, *new_block, *new_node;
1007 ia32_address_mode_t am;
1008 ia32_address_t *addr = &am.addr;
1009 ia32_x87_attr_t *attr;
1010 /* All operations are considered commutative, because there are reverse
1012 match_flags_t flags = match_commutative;
1014 /* cannot use address mode with long double on x87 */
1015 if (get_mode_size_bits(mode) <= 64)
1018 block = get_nodes_block(node);
1019 match_arguments(&am, block, op1, op2, NULL, flags);
1021 dbgi = get_irn_dbg_info(node);
1022 new_block = be_transform_node(block);
1023 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1024 am.new_op1, am.new_op2, get_fpcw());
1025 set_am_attributes(new_node, &am);
1027 attr = get_ia32_x87_attr(new_node);
1028 attr->attr.data.ins_permuted = am.ins_permuted;
1030 SET_IA32_ORIG_NODE(new_node, node);
1032 new_node = fix_mem_proj(new_node, &am);
1038 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1040 * @param op1 The first operand
1041 * @param op2 The second operand
1042 * @param func The node constructor function
1043 * @return The constructed ia32 node.
1045 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1046 construct_shift_func *func,
1047 match_flags_t flags)
1050 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1052 assert(! mode_is_float(get_irn_mode(node)));
1053 assert(flags & match_immediate);
1054 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1056 if (flags & match_mode_neutral) {
1057 op1 = ia32_skip_downconv(op1);
1058 new_op1 = be_transform_node(op1);
1059 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1060 new_op1 = create_upconv(op1, node);
1062 new_op1 = be_transform_node(op1);
1065 /* the shift amount can be any mode that is bigger than 5 bits, since all
1066 * other bits are ignored anyway */
1067 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1068 ir_node *const op = get_Conv_op(op2);
1069 if (mode_is_float(get_irn_mode(op)))
1072 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1074 new_op2 = create_immediate_or_transform(op2, 0);
1076 dbgi = get_irn_dbg_info(node);
1077 block = get_nodes_block(node);
1078 new_block = be_transform_node(block);
1079 new_node = func(dbgi, new_block, new_op1, new_op2);
1080 SET_IA32_ORIG_NODE(new_node, node);
1082 /* lowered shift instruction may have a dependency operand, handle it here */
1083 if (get_irn_arity(node) == 3) {
1084 /* we have a dependency */
1085 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1086 add_irn_dep(new_node, new_dep);
1094 * Construct a standard unary operation, set AM and immediate if required.
1096 * @param op The operand
1097 * @param func The node constructor function
1098 * @return The constructed ia32 node.
1100 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1101 match_flags_t flags)
1104 ir_node *block, *new_block, *new_op, *new_node;
1106 assert(flags == 0 || flags == match_mode_neutral);
1107 if (flags & match_mode_neutral) {
1108 op = ia32_skip_downconv(op);
1111 new_op = be_transform_node(op);
1112 dbgi = get_irn_dbg_info(node);
1113 block = get_nodes_block(node);
1114 new_block = be_transform_node(block);
1115 new_node = func(dbgi, new_block, new_op);
1117 SET_IA32_ORIG_NODE(new_node, node);
1122 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1123 ia32_address_t *addr)
1125 ir_node *base, *index, *res;
1131 base = be_transform_node(base);
1134 index = addr->index;
1135 if (index == NULL) {
1138 index = be_transform_node(index);
1141 res = new_bd_ia32_Lea(dbgi, block, base, index);
1142 set_address(res, addr);
1148 * Returns non-zero if a given address mode has a symbolic or
1149 * numerical offset != 0.
1151 static int am_has_immediates(const ia32_address_t *addr)
1153 return addr->offset != 0 || addr->symconst_ent != NULL
1154 || addr->frame_entity || addr->use_frame;
1158 * Creates an ia32 Add.
1160 * @return the created ia32 Add node
1162 static ir_node *gen_Add(ir_node *node)
1164 ir_mode *mode = get_irn_mode(node);
1165 ir_node *op1 = get_Add_left(node);
1166 ir_node *op2 = get_Add_right(node);
1168 ir_node *block, *new_block, *new_node, *add_immediate_op;
1169 ia32_address_t addr;
1170 ia32_address_mode_t am;
/* float adds: SSE2 xAdd if available, otherwise x87 vfadd */
1172 if (mode_is_float(mode)) {
1173 if (ia32_cg_config.use_sse2)
1174 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1175 match_commutative | match_am);
1177 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1180 ia32_mark_non_am(node);
/* downconvs are irrelevant for 32bit adds (mode-neutral arithmetic) */
1182 op2 = ia32_skip_downconv(op2);
1183 op1 = ia32_skip_downconv(op1);
/* Integer Add selection strategy (in order): */
1187 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1188 * 1. Add with immediate -> Lea
1189 * 2. Add with possible source address mode -> Add
1190 * 3. Otherwise -> Lea
1192 memset(&addr, 0, sizeof(addr));
1193 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1194 add_immediate_op = NULL;
1196 dbgi = get_irn_dbg_info(node);
1197 block = get_nodes_block(node);
1198 new_block = be_transform_node(block);
/* case 0: whole tree folded into symconst+offset -> emit a Const */
1201 if (addr.base == NULL && addr.index == NULL) {
1202 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1203 addr.symconst_sign, 0, addr.offset);
1204 be_dep_on_frame(new_node);
1205 SET_IA32_ORIG_NODE(new_node, node);
1208 /* add with immediate? */
1209 if (addr.index == NULL) {
1210 add_immediate_op = addr.base;
1211 } else if (addr.base == NULL && addr.scale == 0) {
1212 add_immediate_op = addr.index;
1215 if (add_immediate_op != NULL) {
1216 if (!am_has_immediates(&addr)) {
1217 #ifdef DEBUG_libfirm
/* Add x,0 should have been folded by the middleend already */
1218 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1221 return be_transform_node(add_immediate_op);
/* case 1: single operand + immediate -> Lea */
1224 new_node = create_lea_from_address(dbgi, new_block, &addr);
1225 SET_IA32_ORIG_NODE(new_node, node);
1229 /* test if we can use source address mode */
1230 match_arguments(&am, block, op1, op2, NULL, match_commutative
1231 | match_mode_neutral | match_am | match_immediate | match_try_am);
1233 /* construct an Add with source address mode */
1234 if (am.op_type == ia32_AddrModeS) {
1235 ia32_address_t *am_addr = &am.addr;
1236 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1237 am_addr->index, am_addr->mem, am.new_op1,
1239 set_am_attributes(new_node, &am);
1240 SET_IA32_ORIG_NODE(new_node, node);
1242 new_node = fix_mem_proj(new_node, &am);
1247 /* otherwise construct a lea */
1248 new_node = create_lea_from_address(dbgi, new_block, &addr);
1249 SET_IA32_ORIG_NODE(new_node, node);
1254 * Creates an ia32 Mul.
1256 * @return the created ia32 Mul node
1258 static ir_node *gen_Mul(ir_node *node)
1260 ir_node *op1 = get_Mul_left(node);
1261 ir_node *op2 = get_Mul_right(node);
1262 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE2 xMul or x87 vfmul */
1264 if (mode_is_float(mode)) {
1265 if (ia32_cg_config.use_sse2)
1266 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1267 match_commutative | match_am);
1269 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul, allowing immediates and source address mode */
1271 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1272 match_commutative | match_am | match_mode_neutral |
1273 match_immediate | match_am_and_immediates);
1277 * Creates an ia32 Mulh.
1278 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1279 * this result while Mul returns the lower 32 bit.
1281 * @return the created ia32 Mulh node
1283 static ir_node *gen_Mulh(ir_node *node)
1285 ir_node *block = get_nodes_block(node);
1286 ir_node *new_block = be_transform_node(block);
1287 dbg_info *dbgi = get_irn_dbg_info(node);
1288 ir_node *op1 = get_Mulh_left(node);
1289 ir_node *op2 = get_Mulh_right(node);
1290 ir_mode *mode = get_irn_mode(node);
1292 ir_node *proj_res_high;
/* signed -> imul (one-operand form), unsigned -> mul; in both cases the
 * high 32 bits of the 64 bit product are extracted via a Proj */
1294 if (mode_is_signed(mode)) {
1295 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1296 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1297 mode_Iu, pn_ia32_IMul1OP_res_high);
1299 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1300 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1301 mode_Iu, pn_ia32_Mul_res_high);
1303 return proj_res_high;
1307 * Creates an ia32 And.
1309 * @return The created ia32 And node
1311 static ir_node *gen_And(ir_node *node)
1313 ir_node *op1 = get_And_left(node);
1314 ir_node *op2 = get_And_right(node);
1315 assert(! mode_is_float(get_irn_mode(node)));
1317 /* is it a zero extension? */
1318 if (is_Const(op2)) {
1319 tarval *tv = get_Const_tarval(op2);
1320 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is an 8/16 bit zero extension -> use a Conv
 * (movzx) instead of an and instruction */
1322 if (v == 0xFF || v == 0xFFFF) {
1323 dbg_info *dbgi = get_irn_dbg_info(node);
1324 ir_node *block = get_nodes_block(node);
1331 assert(v == 0xFFFF);
1334 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1339 return gen_binop(node, op1, op2, new_bd_ia32_And,
1340 match_commutative | match_mode_neutral | match_am | match_immediate);
1346 * Creates an ia32 Or.
1348 * @return The created ia32 Or node
1350 static ir_node *gen_Or(ir_node *node)
1352 ir_node *op1 = get_Or_left(node);
1353 ir_node *op2 = get_Or_right(node);
/* float Or is not expected here (should be lowered before) */
1355 assert (! mode_is_float(get_irn_mode(node)));
1356 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1357 | match_mode_neutral | match_am | match_immediate);
1363 * Creates an ia32 Eor.
1365 * @return The created ia32 Eor node
1367 static ir_node *gen_Eor(ir_node *node)
1369 ir_node *op1 = get_Eor_left(node);
1370 ir_node *op2 = get_Eor_right(node);
/* firm Eor maps directly onto the ia32 Xor instruction */
1372 assert(! mode_is_float(get_irn_mode(node)));
1373 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1374 | match_mode_neutral | match_am | match_immediate);
1379 * Creates an ia32 Sub.
1381 * @return The created ia32 Sub node
1383 static ir_node *gen_Sub(ir_node *node)
1385 ir_node *op1 = get_Sub_left(node);
1386 ir_node *op2 = get_Sub_right(node);
1387 ir_mode *mode = get_irn_mode(node);
/* float subtract: SSE2 xSub or x87 vfsub; Sub is not commutative, so no
 * match_commutative here */
1389 if (mode_is_float(mode)) {
1390 if (ia32_cg_config.use_sse2)
1391 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1393 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with a Const should have been normalized to Add(x, -C) earlier */
1396 if (is_Const(op2)) {
1397 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1401 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1402 | match_am | match_immediate);
/* Combine the memory predecessor of a node with the memory consumed by an
 * address-mode operand, avoiding self-referential memory loops. Returns the
 * transformed memory input (possibly a new Sync). */
1405 static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
1406 ir_node *const src_val,
1407 ir_node *const src_mem,
1408 ir_node *const am_mem)
1410 if (is_NoMem(am_mem)) {
1411 return be_transform_node(src_mem);
1412 } else if (is_Proj(src_val) &&
1414 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1415 /* avoid memory loop */
1417 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1418 ir_node *const ptr_pred = get_Proj_pred(src_val);
1419 int const arity = get_Sync_n_preds(src_mem);
/* rebuild the Sync, filtering out the AM-load's own mem proj and
 * appending am_mem (reserve one extra slot) */
1424 NEW_ARR_A(ir_node*, ins, arity + 1);
1426 /* NOTE: This sometimes produces dead-code because the old sync in
1427 * src_mem might not be used anymore, we should detect this case
1428 * and kill the sync... */
1429 for (i = arity - 1; i >= 0; --i) {
1430 ir_node *const pred = get_Sync_pred(src_mem, i);
1432 /* avoid memory loop */
1433 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1436 ins[n++] = be_transform_node(pred);
1441 return new_r_Sync(irg, block, n, ins);
/* default: sync the transformed source memory with the AM memory */
1445 ins[0] = be_transform_node(src_mem);
1447 return new_r_Sync(irg, block, 2, ins);
1452 * Create a 32bit to 64bit signed extension.
1454 * @param dbgi debug info
1455 * @param block the block where node nodes should be placed
1456 * @param val the value to extend
1457 * @param orig the original node
/* @return the node producing the upper 32 sign bits (cltd result or
 * arithmetic shift right by 31) */
1459 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1460 ir_node *val, const ir_node *orig)
/* cltd (cdq) is shorter but ties eax/edx; the Sar variant is register
 * allocator friendlier — chosen by architecture config */
1465 if (ia32_cg_config.use_short_sex_eax) {
1466 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1467 be_dep_on_frame(pval);
1468 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1470 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1471 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1473 SET_IA32_ORIG_NODE(res, orig);
1478 * Generates an ia32 DivMod with additional infrastructure for the
1479 * register allocator if needed.
/* Shared transformation for firm Div, Mod and DivMod nodes: emits IDiv
 * (signed, with a 32->64 sign extension for edx) or Div (unsigned, with a
 * zero constant for the upper bits). */
1481 static ir_node *create_Div(ir_node *node)
1483 dbg_info *dbgi = get_irn_dbg_info(node);
1484 ir_node *block = get_nodes_block(node);
1485 ir_node *new_block = be_transform_node(block);
1492 ir_node *sign_extension;
1493 ia32_address_mode_t am;
1494 ia32_address_t *addr = &am.addr;
1496 /* the upper bits have random contents for smaller modes */
1497 switch (get_irn_opcode(node)) {
1499 op1 = get_Div_left(node);
1500 op2 = get_Div_right(node);
1501 mem = get_Div_mem(node);
1502 mode = get_Div_resmode(node);
1505 op1 = get_Mod_left(node);
1506 op2 = get_Mod_right(node);
1507 mem = get_Mod_mem(node);
1508 mode = get_Mod_resmode(node);
1511 op1 = get_DivMod_left(node);
1512 op2 = get_DivMod_right(node);
1513 mem = get_DivMod_mem(node);
1514 mode = get_DivMod_resmode(node);
1517 panic("invalid divmod node %+F", node);
1520 match_arguments(&am, block, op1, op2, NULL, match_am);
1522 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1523 is the memory of the consumed address. We can have only the second op as address
1524 in Div nodes, so check only op2. */
1525 new_mem = transform_AM_mem(current_ir_graph, block, op2, mem, addr->mem);
1527 if (mode_is_signed(mode)) {
1528 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1529 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1530 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned: upper 32 bits are simply zero */
1532 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1533 be_dep_on_frame(sign_extension);
1535 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1536 addr->index, new_mem, am.new_op2,
1537 am.new_op1, sign_extension);
/* keep the original pinned state: division may trap (div by zero) */
1540 set_irn_pinned(new_node, get_irn_pinned(node));
1542 set_am_attributes(new_node, &am);
1543 SET_IA32_ORIG_NODE(new_node, node);
1545 new_node = fix_mem_proj(new_node, &am);
1551 * Generates an ia32 Mod.
/* thin wrapper: Div/Mod/DivMod share one transformation (create_Div) */
1553 static ir_node *gen_Mod(ir_node *node)
1555 return create_Div(node);
1559 * Generates an ia32 Div.
/* thin wrapper: Div/Mod/DivMod share one transformation (create_Div) */
1561 static ir_node *gen_Div(ir_node *node)
1563 return create_Div(node);
1567 * Generates an ia32 DivMod.
/* thin wrapper: Div/Mod/DivMod share one transformation (create_Div) */
1569 static ir_node *gen_DivMod(ir_node *node)
1571 return create_Div(node);
1577 * Creates an ia32 floating Div.
1579 * @return The created ia32 xDiv node
1581 static ir_node *gen_Quot(ir_node *node)
1583 ir_node *op1 = get_Quot_left(node);
1584 ir_node *op2 = get_Quot_right(node);
/* SSE2 divss/divsd, otherwise x87 fdiv; division is not commutative */
1586 if (ia32_cg_config.use_sse2) {
1587 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1589 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1595 * Creates an ia32 Shl.
1597 * @return The created ia32 Shl node
1599 static ir_node *gen_Shl(ir_node *node)
1601 ir_node *left = get_Shl_left(node);
1602 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: upper garbage bits are shifted out */
1604 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1605 match_mode_neutral | match_immediate);
1609 * Creates an ia32 Shr.
1611 * @return The created ia32 Shr node
1613 static ir_node *gen_Shr(ir_node *node)
1615 ir_node *left = get_Shr_left(node);
1616 ir_node *right = get_Shr_right(node);
/* no match_mode_neutral: logical right shift depends on upper bits */
1618 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1624 * Creates an ia32 Sar.
1626 * @return The created ia32 Shrs node
1628 static ir_node *gen_Shrs(ir_node *node)
1630 ir_node *left = get_Shrs_left(node);
1631 ir_node *right = get_Shrs_right(node);
/* Shrs x, 31 (presumably — the comparison is elided) produces only the
 * sign bit: reuse the compact sign-extension helper */
1633 if (is_Const(right)) {
1634 tarval *tv = get_Const_tarval(right);
1635 long val = get_tarval_long(tv);
1637 /* this is a sign extension */
1638 dbg_info *dbgi = get_irn_dbg_info(node);
1639 ir_node *block = be_transform_node(get_nodes_block(node));
1640 ir_node *new_op = be_transform_node(left);
1642 return create_sex_32_64(dbgi, block, new_op, node);
1646 /* 8 or 16 bit sign extension? */
1647 if (is_Const(right) && is_Shl(left)) {
1648 ir_node *shl_left = get_Shl_left(left);
1649 ir_node *shl_right = get_Shl_right(left);
1650 if (is_Const(shl_right)) {
1651 tarval *tv1 = get_Const_tarval(right);
1652 tarval *tv2 = get_Const_tarval(shl_right);
/* Shrs(Shl(x, 24), 24) / (16,16) == movsx of the low 8/16 bits */
1653 if (tv1 == tv2 && tarval_is_long(tv1)) {
1654 long val = get_tarval_long(tv1);
1655 if (val == 16 || val == 24) {
1656 dbg_info *dbgi = get_irn_dbg_info(node);
1657 ir_node *block = get_nodes_block(node);
1667 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1676 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1682 * Creates an ia32 Rol.
1684 * @param op1 The first operator
1685 * @param op2 The second operator
1686 * @return The created ia32 RotL node
1688 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1690 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1696 * Creates an ia32 Ror.
1697 * NOTE: There is no RotR with immediate because this would always be a RotL
1698 * "imm-mode_size_bits" which can be pre-calculated.
1700 * @param op1 The first operator
1701 * @param op2 The second operator
1702 * @return The created ia32 RotR node
1704 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1706 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1712 * Creates an ia32 RotR or RotL (depending on the found pattern).
1714 * @return The created ia32 RotL or RotR node
1716 static ir_node *gen_Rotl(ir_node *node)
1718 ir_node *rotate = NULL;
1719 ir_node *op1 = get_Rotl_left(node);
1720 ir_node *op2 = get_Rotl_right(node);
1722 /* Firm has only RotL, so we are looking for a right (op2)
1723 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1724 that means we can create a RotR instead of an Add and a RotL */
/* pattern: RotL(x, Add(Minus(e), bits)) -> RotR(x, e) */
1728 ir_node *left = get_Add_left(add);
1729 ir_node *right = get_Add_right(add);
1730 if (is_Const(right)) {
1731 tarval *tv = get_Const_tarval(right);
1732 ir_mode *mode = get_irn_mode(node);
1733 long bits = get_mode_size_bits(mode);
1735 if (is_Minus(left) &&
1736 tarval_is_long(tv) &&
1737 get_tarval_long(tv) == bits &&
1740 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1741 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* no RotR pattern matched -> plain RotL */
1746 if (rotate == NULL) {
1747 rotate = gen_Rol(node, op1, op2);
1756 * Transforms a Minus node.
1758 * @return The created ia32 Minus node
1760 static ir_node *gen_Minus(ir_node *node)
1762 ir_node *op = get_Minus_op(node);
1763 ir_node *block = be_transform_node(get_nodes_block(node));
1764 dbg_info *dbgi = get_irn_dbg_info(node);
1765 ir_mode *mode = get_irn_mode(node);
1770 if (mode_is_float(mode)) {
1771 ir_node *new_op = be_transform_node(op);
1772 if (ia32_cg_config.use_sse2) {
1773 /* TODO: non-optimal... if we have many xXors, then we should
1774 * rather create a load for the const and use that instead of
1775 * several AM nodes... */
/* SSE negate = xor with the sign-bit constant (SSIGN/DSIGN) loaded
 * via address mode */
1776 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1778 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1779 nomem, new_op, noreg_xmm);
1781 size = get_mode_size_bits(mode);
1782 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1784 set_ia32_am_sc(new_node, ent);
1785 set_ia32_op_type(new_node, ia32_AddrModeS);
1786 set_ia32_ls_mode(new_node, mode);
/* x87: dedicated fchs instruction */
1788 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1791 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1794 SET_IA32_ORIG_NODE(new_node, node);
1800 * Transforms a Not node.
1802 * @return The created ia32 Not node
1804 static ir_node *gen_Not(ir_node *node)
1806 ir_node *op = get_Not_op(node);
1808 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1809 assert (! mode_is_float(get_irn_mode(node)));
1811 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1817 * Transforms an Abs node.
1819 * @return The created ia32 Abs node
1821 static ir_node *gen_Abs(ir_node *node)
1823 ir_node *block = get_nodes_block(node);
1824 ir_node *new_block = be_transform_node(block);
1825 ir_node *op = get_Abs_op(node);
1826 dbg_info *dbgi = get_irn_dbg_info(node);
1827 ir_mode *mode = get_irn_mode(node);
1833 if (mode_is_float(mode)) {
1834 new_op = be_transform_node(op);
/* SSE abs = and with the all-but-sign-bit mask (SABS/DABS constant) */
1836 if (ia32_cg_config.use_sse2) {
1837 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1838 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1839 nomem, new_op, noreg_fp);
1841 size = get_mode_size_bits(mode);
1842 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1844 set_ia32_am_sc(new_node, ent);
1846 SET_IA32_ORIG_NODE(new_node, node);
1848 set_ia32_op_type(new_node, ia32_AddrModeS);
1849 set_ia32_ls_mode(new_node, mode);
/* x87: dedicated fabs instruction */
1851 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1852 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs via the classic branchless sequence:
 * s = x >> 31; result = (x ^ s) - s */
1855 ir_node *xor, *sign_extension;
1857 if (get_mode_size_bits(mode) == 32) {
1858 new_op = be_transform_node(op);
1860 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1863 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1865 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1866 nomem, new_op, sign_extension);
1867 SET_IA32_ORIG_NODE(xor, node);
1869 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1870 nomem, xor, sign_extension);
1871 SET_IA32_ORIG_NODE(new_node, node);
1878 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1880 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1882 dbg_info *dbgi = get_irn_dbg_info(cmp);
1883 ir_node *block = get_nodes_block(cmp);
1884 ir_node *new_block = be_transform_node(block);
1885 ir_node *op1 = be_transform_node(x);
1886 ir_node *op2 = be_transform_node(n);
/* the bt result ends up in the carry flag; callers test via Jc/Jnc */
1888 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1892 * Transform a node returning a "flag" result.
1894 * @param node the node to transform
1895 * @param pnc_out the compare mode to use
/* @return a flags-producing node (Cmp/Test/Bt); *pnc_out receives the
 * projection number the consumer must test */
1897 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1904 /* we have a Cmp as input */
1905 if (is_Proj(node)) {
1906 ir_node *pred = get_Proj_pred(node);
1908 pn_Cmp pnc = get_Proj_proj(node);
/* try to turn (x & (1 << n)) ==/!= 0 comparisons into a single bt */
1909 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1910 ir_node *l = get_Cmp_left(pred);
1911 ir_node *r = get_Cmp_right(pred);
1913 ir_node *la = get_And_left(l);
1914 ir_node *ra = get_And_right(l);
1916 ir_node *c = get_Shl_left(la);
1917 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1918 /* (1 << n) & ra) */
1919 ir_node *n = get_Shl_right(la);
1920 flags = gen_bt(pred, ra, n);
1921 /* we must generate a Jc/Jnc jump */
1922 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1925 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* mirrored pattern: shift on the right And operand */
1930 ir_node *c = get_Shl_left(ra);
1931 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1932 /* la & (1 << n)) */
1933 ir_node *n = get_Shl_right(ra);
1934 flags = gen_bt(pred, la, n);
1935 /* we must generate a Jc/Jnc jump */
1936 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1939 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* plain Cmp: just transform the predecessor */
1945 flags = be_transform_node(pred);
1951 /* a mode_b value, we have to compare it against 0 */
1952 dbgi = get_irn_dbg_info(node);
1953 new_block = be_transform_node(get_nodes_block(node));
1954 new_op = be_transform_node(node);
1955 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1956 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1957 *pnc_out = pn_Cmp_Lg;
1962 * Transforms a Load.
1964 * @return the created ia32 Load node
1966 static ir_node *gen_Load(ir_node *node)
1968 ir_node *old_block = get_nodes_block(node);
1969 ir_node *block = be_transform_node(old_block);
1970 ir_node *ptr = get_Load_ptr(node);
1971 ir_node *mem = get_Load_mem(node);
1972 ir_node *new_mem = be_transform_node(mem);
1975 dbg_info *dbgi = get_irn_dbg_info(node);
1976 ir_mode *mode = get_Load_mode(node);
1979 ia32_address_t addr;
1981 /* construct load address */
1982 memset(&addr, 0, sizeof(addr));
1983 ia32_create_address_mode(&addr, ptr, 0);
1990 base = be_transform_node(base);
1993 if (index == NULL) {
1996 index = be_transform_node(index);
/* pick the load flavor: SSE2 movss/movsd, x87 fld, or GP load */
1999 if (mode_is_float(mode)) {
2000 if (ia32_cg_config.use_sse2) {
2001 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2003 res_mode = mode_xmm;
2005 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2007 res_mode = mode_vfp;
2010 assert(mode != mode_b);
2012 /* create a conv node with address mode for smaller modes */
2013 if (get_mode_size_bits(mode) < 32) {
2014 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2015 new_mem, noreg_GP, mode);
2017 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2022 set_irn_pinned(new_node, get_irn_pinned(node));
2023 set_ia32_op_type(new_node, ia32_AddrModeS);
2024 set_ia32_ls_mode(new_node, mode);
2025 set_address(new_node, &addr);
/* floating (unpinned) loads can be rematerialized instead of spilled */
2027 if (get_irn_pinned(node) == op_pin_state_floats) {
2028 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2029 && pn_ia32_vfld_res == pn_ia32_Load_res
2030 && pn_ia32_Load_res == pn_ia32_res);
2031 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2034 SET_IA32_ORIG_NODE(new_node, node);
2036 be_dep_on_frame(new_node);
/* Decide whether a Load (node is its result Proj) can be folded into a
 * destination-address-mode operation storing back through ptr.
 * Returns non-zero if folding is safe. */
2040 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2041 ir_node *ptr, ir_node *other)
2048 /* we only use address mode if we're the only user of the load */
2049 if (get_irn_n_edges(node) > 1)
2052 load = get_Proj_pred(node);
/* load must live in the same block as the prospective store */
2055 if (get_nodes_block(load) != block)
2058 /* store should have the same pointer as the load */
2059 if (get_Load_ptr(load) != ptr)
2062 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2063 if (other != NULL &&
2064 get_nodes_block(other) == block &&
2065 heights_reachable_in_block(heights, other, load)) {
2069 if (prevents_AM(block, load, mem))
2071 /* Store should be attached to the load via mem */
2072 assert(heights_reachable_in_block(heights, mem, load));
/* Try to build a destination-address-mode binop (op [mem], reg/imm) for a
 * Store(binop(Load)) pattern. Returns the new node or (presumably, in an
 * elided path) NULL when no operand qualifies — TODO confirm. */
2077 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2078 ir_node *mem, ir_node *ptr, ir_mode *mode,
2079 construct_binop_dest_func *func,
2080 construct_binop_dest_func *func8bit,
2081 match_flags_t flags)
2083 ir_node *src_block = get_nodes_block(node);
2091 ia32_address_mode_t am;
2092 ia32_address_t *addr = &am.addr;
2093 memset(&am, 0, sizeof(am));
2095 assert(flags & match_immediate); /* there is no destam node without... */
2096 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is a suitable load; the other becomes the
 * register/immediate operand */
2098 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2099 build_address(&am, op1, ia32_create_am_double_use);
2100 new_op = create_immediate_or_transform(op2, 0);
2101 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2102 build_address(&am, op2, ia32_create_am_double_use);
2103 new_op = create_immediate_or_transform(op1, 0);
2108 if (addr->base == NULL)
2109 addr->base = noreg_GP;
2110 if (addr->index == NULL)
2111 addr->index = noreg_GP;
2112 if (addr->mem == NULL)
2115 dbgi = get_irn_dbg_info(node);
2116 block = be_transform_node(src_block);
2117 new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
/* 8-bit operations need the dedicated 8-bit constructor */
2119 if (get_mode_size_bits(mode) == 8) {
2120 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2122 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2124 set_address(new_node, addr);
2125 set_ia32_op_type(new_node, ia32_AddrModeD);
2126 set_ia32_ls_mode(new_node, mode);
2127 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the folded load's memory Proj to the new combined node */
2129 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2130 mem_proj = be_transform_node(am.mem_proj);
2131 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to build a destination-address-mode unop (op [mem]) for a
 * Store(unop(Load)) pattern; NULL is presumably returned via the elided
 * early-exit when folding is not possible — TODO confirm. */
2136 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2137 ir_node *ptr, ir_mode *mode,
2138 construct_unop_dest_func *func)
2140 ir_node *src_block = get_nodes_block(node);
2146 ia32_address_mode_t am;
2147 ia32_address_t *addr = &am.addr;
2149 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2152 memset(&am, 0, sizeof(am));
2153 build_address(&am, op, ia32_create_am_double_use);
2155 dbgi = get_irn_dbg_info(node);
2156 block = be_transform_node(src_block);
2157 new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
2158 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2159 set_address(new_node, addr);
2160 set_ia32_op_type(new_node, ia32_AddrModeD);
2161 set_ia32_ls_mode(new_node, mode);
2162 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the folded load's memory Proj to the new combined node */
2164 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2165 mem_proj = be_transform_node(am.mem_proj);
2166 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to transform Store(Mux(sel, 1, 0)) (or 0/1, negated) into an ia32
 * SetMem writing the flag result directly to memory; only for 8-bit modes. */
2171 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2173 ir_mode *mode = get_irn_mode(node);
2174 ir_node *mux_true = get_Mux_true(node);
2175 ir_node *mux_false = get_Mux_false(node);
2185 ia32_address_t addr;
/* setcc writes a single byte, so only 8-bit stores qualify */
2187 if (get_mode_size_bits(mode) != 8)
2190 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2192 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2198 build_address_ptr(&addr, ptr, mem);
2200 dbgi = get_irn_dbg_info(node);
2201 block = get_nodes_block(node);
2202 new_block = be_transform_node(block);
2203 cond = get_Mux_sel(node);
2204 flags = get_flags_node(cond, &pnc);
2205 new_mem = be_transform_node(mem);
2206 new_node = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2207 addr.index, addr.mem, flags, pnc, negated);
2208 set_address(new_node, &addr);
2209 set_ia32_op_type(new_node, ia32_AddrModeD);
2210 set_ia32_ls_mode(new_node, mode);
2211 SET_IA32_ORIG_NODE(new_node, node);
/* Try to transform a Store(op(Load, x)) pattern into a single ia32
 * destination-address-mode instruction. Dispatches on the stored value's
 * opcode; returns the new node or NULL when no pattern matches. */
2216 static ir_node *try_create_dest_am(ir_node *node)
2218 ir_node *val = get_Store_value(node);
2219 ir_node *mem = get_Store_mem(node);
2220 ir_node *ptr = get_Store_ptr(node);
2221 ir_mode *mode = get_irn_mode(val);
2222 unsigned bits = get_mode_size_bits(mode);
2227 /* handle only GP modes for now... */
2228 if (!ia32_mode_needs_gp_reg(mode))
2232 /* store must be the only user of the val node */
2233 if (get_irn_n_edges(val) > 1)
2235 /* skip pointless convs */
2237 ir_node *conv_op = get_Conv_op(val);
2238 ir_mode *pred_mode = get_irn_mode(conv_op);
2239 if (!ia32_mode_needs_gp_reg(pred_mode))
2241 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2249 /* value must be in the same block */
2250 if (get_nodes_block(node) != get_nodes_block(val))
2253 switch (get_irn_opcode(val)) {
2255 op1 = get_Add_left(val);
2256 op2 = get_Add_right(val);
/* add/sub by 1 -> inc/dec when the architecture prefers them */
2257 if (ia32_cg_config.use_incdec) {
2258 if (is_Const_1(op2)) {
2259 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2261 } else if (is_Const_Minus_1(op2)) {
2262 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2266 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2267 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2268 match_commutative | match_immediate);
2271 op1 = get_Sub_left(val);
2272 op2 = get_Sub_right(val);
2273 if (is_Const(op2)) {
/* Sub x,C should have been normalized to Add x,-C earlier */
2274 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2276 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2277 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2281 op1 = get_And_left(val);
2282 op2 = get_And_right(val);
2283 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2284 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2285 match_commutative | match_immediate);
2288 op1 = get_Or_left(val);
2289 op2 = get_Or_right(val);
2290 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2291 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2292 match_commutative | match_immediate);
2295 op1 = get_Eor_left(val);
2296 op2 = get_Eor_right(val);
2297 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2298 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2299 match_commutative | match_immediate);
/* shifts have no 8-bit variants: the same constructor is passed twice */
2302 op1 = get_Shl_left(val);
2303 op2 = get_Shl_right(val);
2304 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2305 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2309 op1 = get_Shr_left(val);
2310 op2 = get_Shr_right(val);
2311 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2312 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2316 op1 = get_Shrs_left(val);
2317 op2 = get_Shrs_right(val);
2318 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2319 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2323 op1 = get_Rotl_left(val);
2324 op2 = get_Rotl_right(val);
2325 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2326 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2329 /* TODO: match ROR patterns... */
2331 new_node = try_create_SetMem(val, ptr, mem);
2334 op1 = get_Minus_op(val);
2335 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2338 /* should be lowered already */
2339 assert(mode != mode_b);
2340 op1 = get_Not_op(val);
2341 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* propagate pinned state so a trapping/ordered store stays ordered */
2347 if (new_node != NULL) {
2348 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2349 get_irn_pinned(node) == op_pin_state_pinned) {
2350 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns true iff mode is a signed 16 or 32 bit integer mode, i.e. one the
 * x87 fist family can store to directly. */
2357 static bool possible_int_mode_for_fp(ir_mode *mode)
2361 if (!mode_is_signed(mode))
2363 size = get_mode_size_bits(mode);
2364 if (size != 16 && size != 32)
/* Returns non-zero if node is a Conv from a float mode to an integer mode
 * that an x87 fist store can handle (see possible_int_mode_for_fp). */
2369 static int is_float_to_int_conv(const ir_node *node)
2371 ir_mode *mode = get_irn_mode(node);
2375 if (!possible_int_mode_for_fp(mode))
2380 conv_op = get_Conv_op(node);
2381 conv_mode = get_irn_mode(conv_op);
2383 if (!mode_is_float(conv_mode))
2390 * Transform a Store(floatConst) into a sequence of
2393 * @return the created ia32 Store node
/* Emits one 32-bit integer immediate store per 4 bytes of the float
 * constant; multiple stores are joined with a Sync. */
2395 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2397 ir_mode *mode = get_irn_mode(cns);
2398 unsigned size = get_mode_size_bytes(mode);
2399 tarval *tv = get_Const_tarval(cns);
2400 ir_node *block = get_nodes_block(node);
2401 ir_node *new_block = be_transform_node(block);
2402 ir_node *ptr = get_Store_ptr(node);
2403 ir_node *mem = get_Store_mem(node);
2404 dbg_info *dbgi = get_irn_dbg_info(node);
2408 ia32_address_t addr;
/* only float sizes that are multiples of 4 bytes are supported */
2410 assert(size % 4 == 0);
2413 build_address_ptr(&addr, ptr, mem);
/* assemble a 32-bit little-endian word from the tarval's bytes */
2417 get_tarval_sub_bits(tv, ofs) |
2418 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2419 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2420 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2421 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2423 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2424 addr.index, addr.mem, imm);
2426 set_irn_pinned(new_node, get_irn_pinned(node));
2427 set_ia32_op_type(new_node, ia32_AddrModeD);
2428 set_ia32_ls_mode(new_node, mode_Iu);
2429 set_address(new_node, &addr);
2430 SET_IA32_ORIG_NODE(new_node, node);
2433 ins[i++] = new_node;
2438 } while (size != 0);
/* join the partial stores' memory outputs */
2441 return new_rd_Sync(dbgi, current_ir_graph, new_block, i, ins);
2448 * Generate a vfist or vfisttp instruction.
/* @param fist out-parameter receiving the actual store node (presumably set
 * in elided lines — TODO confirm); return value is the memory result */
2450 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2451 ir_node *mem, ir_node *val, ir_node **fist)
2455 if (ia32_cg_config.use_fisttp) {
2456 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2457 if other users exists */
2458 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2459 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2460 ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
/* keep the popped value alive for the register allocator */
2461 be_new_Keep(reg_class, irg, block, 1, &value);
2463 new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* classic fist needs the FPU control word set to truncation mode */
2466 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2469 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2475 * Transforms a general (no special case) Store.
2477 * @return the created ia32 Store node
2479 static ir_node *gen_general_Store(ir_node *node)
2481 ir_node *val = get_Store_value(node);
2482 ir_mode *mode = get_irn_mode(val);
2483 ir_node *block = get_nodes_block(node);
2484 ir_node *new_block = be_transform_node(block);
2485 ir_node *ptr = get_Store_ptr(node);
2486 ir_node *mem = get_Store_mem(node);
2487 dbg_info *dbgi = get_irn_dbg_info(node);
2488 ir_node *new_val, *new_node, *store;
2489 ia32_address_t addr;
2491 /* check for destination address mode */
2492 new_node = try_create_dest_am(node);
2493 if (new_node != NULL)
2496 /* construct store address */
2497 memset(&addr, 0, sizeof(addr));
2498 ia32_create_address_mode(&addr, ptr, 0);
2500 if (addr.base == NULL) {
2501 addr.base = noreg_GP;
2503 addr.base = be_transform_node(addr.base);
2506 if (addr.index == NULL) {
2507 addr.index = noreg_GP;
2509 addr.index = be_transform_node(addr.index);
2511 addr.mem = be_transform_node(mem);
2513 if (mode_is_float(mode)) {
2514 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2516 while (is_Conv(val) && mode == get_irn_mode(val)) {
2517 ir_node *op = get_Conv_op(val);
2518 if (!mode_is_float(get_irn_mode(op)))
2522 new_val = be_transform_node(val);
2523 if (ia32_cg_config.use_sse2) {
2524 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2525 addr.index, addr.mem, new_val);
2527 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2528 addr.index, addr.mem, new_val, mode);
/* float->int conv + store can be fused into a single fist (x87 only) */
2531 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2532 val = get_Conv_op(val);
2534 /* TODO: is this optimisation still necessary at all (middleend)? */
2535 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2536 while (is_Conv(val)) {
2537 ir_node *op = get_Conv_op(val);
2538 if (!mode_is_float(get_irn_mode(op)))
2540 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2544 new_val = be_transform_node(val);
2545 new_node = gen_vfist(dbgi, current_ir_graph, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store; 8-bit stores need the dedicated constructor because
 * only eax/ebx/ecx/edx have byte sub-registers */
2547 new_val = create_immediate_or_transform(val, 0);
2548 assert(mode != mode_b);
2550 if (get_mode_size_bits(mode) == 8) {
2551 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2552 addr.index, addr.mem, new_val);
2554 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2555 addr.index, addr.mem, new_val);
2560 set_irn_pinned(store, get_irn_pinned(node));
2561 set_ia32_op_type(store, ia32_AddrModeD);
2562 set_ia32_ls_mode(store, mode);
2564 set_address(store, &addr);
2565 SET_IA32_ORIG_NODE(store, node);
2571 * Transforms a Store.
2573 * @return the created ia32 Store node
/* Dispatcher: float constant stores get a special lowering, everything
 * else goes through gen_general_Store(). */
2575 static ir_node *gen_Store(ir_node *node)
2577 ir_node *val = get_Store_value(node);
2578 ir_mode *mode = get_irn_mode(val);
2580 if (mode_is_float(mode) && is_Const(val)) {
2581 /* We can transform every floating const store
2582 into a sequence of integer stores.
2583 If the constant is already in a register,
2584 it would be better to use it, but we don't
2585 have this information here. */
2586 return gen_float_const_Store(node, val);
2588 return gen_general_Store(node);
2592 * Transforms a Switch.
2594 * @return the created ia32 SwitchJmp node
/* Scans all case Projs to find the min/max case value, rejects jump tables
 * with a spread above 256000, rebases the selector to 0 via a Lea when
 * needed, then emits an ia32 SwitchJmp. */
2596 static ir_node *create_Switch(ir_node *node)
2598 dbg_info *dbgi = get_irn_dbg_info(node);
2599 ir_node *block = be_transform_node(get_nodes_block(node));
2600 ir_node *sel = get_Cond_selector(node);
2601 ir_node *new_sel = be_transform_node(sel);
2602 long switch_min = LONG_MAX;
2603 long switch_max = LONG_MIN;
2604 long default_pn = get_Cond_default_proj(node);
2606 const ir_edge_t *edge;
2608 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2610 /* determine the smallest switch case value */
2611 foreach_out_edge(node, edge) {
2612 ir_node *proj = get_edge_src_irn(edge);
2613 long pn = get_Proj_proj(proj);
/* the default Proj does not constrain the table range */
2614 if (pn == default_pn)
2617 if (pn < switch_min)
2619 if (pn > switch_max)
2623 if ((unsigned long) (switch_max - switch_min) > 256000) {
2624 panic("Size of switch %+F bigger than 256000", node);
2627 if (switch_min != 0) {
2628 /* if smallest switch case is not 0 we need an additional sub */
2629 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2630 add_ia32_am_offs_int(new_sel, -switch_min);
2631 set_ia32_op_type(new_sel, ia32_AddrModeS);
2633 SET_IA32_ORIG_NODE(new_sel, node);
2636 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2637 SET_IA32_ORIG_NODE(new_node, node);
2643 * Transform a Cond node.
/* Non-mode_b selectors are switches; otherwise the flags come from the
 * selector's comparison and a conditional jump (Jcc) is emitted. */
2645 static ir_node *gen_Cond(ir_node *node)
2647 ir_node *block = get_nodes_block(node);
2648 ir_node *new_block = be_transform_node(block);
2649 dbg_info *dbgi = get_irn_dbg_info(node);
2650 ir_node *sel = get_Cond_selector(node);
2651 ir_mode *sel_mode = get_irn_mode(sel);
2652 ir_node *flags = NULL;
2656 if (sel_mode != mode_b) {
2657 return create_Switch(node);
2660 /* we get flags from a Cmp */
2661 flags = get_flags_node(sel, &pnc);
2663 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2664 SET_IA32_ORIG_NODE(new_node, node);
2670 * Transform a be_Copy.
/* Duplicates the node and normalizes GP-register modes to mode_Iu. */
2672 static ir_node *gen_be_Copy(ir_node *node)
2674 ir_node *new_node = be_duplicate_node(node);
2675 ir_mode *mode = get_irn_mode(new_node);
2677 if (ia32_mode_needs_gp_reg(mode)) {
2678 set_irn_mode(new_node, mode_Iu);
/* Creates an x87 float compare: vFucomi when the CPU supports fucomi,
 * otherwise vFtstFnstsw/vFucomFnstsw followed by Sahf to move the
 * FPU status word into the CPU flags. */
2684 static ir_node *create_Fucom(ir_node *node)
2686 dbg_info *dbgi = get_irn_dbg_info(node);
2687 ir_node *block = get_nodes_block(node);
2688 ir_node *new_block = be_transform_node(block);
2689 ir_node *left = get_Cmp_left(node);
2690 ir_node *new_left = be_transform_node(left);
2691 ir_node *right = get_Cmp_right(node);
2695 if (ia32_cg_config.use_fucomi) {
2696 new_right = be_transform_node(right);
2697 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2699 set_ia32_commutative(new_node);
2700 SET_IA32_ORIG_NODE(new_node, node);
/* compare against 0 can use the shorter ftst form */
2702 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2703 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2705 new_right = be_transform_node(right);
2706 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2709 set_ia32_commutative(new_node);
2711 SET_IA32_ORIG_NODE(new_node, node);
/* transfer FPU status word (in AH) into EFLAGS */
2713 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2714 SET_IA32_ORIG_NODE(new_node, node);
/* Creates an SSE float compare (Ucomi), allowing one operand to come
 * from memory via address-mode matching. */
2720 static ir_node *create_Ucomi(ir_node *node)
2722 dbg_info *dbgi = get_irn_dbg_info(node);
2723 ir_node *src_block = get_nodes_block(node);
2724 ir_node *new_block = be_transform_node(src_block);
2725 ir_node *left = get_Cmp_left(node);
2726 ir_node *right = get_Cmp_right(node);
2728 ia32_address_mode_t am;
2729 ia32_address_t *addr = &am.addr;
2731 match_arguments(&am, src_block, left, right, NULL,
2732 match_commutative | match_am);
2734 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2735 addr->mem, am.new_op1, am.new_op2,
2737 set_am_attributes(new_node, &am);
2739 SET_IA32_ORIG_NODE(new_node, node);
2741 new_node = fix_mem_proj(new_node, &am);
2747 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2748 * to fold an and into a test node
/* Returns false as soon as any user Proj carries a relation other than
 * Eq/Lg; only equality tests survive the And->Test folding. */
2750 static bool can_fold_test_and(ir_node *node)
2752 const ir_edge_t *edge;
2754 /** we can only have eq and lg projs */
2755 foreach_out_edge(node, edge) {
2756 ir_node *proj = get_edge_src_irn(edge);
2757 pn_Cmp pnc = get_Proj_proj(proj);
2758 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2766 * returns true if it is assured, that the upper bits of a node are "clean"
2767 * which means for a 16 or 8 bit value, that the upper bits in the register
2768 * are 0 for unsigned and a copy of the last significant bit for signed
/* Recursively inspects the transformed ia32 node: looks through Projs,
 * and special-cases Conv, Shr/Sar, And and Const/Immediate opcodes. */
2771 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2773 assert(ia32_mode_needs_gp_reg(mode));
/* full-width (>=32 bit) values have no "upper" bits to worry about */
2774 if (get_mode_size_bits(mode) >= 32)
2777 if (is_Proj(transformed_node))
2778 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2780 switch (get_ia32_irn_opcode(transformed_node)) {
2781 case iro_ia32_Conv_I2I:
2782 case iro_ia32_Conv_I2I8Bit: {
2783 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* sign-agreement and size must match for the conv to guarantee cleanliness */
2784 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2786 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2793 if (mode_is_signed(mode)) {
2794 return false; /* TODO handle signed modes */
/* unsigned: a Shr by a large-enough constant zeroes the upper bits */
2796 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2797 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2798 const ia32_immediate_attr_t *attr
2799 = get_ia32_immediate_attr_const(right);
2800 if (attr->symconst == 0 &&
2801 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2805 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2809 /* TODO too conservative if shift amount is constant */
2810 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand suffices */
2813 if (!mode_is_signed(mode)) {
2815 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2816 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2818 /* TODO if one is known to be zero extended, then || is sufficient */
2823 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2824 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2826 case iro_ia32_Const:
2827 case iro_ia32_Immediate: {
2828 const ia32_immediate_attr_t *attr =
2829 get_ia32_immediate_attr_const(transformed_node);
/* constants: check that the bits above the mode are all 0 (unsigned)
 * or a sign-extension of the top bit (signed) */
2830 if (mode_is_signed(mode)) {
2831 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2832 return shifted == 0 || shifted == -1;
2834 unsigned long shifted = (unsigned long)attr->offset;
2835 shifted >>= get_mode_size_bits(mode);
2836 return shifted == 0;
2846 * Generate code for a Cmp.
/* Float compares are delegated to Ucomi (SSE) or Fucom (x87). Integer
 * compares prefer a Test instruction for the (x & y) ==/!= 0 pattern,
 * otherwise emit Cmp/Cmp8Bit, widening to 32-bit mode when the upper
 * bits of both operands are known clean (smaller opcode). */
2848 static ir_node *gen_Cmp(ir_node *node)
2850 dbg_info *dbgi = get_irn_dbg_info(node);
2851 ir_node *block = get_nodes_block(node);
2852 ir_node *new_block = be_transform_node(block);
2853 ir_node *left = get_Cmp_left(node);
2854 ir_node *right = get_Cmp_right(node);
2855 ir_mode *cmp_mode = get_irn_mode(left);
2857 ia32_address_mode_t am;
2858 ia32_address_t *addr = &am.addr;
2861 if (mode_is_float(cmp_mode)) {
2862 if (ia32_cg_config.use_sse2) {
2863 return create_Ucomi(node);
2865 return create_Fucom(node);
2869 assert(ia32_mode_needs_gp_reg(cmp_mode));
2871 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2872 cmp_unsigned = !mode_is_signed(cmp_mode);
2873 if (is_Const_0(right) &&
2875 get_irn_n_edges(left) == 1 &&
2876 can_fold_test_and(node)) {
2877 /* Test(and_left, and_right) */
2878 ir_node *and_left = get_And_left(left);
2879 ir_node *and_right = get_And_right(left);
2881 /* matze: code here used mode instead of cmd_mode, I think it is always
2882 * the same as cmp_mode, but I leave this here to see if this is really
2885 assert(get_irn_mode(and_left) == cmp_mode);
2887 match_arguments(&am, block, and_left, and_right, NULL,
2889 match_am | match_8bit_am | match_16bit_am |
2890 match_am_and_immediates | match_immediate);
2892 /* use 32bit compare mode if possible since the opcode is smaller */
2893 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2894 upper_bits_clean(am.new_op2, cmp_mode)) {
2895 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2898 if (get_mode_size_bits(cmp_mode) == 8) {
2899 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2900 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2903 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2904 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2907 /* Cmp(left, right) */
2908 match_arguments(&am, block, left, right, NULL,
2909 match_commutative | match_am | match_8bit_am |
2910 match_16bit_am | match_am_and_immediates |
2912 /* use 32bit compare mode if possible since the opcode is smaller */
2913 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2914 upper_bits_clean(am.new_op2, cmp_mode)) {
2915 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2918 if (get_mode_size_bits(cmp_mode) == 8) {
2919 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2920 addr->index, addr->mem, am.new_op1,
2921 am.new_op2, am.ins_permuted,
2924 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2925 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2928 set_am_attributes(new_node, &am);
2929 set_ia32_ls_mode(new_node, cmp_mode);
2931 SET_IA32_ORIG_NODE(new_node, node);
2933 new_node = fix_mem_proj(new_node, &am);
/* Creates an ia32 CMov from a Mux: selects between the transformed
 * true/false values based on the given flags node and relation (pnc).
 * Requires CMOV support (asserted) and GP-register operands. */
2938 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2941 dbg_info *dbgi = get_irn_dbg_info(node);
2942 ir_node *block = get_nodes_block(node);
2943 ir_node *new_block = be_transform_node(block);
2944 ir_node *val_true = get_Mux_true(node);
2945 ir_node *val_false = get_Mux_false(node);
2947 ia32_address_mode_t am;
2948 ia32_address_t *addr;
2950 assert(ia32_cg_config.use_cmov);
2951 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2955 match_arguments(&am, block, val_false, val_true, flags,
2956 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2958 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2959 addr->mem, am.new_op1, am.new_op2, new_flags,
2960 am.ins_permuted, pnc);
2961 set_am_attributes(new_node, &am);
2963 SET_IA32_ORIG_NODE(new_node, node);
2965 new_node = fix_mem_proj(new_node, &am);
2971 * Creates a ia32 Setcc instruction.
/* Emits Set (setcc) producing an 8-bit 0/1 value; widens it with a
 * Conv_I2I8Bit when the original node's mode is wider than 8 bits. */
2973 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2974 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2977 ir_mode *mode = get_irn_mode(orig_node);
2980 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2981 SET_IA32_ORIG_NODE(new_node, orig_node);
2983 /* we might need to conv the result up */
2984 if (get_mode_size_bits(mode) > 8) {
2985 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
2986 nomem, new_node, mode_Bu);
2987 SET_IA32_ORIG_NODE(new_node, orig_node);
2994 * Create instruction for an unsigned Difference or Zero.
/* Lowers Mux(a >=u b, a - b, 0) to: Sub (res, flags), Sbb0 of the carry,
 * then And of the difference with the borrow mask. */
2996 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
2998 ir_graph *irg = current_ir_graph;
2999 ir_mode *mode = get_irn_mode(psi);
3000 ir_node *new_node, *sub, *sbb, *eflags, *block;
3004 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3005 match_mode_neutral | match_am | match_immediate | match_two_users);
3007 block = get_nodes_block(new_node);
3009 if (is_Proj(new_node)) {
3010 sub = get_Proj_pred(new_node);
3011 assert(is_ia32_Sub(sub));
/* turn the Sub into a multi-result node so we can grab its flags */
3014 set_irn_mode(sub, mode_T);
3015 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
3017 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3019 dbgi = get_irn_dbg_info(psi);
3020 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3022 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3023 set_ia32_commutative(new_node);
3028 * Create an const array of two float consts.
3030 * @param c0 the first constant
3031 * @param c1 the second constant
3032 * @param new_mode IN/OUT for the mode of the constants, if NULL
3033 * smallest possible mode will be used
/* Builds a static, local, constant global entity holding {c0, c1},
 * narrowing the tarvals to mode_F/mode_D when that is lossless. */
3035 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3037 ir_mode *mode = *new_mode;
3039 ir_initializer_t *initializer;
3040 tarval *tv0 = get_Const_tarval(c0);
3041 tarval *tv1 = get_Const_tarval(c1);
3044 /* detect the best mode for the constants */
3045 mode = get_tarval_mode(tv0);
3047 if (mode != mode_F) {
3048 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3049 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3051 tv0 = tarval_convert_to(tv0, mode);
3052 tv1 = tarval_convert_to(tv1, mode);
3053 } else if (mode != mode_D) {
3054 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3055 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3057 tv0 = tarval_convert_to(tv0, mode);
3058 tv1 = tarval_convert_to(tv1, mode);
3065 tp = ia32_create_float_type(mode, 4);
3066 tp = ia32_create_float_array(tp);
/* anonymous local constant, placed in the global type */
3068 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3070 set_entity_ld_ident(ent, get_entity_ident(ent));
3071 set_entity_visibility(ent, visibility_local);
3072 set_entity_variability(ent, variability_constant);
3073 set_entity_allocation(ent, allocation_static);
3075 initializer = create_initializer_compound(2);
3077 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3078 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3080 set_entity_initializer(ent, initializer);
3087 * Transforms a Mux node into some code sequence.
3089 * @return The transformed node.
/* Float Mux: tries SSE MIN/MAX patterns, then a const-array load indexed
 * by the setcc result; otherwise panics. Integer Mux: recognizes the
 * unsigned Doz pattern, then Set for 0/1 constants, else CMov. */
3091 static ir_node *gen_Mux(ir_node *node)
3093 dbg_info *dbgi = get_irn_dbg_info(node);
3094 ir_node *block = get_nodes_block(node);
3095 ir_node *new_block = be_transform_node(block);
3096 ir_node *mux_true = get_Mux_true(node);
3097 ir_node *mux_false = get_Mux_false(node);
3098 ir_node *cond = get_Mux_sel(node);
3099 ir_mode *mode = get_irn_mode(node);
3104 assert(get_irn_mode(cond) == mode_b);
3106 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3107 if (mode_is_float(mode)) {
3108 ir_node *cmp = get_Proj_pred(cond);
3109 ir_node *cmp_left = get_Cmp_left(cmp);
3110 ir_node *cmp_right = get_Cmp_right(cmp);
3111 pn_Cmp pnc = get_Proj_proj(cond);
3113 if (ia32_cg_config.use_sse2) {
3114 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3115 if (cmp_left == mux_true && cmp_right == mux_false) {
3116 /* Mux(a <= b, a, b) => MIN */
3117 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3118 match_commutative | match_am | match_two_users);
3119 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3120 /* Mux(a <= b, b, a) => MAX */
3121 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3122 match_commutative | match_am | match_two_users);
3124 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3125 if (cmp_left == mux_true && cmp_right == mux_false) {
3126 /* Mux(a >= b, a, b) => MAX */
3127 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3128 match_commutative | match_am | match_two_users);
3129 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3130 /* Mux(a >= b, b, a) => MIN */
3131 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3132 match_commutative | match_am | match_two_users);
/* both values constant: load from a 2-element const array, indexed by
 * the 0/1 setcc result scaled to the element size */
3136 if (is_Const(mux_true) && is_Const(mux_false)) {
3137 ia32_address_mode_t am;
3142 flags = get_flags_node(cond, &pnc);
3143 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3145 if (ia32_cg_config.use_sse2) {
3146 /* cannot load from different mode on SSE */
3149 /* x87 can load any mode */
3153 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3155 switch (get_mode_size_bytes(new_mode)) {
3165 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3166 set_ia32_am_scale(new_node, 2);
3171 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3172 set_ia32_am_scale(new_node, 1);
3175 /* arg, shift 16 NOT supported */
3177 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3180 panic("Unsupported constant size");
3183 am.ls_mode = new_mode;
3184 am.addr.base = noreg_GP;
3185 am.addr.index = new_node;
3186 am.addr.mem = nomem;
3188 am.addr.scale = scale;
3189 am.addr.use_frame = 0;
3190 am.addr.frame_entity = NULL;
3191 am.addr.symconst_sign = 0;
3192 am.mem_proj = am.addr.mem;
3193 am.op_type = ia32_AddrModeS;
3196 am.pinned = op_pin_state_floats;
3198 am.ins_permuted = 0;
3200 if (ia32_cg_config.use_sse2)
3201 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3203 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3204 set_am_attributes(load, &am);
3206 return new_rd_Proj(NULL, current_ir_graph, block, load, mode_vfp, pn_ia32_res);
3208 panic("cannot transform floating point Mux");
/* --- integer Mux from here on --- */
3211 assert(ia32_mode_needs_gp_reg(mode));
3213 if (is_Proj(cond)) {
3214 ir_node *cmp = get_Proj_pred(cond);
3216 ir_node *cmp_left = get_Cmp_left(cmp);
3217 ir_node *cmp_right = get_Cmp_right(cmp);
3218 pn_Cmp pnc = get_Proj_proj(cond);
3220 /* check for unsigned Doz first */
3221 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3222 is_Const_0(mux_false) && is_Sub(mux_true) &&
3223 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3224 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3225 return create_Doz(node, cmp_left, cmp_right);
3226 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3227 is_Const_0(mux_true) && is_Sub(mux_false) &&
3228 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3229 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3230 return create_Doz(node, cmp_left, cmp_right);
3235 flags = get_flags_node(cond, &pnc);
3237 if (is_Const(mux_true) && is_Const(mux_false)) {
3238 /* both are const, good */
3239 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3240 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3241 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3242 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
3244 /* Not that simple. */
3249 new_node = create_CMov(node, cond, flags, pnc);
3257 * Create a conversion from x87 state register to general purpose.
/* Spills the x87 value with fist(p) onto the frame and reloads it as an
 * integer. Unsigned 32-bit values are stored as 64-bit signed and only
 * the low 32 bits are loaded back. */
3259 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3261 ir_node *block = be_transform_node(get_nodes_block(node));
3262 ir_node *op = get_Conv_op(node);
3263 ir_node *new_op = be_transform_node(op);
3264 ir_graph *irg = current_ir_graph;
3265 dbg_info *dbgi = get_irn_dbg_info(node);
3266 ir_mode *mode = get_irn_mode(node);
3267 ir_node *fist, *load, *mem;
3269 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3270 set_irn_pinned(fist, op_pin_state_floats);
3271 set_ia32_use_frame(fist);
3272 set_ia32_op_type(fist, ia32_AddrModeD);
3274 assert(get_mode_size_bits(mode) <= 32);
3275 /* exception we can only store signed 32 bit integers, so for unsigned
3276 we store a 64bit (signed) integer and load the lower bits */
3277 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3278 set_ia32_ls_mode(fist, mode_Ls);
3280 set_ia32_ls_mode(fist, mode_Is);
3282 SET_IA32_ORIG_NODE(fist, node);
3285 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3287 set_irn_pinned(load, op_pin_state_floats);
3288 set_ia32_use_frame(load);
3289 set_ia32_op_type(load, ia32_AddrModeS);
3290 set_ia32_ls_mode(load, mode_Is);
/* a 64-bit fist store needs a 64-bit stack entity */
3291 if (get_ia32_ls_mode(fist) == mode_Ls) {
3292 ia32_attr_t *attr = get_ia32_attr(load);
3293 attr->data.need_64bit_stackent = 1;
3295 ia32_attr_t *attr = get_ia32_attr(load);
3296 attr->data.need_32bit_stackent = 1;
3298 SET_IA32_ORIG_NODE(load, node);
3300 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3304 * Creates a x87 strict Conv by placing a Store and a Load
/* Forces rounding to tgt_mode precision by a frame store (vfst) followed
 * by a reload (vfld); returns the loaded value as a Proj. */
3306 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3308 ir_node *block = get_nodes_block(node);
3309 ir_graph *irg = current_ir_graph;
3310 dbg_info *dbgi = get_irn_dbg_info(node);
3311 ir_node *frame = get_irg_frame(irg);
3312 ir_node *store, *load;
3315 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3316 set_ia32_use_frame(store);
3317 set_ia32_op_type(store, ia32_AddrModeD);
3318 SET_IA32_ORIG_NODE(store, node);
3320 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3321 set_ia32_use_frame(load);
3322 set_ia32_op_type(load, ia32_AddrModeS);
3323 SET_IA32_ORIG_NODE(load, node);
3325 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
/* Helper: picks Conv_I2I8Bit for 8-bit modes, Conv_I2I otherwise, and
 * constructs the conversion node with the given operands. */
3329 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3330 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3332 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3334 func = get_mode_size_bits(mode) == 8 ?
3335 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3336 return func(dbgi, block, base, index, mem, val, mode);
3340 * Create a conversion from general purpose to x87 register
/* Tries to fild straight from memory (source AM). Otherwise stores the
 * integer to the frame and filds it back; unsigned 32-bit values get a
 * zero upper half stored so a 64-bit fild reads the correct value. */
3342 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3344 ir_node *src_block = get_nodes_block(node);
3345 ir_node *block = be_transform_node(src_block);
3346 ir_graph *irg = current_ir_graph;
3347 dbg_info *dbgi = get_irn_dbg_info(node);
3348 ir_node *op = get_Conv_op(node);
3349 ir_node *new_op = NULL;
3351 ir_mode *store_mode;
3356 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3357 if (possible_int_mode_for_fp(src_mode)) {
3358 ia32_address_mode_t am;
3360 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3361 if (am.op_type == ia32_AddrModeS) {
3362 ia32_address_t *addr = &am.addr;
3364 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3365 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3367 set_am_attributes(fild, &am);
3368 SET_IA32_ORIG_NODE(fild, node);
3370 fix_mem_proj(fild, &am);
3375 if (new_op == NULL) {
3376 new_op = be_transform_node(op);
3379 mode = get_irn_mode(op);
3381 /* first convert to 32 bit signed if necessary */
3382 if (get_mode_size_bits(src_mode) < 32) {
3383 if (!upper_bits_clean(new_op, src_mode)) {
3384 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3385 SET_IA32_ORIG_NODE(new_op, node);
3390 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can pick it up */
3393 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3395 set_ia32_use_frame(store);
3396 set_ia32_op_type(store, ia32_AddrModeD);
3397 set_ia32_ls_mode(store, mode_Iu);
3399 /* exception for 32bit unsigned, do a 64bit spill+load */
3400 if (!mode_is_signed(mode)) {
3403 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3405 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3406 noreg_GP, nomem, zero_const);
3408 set_ia32_use_frame(zero_store);
3409 set_ia32_op_type(zero_store, ia32_AddrModeD);
/* upper 4 bytes of the 64-bit slot get an explicit zero */
3410 add_ia32_am_offs_int(zero_store, 4);
3411 set_ia32_ls_mode(zero_store, mode_Iu);
3416 store = new_rd_Sync(dbgi, irg, block, 2, in);
3417 store_mode = mode_Ls;
3419 store_mode = mode_Is;
3423 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3425 set_ia32_use_frame(fild);
3426 set_ia32_op_type(fild, ia32_AddrModeS);
3427 set_ia32_ls_mode(fild, store_mode);
3429 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3435 * Create a conversion from one integer mode into another one
/* Emits a Conv_I2I(8Bit) in the smaller of the two modes; skips the conv
 * entirely when the operand's upper bits are already known clean. */
3437 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3438 dbg_info *dbgi, ir_node *block, ir_node *op,
3441 ir_node *new_block = be_transform_node(block);
3443 ir_mode *smaller_mode;
3444 ia32_address_mode_t am;
3445 ia32_address_t *addr = &am.addr;
3448 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3449 smaller_mode = src_mode;
3451 smaller_mode = tgt_mode;
3454 #ifdef DEBUG_libfirm
3456 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3461 match_arguments(&am, block, NULL, op, NULL,
3462 match_am | match_8bit_am | match_16bit_am);
3464 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3465 /* unnecessary conv. in theory it shouldn't have been AM */
3466 assert(is_ia32_NoReg_GP(addr->base));
3467 assert(is_ia32_NoReg_GP(addr->index));
3468 assert(is_NoMem(addr->mem));
3469 assert(am.addr.offset == 0);
3470 assert(am.addr.symconst_ent == NULL);
3474 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3475 addr->mem, am.new_op2, smaller_mode);
3476 set_am_attributes(new_node, &am);
3477 /* match_arguments assume that out-mode = in-mode, this isn't true here
3479 set_ia32_ls_mode(new_node, smaller_mode);
3480 SET_IA32_ORIG_NODE(new_node, node);
3481 new_node = fix_mem_proj(new_node, &am);
3486 * Transforms a Conv node.
3488 * @return The created ia32 Conv node
/* Dispatches on the (src, tgt) mode pair: mode_b sources are no-ops,
 * float<->float uses SSE Conv_FP2FP or x87 strict store/load, float->int
 * uses Conv_FP2I or the x87 fist path, int->float uses Conv_I2FP or the
 * x87 fild path, and int->int goes through create_I2I_Conv. */
3490 static ir_node *gen_Conv(ir_node *node)
3492 ir_node *block = get_nodes_block(node);
3493 ir_node *new_block = be_transform_node(block);
3494 ir_node *op = get_Conv_op(node);
3495 ir_node *new_op = NULL;
3496 dbg_info *dbgi = get_irn_dbg_info(node);
3497 ir_mode *src_mode = get_irn_mode(op);
3498 ir_mode *tgt_mode = get_irn_mode(node);
3499 int src_bits = get_mode_size_bits(src_mode);
3500 int tgt_bits = get_mode_size_bits(tgt_mode);
3501 ir_node *res = NULL;
3503 assert(!mode_is_int(src_mode) || src_bits <= 32);
3504 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3506 if (src_mode == mode_b) {
3507 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3508 /* nothing to do, we already model bools as 0/1 ints */
3509 return be_transform_node(op);
3512 if (src_mode == tgt_mode) {
3513 if (get_Conv_strict(node)) {
3514 if (ia32_cg_config.use_sse2) {
3515 /* when we are in SSE mode, we can kill all strict no-op conversion */
3516 return be_transform_node(op);
3519 /* this should be optimized already, but who knows... */
3520 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3521 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3522 return be_transform_node(op);
3526 if (mode_is_float(src_mode)) {
3527 new_op = be_transform_node(op);
3528 /* we convert from float ... */
3529 if (mode_is_float(tgt_mode)) {
3531 /* Matze: I'm a bit unsure what the following is for? seems wrong
3533 if (src_mode == mode_E && tgt_mode == mode_D
3534 && !get_Conv_strict(node)) {
3535 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3541 if (ia32_cg_config.use_sse2) {
3542 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3543 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3545 set_ia32_ls_mode(res, tgt_mode);
3547 if (get_Conv_strict(node)) {
3548 /* if fp_no_float_fold is not set then we assume that we
3549 * don't have any float operations in a non
3550 * mode_float_arithmetic mode and can skip strict upconvs */
3551 if (src_bits < tgt_bits
3552 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3553 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3556 res = gen_x87_strict_conv(tgt_mode, new_op);
3557 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3561 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* float -> int */
3566 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3567 if (ia32_cg_config.use_sse2) {
3568 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3570 set_ia32_ls_mode(res, src_mode);
3572 return gen_x87_fp_to_gp(node);
3576 /* we convert from int ... */
3577 if (mode_is_float(tgt_mode)) {
3579 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3580 if (ia32_cg_config.use_sse2) {
3581 new_op = be_transform_node(op);
3582 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3584 set_ia32_ls_mode(res, tgt_mode);
3586 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3587 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3588 res = gen_x87_gp_to_fp(node, src_mode);
3590 /* we need a strict-Conv, if the int mode has more bits than the
3592 if (float_mantissa < int_mantissa) {
3593 res = gen_x87_strict_conv(tgt_mode, res);
3594 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3598 } else if (tgt_mode == mode_b) {
3599 /* mode_b lowering already took care that we only have 0/1 values */
3600 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3601 src_mode, tgt_mode));
3602 return be_transform_node(op);
/* int -> int */
3605 if (src_bits == tgt_bits) {
3606 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3607 src_mode, tgt_mode));
3608 return be_transform_node(op);
3611 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Tries to encode the node as an ia32 Immediate (subject to the given
 * constraint type); falls back to the normal transformation. */
3619 static ir_node *create_immediate_or_transform(ir_node *node,
3620 char immediate_constraint_type)
3622 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3623 if (new_node == NULL) {
3624 new_node = be_transform_node(node);
3630 * Transforms a FrameAddr into an ia32 Add.
/* Realized as a frame-relative Lea carrying the node's frame entity. */
3632 static ir_node *gen_be_FrameAddr(ir_node *node)
3634 ir_node *block = be_transform_node(get_nodes_block(node));
3635 ir_node *op = be_get_FrameAddr_frame(node);
3636 ir_node *new_op = be_transform_node(op);
3637 dbg_info *dbgi = get_irn_dbg_info(node);
3640 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3641 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3642 set_ia32_use_frame(new_node);
3644 SET_IA32_ORIG_NODE(new_node, node);
3650 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Only rewrites returns of a single primitive float result under SSE2:
 * stores xmm0 to the frame, reloads it into an x87 register (vfld), and
 * rebuilds the Barrier with the new value/memory inputs. All other
 * returns are duplicated unchanged. */
3652 static ir_node *gen_be_Return(ir_node *node)
3654 ir_graph *irg = current_ir_graph;
3655 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3656 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3657 ir_entity *ent = get_irg_entity(irg);
3658 ir_type *tp = get_entity_type(ent);
3663 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3664 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3666 int pn_ret_val, pn_ret_mem, arity, i;
3668 assert(ret_val != NULL);
3669 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3670 return be_duplicate_node(node);
3673 res_type = get_method_res_type(tp, 0);
3675 if (! is_Primitive_type(res_type)) {
3676 return be_duplicate_node(node);
3679 mode = get_type_mode(res_type);
3680 if (! mode_is_float(mode)) {
3681 return be_duplicate_node(node);
3684 assert(get_method_n_ress(tp) == 1);
3686 pn_ret_val = get_Proj_proj(ret_val);
3687 pn_ret_mem = get_Proj_proj(ret_mem);
3689 /* get the Barrier */
3690 barrier = get_Proj_pred(ret_val);
3692 /* get result input of the Barrier */
3693 ret_val = get_irn_n(barrier, pn_ret_val);
3694 new_ret_val = be_transform_node(ret_val);
3696 /* get memory input of the Barrier */
3697 ret_mem = get_irn_n(barrier, pn_ret_mem);
3698 new_ret_mem = be_transform_node(ret_mem);
3700 frame = get_irg_frame(irg);
3702 dbgi = get_irn_dbg_info(barrier);
3703 block = be_transform_node(get_nodes_block(barrier));
3705 /* store xmm0 onto stack */
3706 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3707 new_ret_mem, new_ret_val);
3708 set_ia32_ls_mode(sse_store, mode);
3709 set_ia32_op_type(sse_store, ia32_AddrModeD);
3710 set_ia32_use_frame(sse_store);
3712 /* load into x87 register */
3713 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3714 set_ia32_op_type(fld, ia32_AddrModeS);
3715 set_ia32_use_frame(fld);
3717 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3718 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3720 /* create a new barrier */
3721 arity = get_irn_arity(barrier);
3722 in = ALLOCAN(ir_node*, arity);
3723 for (i = 0; i < arity; ++i) {
/* swap in the reloaded value and the load's memory Proj */
3726 if (i == pn_ret_val) {
3728 } else if (i == pn_ret_mem) {
3731 ir_node *in = get_irn_n(barrier, i);
3732 new_in = be_transform_node(in);
3737 new_barrier = new_ir_node(dbgi, irg, block,
3738 get_irn_op(barrier), get_irn_mode(barrier),
3740 copy_node_attr(barrier, new_barrier);
3741 be_duplicate_deps(barrier, new_barrier);
3742 be_set_transformed_node(barrier, new_barrier);
3744 /* transform normally */
3745 return be_duplicate_node(node);
3749 * Transform a be_AddSP into an ia32_SubSP.
/* Transform a be_AddSP into an ia32 SubSP: the ia32 stack grows downwards,
 * so adding to the managed stack means subtracting from esp. */
3751 static ir_node *gen_be_AddSP(ir_node *node)
3753 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3754 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3756 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3757 match_am | match_immediate);
3761 * Transform a be_SubSP into an ia32_AddSP
/* Transform a be_SubSP into an ia32 AddSP (mirror of gen_be_AddSP:
 * shrinking the managed stack means adding to esp). */
3763 static ir_node *gen_be_SubSP(ir_node *node)
3765 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3766 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3768 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3769 match_am | match_immediate);
3773 * Change some phi modes
/* Transform a Phi: keep the node but adjust its mode to the backend register
 * class (gp/xmm/vfp). The Phi is created with its OLD, untransformed inputs
 * because Phis may close loops; the inputs are fixed up later.
 * NOTE(review): mode-assignment lines for the gp/sse/vfp cases appear to be
 * elided in this extract. */
3775 static ir_node *gen_Phi(ir_node *node)
3777 ir_node *block = be_transform_node(get_nodes_block(node));
3778 ir_graph *irg = current_ir_graph;
3779 dbg_info *dbgi = get_irn_dbg_info(node);
3780 ir_mode *mode = get_irn_mode(node);
3783 if (ia32_mode_needs_gp_reg(mode)) {
3784 /* we shouldn't have any 64bit stuff around anymore */
3785 assert(get_mode_size_bits(mode) <= 32);
3786 /* all integer operations are on 32bit registers now */
3788 } else if (mode_is_float(mode)) {
3789 if (ia32_cg_config.use_sse2) {
3796 /* phi nodes allow loops, so we use the old arguments for now
3797 * and fix this later */
3798 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3799 get_irn_in(node) + 1);
3800 copy_node_attr(node, phi);
3801 be_duplicate_deps(node, phi);
/* make sure the predecessors get transformed even though we kept the old
 * inputs for now */
3803 be_enqueue_preds(node);
/* Transform an IJmp (indirect jump) into an ia32 IJmp, allowing the jump
 * target to be folded as an address-mode operand or immediate. */
3811 static ir_node *gen_IJmp(ir_node *node)
3813 ir_node *block = get_nodes_block(node);
3814 ir_node *new_block = be_transform_node(block);
3815 dbg_info *dbgi = get_irn_dbg_info(node);
3816 ir_node *op = get_IJmp_target(node);
3818 ia32_address_mode_t am;
3819 ia32_address_t *addr = &am.addr;
/* the jump target must be a pointer */
3821 assert(get_irn_mode(op) == mode_P);
3823 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3825 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3826 addr->mem, am.new_op2);
3827 set_am_attributes(new_node, &am);
3828 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possible memory Proj when source address mode was matched */
3830 new_node = fix_mem_proj(new_node, &am);
3836 * Transform a Bound node.
/* Transform a Bound node. Only the common zero-lower-bound case (typical for
 * Java array checks) is supported: it becomes an unsigned Sub/compare whose
 * flags feed a Jcc with "unsigned less-than". The generic case panics. */
3838 static ir_node *gen_Bound(ir_node *node)
3841 ir_node *lower = get_Bound_lower(node);
3842 dbg_info *dbgi = get_irn_dbg_info(node);
3844 if (is_Const_0(lower)) {
3845 /* typical case for Java */
3846 ir_node *sub, *res, *flags, *block;
3847 ir_graph *irg = current_ir_graph;
3849 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3850 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3852 block = get_nodes_block(res);
/* ensure we have a mode_T Sub so both result and flags Projs exist */
3853 if (! is_Proj(res)) {
3855 set_irn_mode(sub, mode_T);
3856 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3858 sub = get_Proj_pred(res);
/* index < upper, compared unsigned so a negative index also fails */
3860 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3861 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3862 SET_IA32_ORIG_NODE(new_node, node);
3864 panic("generic Bound not supported in ia32 Backend");
/* Transform a lowered ShlDep (shift-left with extra dependency) into a real
 * ia32 Shl. */
3870 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3872 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3873 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3875 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3876 match_immediate | match_mode_neutral);
/* Transform a lowered ShrDep into a real ia32 Shr.
 * NOTE(review): the match-flags argument line appears elided in this
 * extract. */
3879 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3881 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3882 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3883 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Transform a lowered SarDep into a real ia32 Sar (arithmetic right shift).
 * NOTE(review): the match-flags argument line appears elided in this
 * extract. */
3887 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3889 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3890 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3891 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Transform a lowered Add (low word of a 64bit add) into an ia32 Add.
 * The node is forced to mode_T so the carry-flag Proj consumed by the
 * matching Adc remains available. */
3895 static ir_node *gen_ia32_l_Add(ir_node *node)
3897 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3898 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3899 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3900 match_commutative | match_am | match_immediate |
3901 match_mode_neutral);
/* gen_binop may hand back a result Proj; step back to the Add itself */
3903 if (is_Proj(lowered)) {
3904 lowered = get_Proj_pred(lowered);
3906 assert(is_ia32_Add(lowered));
3907 set_irn_mode(lowered, mode_T);
/* Transform a lowered Adc (add with carry, high word of a 64bit add) into
 * a real ia32 Adc consuming the flags produced by the paired l_Add. */
3913 static ir_node *gen_ia32_l_Adc(ir_node *node)
3915 return gen_binop_flags(node, new_bd_ia32_Adc,
3916 match_commutative | match_am | match_immediate |
3917 match_mode_neutral);
3921 * Transforms a l_MulS into a "real" MulS node.
3923 * @return the created ia32 Mul node
/* Transform a lowered Mul into a "real" ia32 Mul (unsigned widening
 * multiply producing EDX:EAX). */
3925 static ir_node *gen_ia32_l_Mul(ir_node *node)
3927 ir_node *left = get_binop_left(node);
3928 ir_node *right = get_binop_right(node);
3930 return gen_binop(node, left, right, new_bd_ia32_Mul,
3931 match_commutative | match_am | match_mode_neutral);
3935 * Transforms a l_IMulS into a "real" IMul1OPS node.
3937 * @return the created ia32 IMul1OP node
/* Transform a lowered IMul into a "real" ia32 IMul1OP (one-operand signed
 * widening multiply producing EDX:EAX). */
3939 static ir_node *gen_ia32_l_IMul(ir_node *node)
3941 ir_node *left = get_binop_left(node);
3942 ir_node *right = get_binop_right(node);
3944 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3945 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered Sub (low word of a 64bit subtract) into an ia32 Sub,
 * forced to mode_T so the borrow-flag Proj for the paired Sbb is
 * available. */
3948 static ir_node *gen_ia32_l_Sub(ir_node *node)
3950 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3951 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3952 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3953 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a result Proj; step back to the Sub itself */
3955 if (is_Proj(lowered)) {
3956 lowered = get_Proj_pred(lowered);
3958 assert(is_ia32_Sub(lowered));
3959 set_irn_mode(lowered, mode_T);
/* Transform a lowered Sbb (subtract with borrow, high word of a 64bit
 * subtract) into a real ia32 Sbb consuming the flags of the paired l_Sub. */
3965 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3967 return gen_binop_flags(node, new_bd_ia32_Sbb,
3968 match_am | match_immediate | match_mode_neutral);
3972 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3973 * op1 - target to be shifted
3974 * op2 - contains bits to be shifted into target
3976 * Only op3 can be an immediate.
/* Build the ia32 ShlD/ShrD double-precision shift for a lowered 64bit
 * shift. high is the word being shifted, low provides the bits shifted in,
 * count the shift amount (only count may be an immediate). */
3978 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3979 ir_node *low, ir_node *count)
3981 ir_node *block = get_nodes_block(node);
3982 ir_node *new_block = be_transform_node(block);
3983 dbg_info *dbgi = get_irn_dbg_info(node);
3984 ir_node *new_high = be_transform_node(high);
3985 ir_node *new_low = be_transform_node(low);
3989 /* the shift amount can be any mode that is bigger than 5 bits, since all
3990 * other bits are ignored anyway */
3991 while (is_Conv(count) &&
3992 get_irn_n_edges(count) == 1 &&
3993 mode_is_int(get_irn_mode(count))) {
3994 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3995 count = get_Conv_op(count);
/* count may be encoded as an immediate operand */
3997 new_count = create_immediate_or_transform(count, 0);
3999 if (is_ia32_l_ShlD(node)) {
4000 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4003 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4006 SET_IA32_ORIG_NODE(new_node, node);
/* Transform a lowered ShlD by delegating to the common 64bit-shift
 * helper. */
4011 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4013 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4014 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4015 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4016 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transform a lowered ShrD by delegating to the common 64bit-shift
 * helper. */
4019 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4021 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4022 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4023 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4024 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transform a lowered 64bit-integer-to-float conversion: spill the two
 * 32bit halves to the frame, fild the 64bit slot into an x87 register and,
 * for UNSIGNED sources, conditionally add the 2^64 bias constant (selected
 * via the sign bit shifted into the address index). Not implemented for
 * SSE2. */
4027 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4029 ir_node *src_block = get_nodes_block(node);
4030 ir_node *block = be_transform_node(src_block);
4031 ir_graph *irg = current_ir_graph;
4032 dbg_info *dbgi = get_irn_dbg_info(node);
4033 ir_node *frame = get_irg_frame(irg);
4034 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4035 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4036 ir_node *new_val_low = be_transform_node(val_low);
4037 ir_node *new_val_high = be_transform_node(val_high);
4039 ir_node *sync, *fild, *res;
4040 ir_node *store_low, *store_high;
4042 if (ia32_cg_config.use_sse2) {
4043 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* write both halves into one 64bit frame slot (high half at offset 4) */
4047 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4049 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4051 SET_IA32_ORIG_NODE(store_low, node);
4052 SET_IA32_ORIG_NODE(store_high, node);
4054 set_ia32_use_frame(store_low);
4055 set_ia32_use_frame(store_high);
4056 set_ia32_op_type(store_low, ia32_AddrModeD);
4057 set_ia32_op_type(store_high, ia32_AddrModeD);
4058 set_ia32_ls_mode(store_low, mode_Iu);
4059 set_ia32_ls_mode(store_high, mode_Is);
4060 add_ia32_am_offs_int(store_high, 4);
/* both stores must complete before the fild reads the slot */
4064 sync = new_rd_Sync(dbgi, irg, block, 2, in);
4067 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4069 set_ia32_use_frame(fild);
4070 set_ia32_op_type(fild, ia32_AddrModeS);
4071 set_ia32_ls_mode(fild, mode_Ls);
4073 SET_IA32_ORIG_NODE(fild, node);
4075 res = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/* fild always treats the value as signed; for an unsigned source with the
 * top bit set we must add 2^64 (ULL_BIAS). The sign bit, shifted to
 * position 0, indexes a 2-entry constant table {0.0, 2^64}. */
4077 if (! mode_is_signed(get_irn_mode(val_high))) {
4078 ia32_address_mode_t am;
4080 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4083 am.addr.base = noreg_GP;
4084 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4085 am.addr.mem = nomem;
4088 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4089 am.addr.use_frame = 0;
4090 am.addr.frame_entity = NULL;
4091 am.addr.symconst_sign = 0;
4092 am.ls_mode = mode_F;
4093 am.mem_proj = nomem;
4094 am.op_type = ia32_AddrModeS;
4096 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4097 am.pinned = op_pin_state_floats;
4099 am.ins_permuted = 0;
4101 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4102 am.new_op1, am.new_op2, get_fpcw());
4103 set_am_attributes(fadd, &am);
4105 set_irn_mode(fadd, mode_T);
4106 res = new_rd_Proj(NULL, irg, block, fadd, mode_vfp, pn_ia32_res);
/* Transform a lowered float-to-64bit-integer conversion: fist the x87 value
 * into a 64bit frame slot; the two 32bit halves are read back later by
 * gen_Proj_l_FloattoLL. Returns the fist's memory result (via gen_vfist).
 * NOTE(review): the final return appears elided in this extract. */
4111 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4113 ir_node *src_block = get_nodes_block(node);
4114 ir_node *block = be_transform_node(src_block);
4115 ir_graph *irg = current_ir_graph;
4116 dbg_info *dbgi = get_irn_dbg_info(node);
4117 ir_node *frame = get_irg_frame(irg);
4118 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4119 ir_node *new_val = be_transform_node(val);
4120 ir_node *fist, *mem;
4122 mem = gen_vfist(dbgi, irg, block, frame, noreg_GP, nomem, new_val, &fist);
4123 SET_IA32_ORIG_NODE(fist, node);
4124 set_ia32_use_frame(fist);
4125 set_ia32_op_type(fist, ia32_AddrModeD);
/* a 64bit store is required for the full long long result */
4126 set_ia32_ls_mode(fist, mode_Ls);
4132 * the BAD transformer.
/* The BAD transformer: registered for opcodes that must never reach the
 * transformation phase; always panics. */
4134 static ir_node *bad_transform(ir_node *node)
4136 panic("No transform function for %+F available.", node);
/* Transform a Proj of an l_FloattoLL: read one 32bit half back from the
 * 64bit frame slot written by the fist (offset 4 for the high word). */
4140 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4142 ir_graph *irg = current_ir_graph;
4143 ir_node *block = be_transform_node(get_nodes_block(node));
4144 ir_node *pred = get_Proj_pred(node);
4145 ir_node *new_pred = be_transform_node(pred);
4146 ir_node *frame = get_irg_frame(irg);
4147 dbg_info *dbgi = get_irn_dbg_info(node);
4148 long pn = get_Proj_proj(node);
4153 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4154 SET_IA32_ORIG_NODE(load, node);
4155 set_ia32_use_frame(load);
4156 set_ia32_op_type(load, ia32_AddrModeS);
4157 set_ia32_ls_mode(load, mode_Iu);
4158 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4159 * 32 bit from it with this particular load */
4160 attr = get_ia32_attr(load);
4161 attr->data.need_64bit_stackent = 1;
4163 if (pn == pn_ia32_l_FloattoLL_res_high) {
4164 add_ia32_am_offs_int(load, 4);
4166 assert(pn == pn_ia32_l_FloattoLL_res_low);
4169 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4175 * Transform the Projs of an AddSP.
/* Transform the Projs of a be_AddSP: the predecessor became an ia32 SubSP,
 * so renumber sp/result/memory Projs to the SubSP's proj numbers and pin
 * the stack-pointer result to ESP. */
4177 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4179 ir_node *block = be_transform_node(get_nodes_block(node));
4180 ir_node *pred = get_Proj_pred(node);
4181 ir_node *new_pred = be_transform_node(pred);
4182 ir_graph *irg = current_ir_graph;
4183 dbg_info *dbgi = get_irn_dbg_info(node);
4184 long proj = get_Proj_proj(node);
4186 if (proj == pn_be_AddSP_sp) {
4187 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4188 pn_ia32_SubSP_stack);
4189 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4191 } else if (proj == pn_be_AddSP_res) {
4192 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4193 pn_ia32_SubSP_addr);
4194 } else if (proj == pn_be_AddSP_M) {
4195 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4198 panic("No idea how to transform proj->AddSP");
4202 * Transform the Projs of a SubSP.
/* Transform the Projs of a be_SubSP: the predecessor became an ia32 AddSP,
 * so renumber sp/memory Projs accordingly and pin the stack-pointer result
 * to ESP. */
4204 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4206 ir_node *block = be_transform_node(get_nodes_block(node));
4207 ir_node *pred = get_Proj_pred(node);
4208 ir_node *new_pred = be_transform_node(pred);
4209 ir_graph *irg = current_ir_graph;
4210 dbg_info *dbgi = get_irn_dbg_info(node);
4211 long proj = get_Proj_proj(node);
4213 if (proj == pn_be_SubSP_sp) {
4214 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4215 pn_ia32_AddSP_stack);
4216 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4218 } else if (proj == pn_be_SubSP_M) {
4219 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4222 panic("No idea how to transform proj->SubSP");
4226 * Transform and renumber the Projs from a Load.
/* Transform and renumber the Projs of a Load. Handles the ia32 load
 * variants (Load, Conv_I2I used as load, xLoad, vfld) plus the
 * source-address-mode case where the load got folded into another node and
 * only its memory Proj survives. Memory Projs of multi-user loads are kept
 * untransformed for now to allow later AM folding. */
4228 static ir_node *gen_Proj_Load(ir_node *node)
4231 ir_node *block = be_transform_node(get_nodes_block(node));
4232 ir_node *pred = get_Proj_pred(node);
4233 ir_graph *irg = current_ir_graph;
4234 dbg_info *dbgi = get_irn_dbg_info(node);
4235 long proj = get_Proj_proj(node);
4237 /* loads might be part of source address mode matches, so we don't
4238 * transform the ProjMs yet (with the exception of loads whose result is
4241 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4244 /* this is needed, because sometimes we have loops that are only
4245 reachable through the ProjM */
4246 be_enqueue_preds(node);
4247 /* do it in 2 steps, to silence firm verifier */
4248 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4249 set_Proj_proj(res, pn_ia32_mem);
4253 /* renumber the proj */
4254 new_pred = be_transform_node(pred);
4255 if (is_ia32_Load(new_pred)) {
4258 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4260 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4261 case pn_Load_X_regular:
4262 return new_rd_Jmp(dbgi, irg, block);
4263 case pn_Load_X_except:
4264 /* This Load might raise an exception. Mark it. */
4265 set_ia32_exc_label(new_pred, 1);
4266 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* a Conv may have subsumed the load; give it a result + memory Proj */
4270 } else if (is_ia32_Conv_I2I(new_pred) ||
4271 is_ia32_Conv_I2I8Bit(new_pred)) {
4272 set_irn_mode(new_pred, mode_T);
4273 if (proj == pn_Load_res) {
4274 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4275 } else if (proj == pn_Load_M) {
4276 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4278 } else if (is_ia32_xLoad(new_pred)) {
4281 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4283 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4284 case pn_Load_X_regular:
4285 return new_rd_Jmp(dbgi, irg, block);
4286 case pn_Load_X_except:
4287 /* This Load might raise an exception. Mark it. */
4288 set_ia32_exc_label(new_pred, 1);
4289 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4293 } else if (is_ia32_vfld(new_pred)) {
4296 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4298 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4299 case pn_Load_X_regular:
4300 return new_rd_Jmp(dbgi, irg, block);
4301 case pn_Load_X_except:
4302 /* This Load might raise an exception. Mark it. */
4303 set_ia32_exc_label(new_pred, 1);
/* NOTE(review): uses pn_ia32_xLoad_X_exc in the vfld branch — possibly
 * intentional shared proj number, but looks like a copy-paste; verify
 * against gen_ia32_regalloc_if proj definitions. */
4304 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4309 /* can happen for ProJMs when source address mode happened for the
4312 /* however it should not be the result proj, as that would mean the
4313 load had multiple users and should not have been used for
4315 if (proj != pn_Load_M) {
4316 panic("internal error: transformed node not a Load");
4318 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4321 panic("No idea how to transform proj");
4325 * Transform and renumber the Projs from a DivMod like instruction.
/* Transform and renumber the Projs of a Div/Mod/DivMod: all three map to
 * an ia32 Div/IDiv, whose div_res and mod_res Projs serve quotient and
 * remainder respectively. Exception Projs mark the ia32 node. */
4327 static ir_node *gen_Proj_DivMod(ir_node *node)
4329 ir_node *block = be_transform_node(get_nodes_block(node));
4330 ir_node *pred = get_Proj_pred(node);
4331 ir_node *new_pred = be_transform_node(pred);
4332 ir_graph *irg = current_ir_graph;
4333 dbg_info *dbgi = get_irn_dbg_info(node);
4334 long proj = get_Proj_proj(node);
4336 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* dispatch on the ORIGINAL opcode to decode the proj numbers correctly */
4338 switch (get_irn_opcode(pred)) {
4342 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4344 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4345 case pn_Div_X_regular:
4346 return new_rd_Jmp(dbgi, irg, block);
4347 case pn_Div_X_except:
4348 set_ia32_exc_label(new_pred, 1);
4349 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4357 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4359 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4360 case pn_Mod_X_except:
4361 set_ia32_exc_label(new_pred, 1);
4362 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4370 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4371 case pn_DivMod_res_div:
4372 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4373 case pn_DivMod_res_mod:
4374 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4375 case pn_DivMod_X_regular:
4376 return new_rd_Jmp(dbgi, irg, block);
4377 case pn_DivMod_X_except:
4378 set_ia32_exc_label(new_pred, 1);
4379 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4388 panic("No idea how to transform proj->DivMod");
4392 * Transform and renumber the Projs from a CopyB.
/* Transform and renumber the memory Proj of a CopyB, distinguishing the
 * immediate-size (CopyB_i) and variable-size (CopyB) ia32 variants. */
4394 static ir_node *gen_Proj_CopyB(ir_node *node)
4396 ir_node *block = be_transform_node(get_nodes_block(node));
4397 ir_node *pred = get_Proj_pred(node);
4398 ir_node *new_pred = be_transform_node(pred);
4399 ir_graph *irg = current_ir_graph;
4400 dbg_info *dbgi = get_irn_dbg_info(node);
4401 long proj = get_Proj_proj(node);
4404 case pn_CopyB_M_regular:
4405 if (is_ia32_CopyB_i(new_pred)) {
4406 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4407 } else if (is_ia32_CopyB(new_pred)) {
4408 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4415 panic("No idea how to transform proj->CopyB");
4419 * Transform and renumber the Projs from a Quot.
/* Transform and renumber the Projs of a Quot (float division): the
 * predecessor is either an SSE xDiv or an x87 vfdiv; exception Projs are
 * not handled (the cases fall through to the panic). */
4421 static ir_node *gen_Proj_Quot(ir_node *node)
4423 ir_node *block = be_transform_node(get_nodes_block(node));
4424 ir_node *pred = get_Proj_pred(node);
4425 ir_node *new_pred = be_transform_node(pred);
4426 ir_graph *irg = current_ir_graph;
4427 dbg_info *dbgi = get_irn_dbg_info(node);
4428 long proj = get_Proj_proj(node);
4432 if (is_ia32_xDiv(new_pred)) {
4433 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4434 } else if (is_ia32_vfdiv(new_pred)) {
4435 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4439 if (is_ia32_xDiv(new_pred)) {
4440 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4441 } else if (is_ia32_vfdiv(new_pred)) {
4442 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4445 case pn_Quot_X_regular:
4446 case pn_Quot_X_except:
4451 panic("No idea how to transform proj->Quot");
/* Transform a be_Call into an ia32 Call: match the call target as address
 * mode or immediate, route the GP register parameters into their fixed
 * eax/ecx/edx inputs, wire in stack pointer and fpcw, and remember SSE2
 * calls for post-processing of float returns. */
4454 static ir_node *gen_be_Call(ir_node *node)
4456 dbg_info *const dbgi = get_irn_dbg_info(node);
4457 ir_graph *const irg = current_ir_graph;
4458 ir_node *const src_block = get_nodes_block(node);
4459 ir_node *const block = be_transform_node(src_block);
4460 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4461 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4462 ir_node *const sp = be_transform_node(src_sp);
4463 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4464 ia32_address_mode_t am;
4465 ia32_address_t *const addr = &am.addr;
4470 ir_node * eax = noreg_GP;
4471 ir_node * ecx = noreg_GP;
4472 ir_node * edx = noreg_GP;
4473 unsigned const pop = be_Call_get_pop(node);
4474 ir_type *const call_tp = be_Call_get_type(node);
4475 int old_no_pic_adjust;
4477 /* Run the x87 simulator if the call returns a float value */
4478 if (get_method_n_ress(call_tp) > 0) {
4479 ir_type *const res_type = get_method_res_type(call_tp, 0);
4480 ir_mode *const res_mode = get_type_mode(res_type);
4482 if (res_mode != NULL && mode_is_float(res_mode)) {
4483 env_cg->do_x87_sim = 1;
4487 /* We do not want be_Call direct calls */
4488 assert(be_Call_get_entity(node) == NULL);
4490 /* special case for PIC trampoline calls */
4491 old_no_pic_adjust = no_pic_adjust;
4492 no_pic_adjust = env_cg->birg->main_env->options->pic;
4494 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4495 match_am | match_immediate);
4497 no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw; walk the remaining register parameters
 * backwards and sort them into their fixed registers */
4499 i = get_irn_arity(node) - 1;
4500 fpcw = be_transform_node(get_irn_n(node, i--));
4501 for (; i >= be_pos_Call_first_arg; --i) {
4502 arch_register_req_t const *const req = arch_get_register_req(node, i);
4503 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4505 assert(req->type == arch_register_req_type_limited);
4506 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4508 switch (*req->limited) {
4509 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4510 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4511 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4512 default: panic("Invalid GP register for register parameter");
4516 mem = transform_AM_mem(irg, block, src_ptr, src_mem, addr->mem);
4517 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4518 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4519 set_am_attributes(call, &am);
4520 call = fix_mem_proj(call, &am);
4522 if (get_irn_pinned(node) == op_pin_state_pinned)
4523 set_irn_pinned(call, op_pin_state_pinned);
4525 SET_IA32_ORIG_NODE(call, node);
4527 if (ia32_cg_config.use_sse2) {
4528 /* remember this call for post-processing */
4529 ARR_APP1(ir_node *, call_list, call);
4530 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4537 * Transform Builtin trap
/* Transform a Builtin trap into an ia32 UD2 instruction. */
4539 static ir_node *gen_trap(ir_node *node) {
4540 dbg_info *dbgi = get_irn_dbg_info(node);
4541 ir_node *block = be_transform_node(get_nodes_block(node));
4542 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4544 return new_bd_ia32_UD2(dbgi, block, mem);
4548 * Transform Builtin debugbreak
/* Transform a Builtin debugbreak into an ia32 Breakpoint instruction. */
4550 static ir_node *gen_debugbreak(ir_node *node) {
4551 dbg_info *dbgi = get_irn_dbg_info(node);
4552 ir_node *block = be_transform_node(get_nodes_block(node));
4553 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4555 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4559 * Transform Builtin return_address
/* Transform a Builtin return_address: walk up `value` stack frames with
 * ClimbFrame (when value != 0, guarded by elided code in this extract) and
 * load the return address slot of the resulting frame. */
4561 static ir_node *gen_return_address(ir_node *node) {
4562 ir_node *param = get_Builtin_param(node, 0);
4563 ir_node *frame = get_Builtin_param(node, 1);
4564 dbg_info *dbgi = get_irn_dbg_info(node);
/* the frame level must be a compile-time constant */
4565 tarval *tv = get_Const_tarval(param);
4566 unsigned long value = get_tarval_long(tv);
4568 ir_node *block = be_transform_node(get_nodes_block(node));
4569 ir_node *ptr = be_transform_node(frame);
4573 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4574 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4575 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4578 /* load the return address from this frame */
4579 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4581 set_irn_pinned(load, get_irn_pinned(node));
4582 set_ia32_op_type(load, ia32_AddrModeS);
4583 set_ia32_ls_mode(load, mode_Iu);
4585 set_ia32_am_offs_int(load, 0);
4586 set_ia32_use_frame(load);
4587 set_ia32_frame_ent(load, ia32_get_return_address_entity());
/* a floats-pinned load may be rematerialized; the shared result proj
 * number across load variants makes that safe */
4589 if (get_irn_pinned(node) == op_pin_state_floats) {
4590 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4591 && pn_ia32_vfld_res == pn_ia32_Load_res
4592 && pn_ia32_Load_res == pn_ia32_res);
4593 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4596 SET_IA32_ORIG_NODE(load, node);
4597 return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
4601 * Transform Builtin frame_address
/* Transform a Builtin frame_address: analogous to gen_return_address but
 * loads the frame-address slot. When no frame entity exists, gcc-compatible
 * behavior is emulated (offset 0, "will fail anyway"). */
4603 static ir_node *gen_frame_address(ir_node *node) {
4604 ir_node *param = get_Builtin_param(node, 0);
4605 ir_node *frame = get_Builtin_param(node, 1);
4606 dbg_info *dbgi = get_irn_dbg_info(node);
/* the frame level must be a compile-time constant */
4607 tarval *tv = get_Const_tarval(param);
4608 unsigned long value = get_tarval_long(tv);
4610 ir_node *block = be_transform_node(get_nodes_block(node));
4611 ir_node *ptr = be_transform_node(frame);
4616 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4617 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4618 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4621 /* load the frame address from this frame */
4622 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4624 set_irn_pinned(load, get_irn_pinned(node));
4625 set_ia32_op_type(load, ia32_AddrModeS);
4626 set_ia32_ls_mode(load, mode_Iu);
4628 ent = ia32_get_frame_address_entity();
4630 set_ia32_am_offs_int(load, 0);
4631 set_ia32_use_frame(load);
4632 set_ia32_frame_ent(load, ent);
4634 /* will fail anyway, but gcc does this: */
4635 set_ia32_am_offs_int(load, 0);
/* same rematerialization reasoning as in gen_return_address */
4638 if (get_irn_pinned(node) == op_pin_state_floats) {
4639 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4640 && pn_ia32_vfld_res == pn_ia32_Load_res
4641 && pn_ia32_Load_res == pn_ia32_res);
4642 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4645 SET_IA32_ORIG_NODE(load, node);
4646 return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
4650 * Transform Builtin prefetch
/* Transform a Builtin prefetch: selects PrefetchW (3DNow!, write hint),
 * one of PrefetchNTA/0/1/2 (SSE, by locality parameter), or the plain
 * 3DNow! Prefetch. Without any prefetch support, just route the memory. */
4652 static ir_node *gen_prefetch(ir_node *node) {
4654 ir_node *ptr, *block, *mem, *base, *index;
4655 ir_node *param, *new_node;
4658 ia32_address_t addr;
4660 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4661 /* no prefetch at all, route memory */
4662 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write hint (1 = prefetch for write) */
4665 param = get_Builtin_param(node, 1);
4666 tv = get_Const_tarval(param);
4667 rw = get_tarval_long(tv);
4669 /* construct load address */
4670 memset(&addr, 0, sizeof(addr));
4671 ptr = get_Builtin_param(node, 0);
4672 ia32_create_address_mode(&addr, ptr, 0);
4679 base = be_transform_node(base);
4682 if (index == NULL) {
4685 index = be_transform_node(index);
4688 dbgi = get_irn_dbg_info(node);
4689 block = be_transform_node(get_nodes_block(node));
4690 mem = be_transform_node(get_Builtin_mem(node));
4692 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4693 /* we have 3DNow!, this was already checked above */
4694 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4695 } else if (ia32_cg_config.use_sse_prefetch) {
4696 /* note: rw == 1 is IGNORED in that case */
4697 param = get_Builtin_param(node, 2);
4698 tv = get_Const_tarval(param);
4699 locality = get_tarval_long(tv);
4701 /* SSE style prefetch */
/* locality 0 -> NTA, 1 -> T2, 2 -> T1, 3 -> T0 (case labels appear
 * elided in this extract) */
4704 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4707 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4710 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4713 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4717 assert(ia32_cg_config.use_3dnow_prefetch);
4718 /* 3DNow! style prefetch */
4719 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4722 set_irn_pinned(new_node, get_irn_pinned(node));
4723 set_ia32_op_type(new_node, ia32_AddrModeS);
4724 set_ia32_ls_mode(new_node, mode_Bu);
4725 set_address(new_node, &addr);
4727 SET_IA32_ORIG_NODE(new_node, node);
4729 be_dep_on_frame(new_node);
4730 return new_r_Proj(current_ir_graph, block, new_node, mode_M, pn_ia32_Prefetch_M);
4734 * Transform bsf like node
/* Common transformer for bsf/bsr-like builtins: match the single Builtin
 * parameter as an address-mode operand and construct the node via func. */
4736 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4738 ir_node *param = get_Builtin_param(node, 0);
4739 dbg_info *dbgi = get_irn_dbg_info(node);
4741 ir_node *block = get_nodes_block(node);
4742 ir_node *new_block = be_transform_node(block);
4744 ia32_address_mode_t am;
4745 ia32_address_t *addr = &am.addr;
4748 match_arguments(&am, block, NULL, param, NULL, match_am);
4750 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4751 set_am_attributes(cnt, &am);
4752 set_ia32_ls_mode(cnt, get_irn_mode(param));
4754 SET_IA32_ORIG_NODE(cnt, node);
4755 return fix_mem_proj(cnt, &am);
4759 * Transform builtin ffs.
/* Transform builtin ffs (find first set): bsf gives the bit index, but ffs
 * must return 0 for input 0 and index+1 otherwise. The zero case is handled
 * branch-free: set ZF into a byte, negate it to an all-ones mask, OR it
 * into the bsf result (forcing -1 for input 0), then add 1. */
4761 static ir_node *gen_ffs(ir_node *node)
4763 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4764 ir_node *real = skip_Proj(bsf);
4765 dbg_info *dbgi = get_irn_dbg_info(real);
4766 ir_node *block = get_nodes_block(real);
4767 ir_node *flag, *set, *conv, *neg, *or;
/* ensure the Bsf is mode_T so we can also take its flags Proj */
4770 if (get_irn_mode(real) != mode_T) {
4771 set_irn_mode(real, mode_T);
4772 bsf = new_r_Proj(current_ir_graph, block, real, mode_Iu, pn_ia32_res);
4775 flag = new_r_Proj(current_ir_graph, block, real, mode_b, pn_ia32_flags);
/* byte = (input == 0) */
4778 set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
4779 SET_IA32_ORIG_NODE(set, node);
4782 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4783 SET_IA32_ORIG_NODE(conv, node);
/* -(input == 0): 0 or 0xFFFFFFFF */
4786 neg = new_bd_ia32_Neg(dbgi, block, conv);
4789 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4790 set_ia32_commutative(or);
/* result = (masked bsf) + 1 */
4793 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4797 * Transform builtin clz.
/* Transform builtin clz (count leading zeros): bsr yields the index of the
 * highest set bit; clz = 31 - bsr = bsr XOR 31 for 32bit operands. */
4799 static ir_node *gen_clz(ir_node *node)
4801 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4802 ir_node *real = skip_Proj(bsr);
4803 dbg_info *dbgi = get_irn_dbg_info(real);
4804 ir_node *block = get_nodes_block(real);
4805 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4807 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
4811 * Transform builtin ctz.
/* Transform builtin ctz (count trailing zeros): exactly the bsf result. */
4813 static ir_node *gen_ctz(ir_node *node)
4815 return gen_unop_AM(node, new_bd_ia32_Bsf);
4819 * Transform builtin parity.
/* Transform builtin parity: compare the value against 0 to set the
 * parity flag, materialize it with Set(parity) and zero-extend the byte.
 * NOTE(review): the x86 parity flag covers only the low byte; this extract
 * elides lines that may fold the upper bytes first — verify in the full
 * file. */
4821 static ir_node *gen_parity(ir_node *node)
4823 ir_node *param = get_Builtin_param(node, 0);
4824 dbg_info *dbgi = get_irn_dbg_info(node);
4826 ir_node *block = get_nodes_block(node);
4828 ir_node *new_block = be_transform_node(block);
4829 ir_node *imm, *cmp, *new_node;
4831 ia32_address_mode_t am;
4832 ia32_address_t *addr = &am.addr;
4836 match_arguments(&am, block, NULL, param, NULL, match_am);
4837 imm = ia32_create_Immediate(NULL, 0, 0);
4838 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4839 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4840 set_am_attributes(cmp, &am);
4841 set_ia32_ls_mode(cmp, mode_Iu);
4843 SET_IA32_ORIG_NODE(cmp, node);
4845 cmp = fix_mem_proj(cmp, &am);
/* materialize the parity flag as a 0/1 byte */
4848 new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
4849 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the byte result to 32bit */
4852 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4853 nomem, new_node, mode_Bu);
4854 SET_IA32_ORIG_NODE(new_node, node);
4859 * Transform builtin popcount
4861 static ir_node *gen_popcount(ir_node *node) {
4862 ir_node *param = get_Builtin_param(node, 0);
4863 dbg_info *dbgi = get_irn_dbg_info(node);
4865 ir_node *block = get_nodes_block(node);
4866 ir_node *new_block = be_transform_node(block);
4869 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4871 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4872 if (ia32_cg_config.use_popcnt) {
4873 ia32_address_mode_t am;
4874 ia32_address_t *addr = &am.addr;
4877 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4879 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4880 set_am_attributes(cnt, &am);
4881 set_ia32_ls_mode(cnt, get_irn_mode(param));
4883 SET_IA32_ORIG_NODE(cnt, node);
4884 return fix_mem_proj(cnt, &am);
4887 new_param = be_transform_node(param);
4889 /* do the standard popcount algo */
4891 /* m1 = x & 0x55555555 */
4892 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4893 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4896 simm = ia32_create_Immediate(NULL, 0, 1);
4897 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4899 /* m2 = s1 & 0x55555555 */
4900 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4903 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4905 /* m4 = m3 & 0x33333333 */
4906 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4907 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4910 simm = ia32_create_Immediate(NULL, 0, 2);
4911 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4913 /* m5 = s2 & 0x33333333 */
4914 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4917 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4919 /* m7 = m6 & 0x0F0F0F0F */
4920 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4921 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4924 simm = ia32_create_Immediate(NULL, 0, 4);
4925 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4927 /* m8 = s3 & 0x0F0F0F0F */
4928 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4931 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4933 /* m10 = m9 & 0x00FF00FF */
4934 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4935 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4938 simm = ia32_create_Immediate(NULL, 0, 8);
4939 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4941 /* m11 = s4 & 0x00FF00FF */
4942 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4944 /* m12 = m10 + m11 */
4945 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4947 /* m13 = m12 & 0x0000FFFF */
4948 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4949 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4951 /* s5 = m12 >> 16 */
4952 simm = ia32_create_Immediate(NULL, 0, 16);
4953 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4955 /* res = m13 + s5 */
4956 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4960 * Transform builtin byte swap.
4962 static ir_node *gen_bswap(ir_node *node) {
4963 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4964 dbg_info *dbgi = get_irn_dbg_info(node);
4966 ir_node *block = get_nodes_block(node);
4967 ir_node *new_block = be_transform_node(block);
4968 ir_mode *mode = get_irn_mode(param);
4969 unsigned size = get_mode_size_bits(mode);
4970 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
4974 if (ia32_cg_config.use_i486) {
4975 /* swap available */
4976 return new_bd_ia32_Bswap(dbgi, new_block, param);
4978 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4979 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4981 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
4982 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
4984 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4986 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
4987 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
4989 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4990 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
4993 /* swap16 always available */
4994 return new_bd_ia32_Bswap16(dbgi, new_block, param);
4997 panic("Invalid bswap size (%d)", size);
5002 * Transform builtin outport.
5004 static ir_node *gen_outport(ir_node *node) {
/* Transform builtin outport: emit an ia32 Outport node (x86 "out").
 * The port number may be encoded as an immediate; the value is written
 * with the width of its own mode, recorded as the node's ls_mode. */
5005 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5006 ir_node *oldv = get_Builtin_param(node, 1);
5007 ir_mode *mode = get_irn_mode(oldv);
5008 ir_node *value = be_transform_node(oldv);
5009 ir_node *block = be_transform_node(get_nodes_block(node));
5010 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5011 dbg_info *dbgi = get_irn_dbg_info(node);
5013 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* remember the access width for the emitter */
5014 set_ia32_ls_mode(res, mode);
5019 * Transform builtin inport.
5021 static ir_node *gen_inport(ir_node *node) {
/* Transform builtin inport: emit an ia32 Inport node (x86 "in").
 * The result width is taken from the builtin's declared result type. */
5022 ir_type *tp = get_Builtin_type(node);
5023 ir_type *rstp = get_method_res_type(tp, 0);
5024 ir_mode *mode = get_type_mode(rstp);
5025 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5026 ir_node *block = be_transform_node(get_nodes_block(node));
5027 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5028 dbg_info *dbgi = get_irn_dbg_info(node);
5030 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
/* record the access width for the emitter */
5031 set_ia32_ls_mode(res, mode);
5033 /* check for missing Result Proj */
5038 * Transform a builtin inner trampoline
5040 static ir_node *gen_inner_trampoline(ir_node *node) {
/* Transform a builtin inner trampoline: write a small executable stub to
 * memory at <ptr>:
 *     B9 <env32>    mov ecx, <env>
 *     E9 <rel32>    jmp <callee>
 * where rel32 = callee - (ptr + 10), because the jmp displacement is
 * relative to the end of the 10-byte stub (hence the -10 adjustments
 * below).  NOTE(review): the address-offset advances between the byte
 * stores are not visible in this excerpt - presumably addr.offset is
 * incremented after each store; verify in the full source. */
5041 ir_node *ptr = get_Builtin_param(node, 0);
5042 ir_node *callee = get_Builtin_param(node, 1);
5043 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5044 ir_node *mem = get_Builtin_mem(node);
5045 ir_node *block = get_nodes_block(node);
5046 ir_node *new_block = be_transform_node(block);
5050 ir_node *trampoline;
5052 dbg_info *dbgi = get_irn_dbg_info(node);
5053 ia32_address_t addr;
5055 /* construct store address */
5056 memset(&addr, 0, sizeof(addr));
5057 ia32_create_address_mode(&addr, ptr, 0);
5059 if (addr.base == NULL) {
5060 addr.base = noreg_GP;
5062 addr.base = be_transform_node(addr.base);
5065 if (addr.index == NULL) {
5066 addr.index = noreg_GP;
5068 addr.index = be_transform_node(addr.index);
5070 addr.mem = be_transform_node(mem);
5072 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5073 val = ia32_create_Immediate(NULL, 0, 0xB9);
5074 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5075 addr.index, addr.mem, val);
5076 set_irn_pinned(store, get_irn_pinned(node));
5077 set_ia32_op_type(store, ia32_AddrModeD);
5078 set_ia32_ls_mode(store, mode_Bu);
5079 set_address(store, &addr);
/* store the 32-bit environment pointer as the mov's immediate operand */
5083 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5084 addr.index, addr.mem, env);
5085 set_irn_pinned(store, get_irn_pinned(node));
5086 set_ia32_op_type(store, ia32_AddrModeD);
5087 set_ia32_ls_mode(store, mode_Iu);
5088 set_address(store, &addr);
5092 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5093 val = ia32_create_Immediate(NULL, 0, 0xE9);
5094 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5095 addr.index, addr.mem, val);
5096 set_irn_pinned(store, get_irn_pinned(node));
5097 set_ia32_op_type(store, ia32_AddrModeD);
5098 set_ia32_ls_mode(store, mode_Bu);
5099 set_address(store, &addr);
5103 trampoline = be_transform_node(ptr);
5105 /* the callee is typically an immediate */
5106 if (is_SymConst(callee)) {
/* fold the -10 stub-length adjustment directly into the constant */
5107 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
/* otherwise compute callee - 10 with a Lea */
5109 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
/* rel32 = (callee - 10) - ptr */
5111 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5113 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5114 addr.index, addr.mem, rel);
5115 set_irn_pinned(store, get_irn_pinned(node));
5116 set_ia32_op_type(store, ia32_AddrModeD);
5117 set_ia32_ls_mode(store, mode_Iu);
5118 set_address(store, &addr);
/* result tuple: in[0] = memory, in[1] = trampoline address (see the
 * pn_Builtin_M / pn_Builtin_1_result mapping in gen_Proj_Builtin) */
5123 return new_r_Tuple(current_ir_graph, new_block, 2, in);
5127 * Transform Builtin node.
5129 static ir_node *gen_Builtin(ir_node *node) {
/* Transform Builtin node: dispatch on the builtin kind to the dedicated
 * gen_* transformer.  (Several case labels are elided in this excerpt;
 * each return belongs to the matching ir_bk_* case of the switch.) */
5130 ir_builtin_kind kind = get_Builtin_kind(node);
5134 return gen_trap(node);
5135 case ir_bk_debugbreak:
5136 return gen_debugbreak(node);
5137 case ir_bk_return_address:
5138 return gen_return_address(node);
5139 case ir_bk_frame_address:
5140 return gen_frame_address(node);
5141 case ir_bk_prefetch:
5142 return gen_prefetch(node);
5144 return gen_ffs(node);
5146 return gen_clz(node);
5148 return gen_ctz(node);
5150 return gen_parity(node);
5151 case ir_bk_popcount:
5152 return gen_popcount(node);
5154 return gen_bswap(node);
5156 return gen_outport(node);
5158 return gen_inport(node);
5159 case ir_bk_inner_trampoline:
5160 return gen_inner_trampoline(node);
/* reached for builtin kinds without a transformer */
5162 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5166 * Transform Proj(Builtin) node.
5168 static ir_node *gen_Proj_Builtin(ir_node *proj) {
/* Transform Proj(Builtin): map the generic Builtin result/memory Projs
 * onto the result of the already-transformed ia32 node.  (Some case
 * labels and returns are elided in this excerpt.) */
5169 ir_node *node = get_Proj_pred(proj);
5170 ir_node *new_node = be_transform_node(node);
5171 ir_builtin_kind kind = get_Builtin_kind(node);
/* value-producing builtins: only the single result Proj is expected */
5174 case ir_bk_return_address:
5175 case ir_bk_frame_address:
5180 case ir_bk_popcount:
5182 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* memory-only builtins: only the memory Proj is expected */
5185 case ir_bk_debugbreak:
5186 case ir_bk_prefetch:
5188 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport produces both a value and a memory result */
5191 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5192 return new_r_Proj(current_ir_graph, get_nodes_block(new_node),
5193 new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5195 assert(get_Proj_proj(proj) == pn_Builtin_M);
5196 return new_r_Proj(current_ir_graph, get_nodes_block(new_node),
5197 new_node, mode_M, pn_ia32_Inport_M);
/* inner trampoline was transformed into a Tuple: (mem, address) */
5199 case ir_bk_inner_trampoline:
5200 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5201 return get_Tuple_pred(new_node, 1);
5203 assert(get_Proj_proj(proj) == pn_Builtin_M);
5204 return get_Tuple_pred(new_node, 0);
5207 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5210 static ir_node *gen_be_IncSP(ir_node *node)
/* Transform be_IncSP: duplicate the node unchanged but mark it as
 * clobbering the flags (presumably because the stack-pointer adjustment
 * is emitted as an add/sub, which modifies eflags - TODO confirm). */
5212 ir_node *res = be_duplicate_node(node);
5213 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5219 * Transform the Projs from a be_Call.
5221 static ir_node *gen_Proj_be_Call(ir_node *node)
/* Transform Proj(be_Call): map the be_Call proj numbers onto the
 * ia32_Call proj numbers.  Register results are located by searching the
 * transformed call's outputs for a register requirement matching the old
 * Proj's limited register. */
5223 ir_node *block = be_transform_node(get_nodes_block(node));
5224 ir_node *call = get_Proj_pred(node);
5225 ir_node *new_call = be_transform_node(call);
5226 ir_graph *irg = current_ir_graph;
5227 dbg_info *dbgi = get_irn_dbg_info(node);
5228 long proj = get_Proj_proj(node);
5229 ir_mode *mode = get_irn_mode(node);
/* the regular memory Proj maps directly to the ia32 Call memory input slot */
5232 if (proj == pn_be_Call_M_regular) {
5233 return new_rd_Proj(dbgi, irg, block, new_call, mode_M, n_ia32_Call_mem);
5235 /* transform call modes */
5236 if (mode_is_data(mode)) {
/* data results keep their register class but switch to the ia32 mode */
5237 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5241 /* Map from be_Call to ia32_Call proj number */
5242 if (proj == pn_be_Call_sp) {
5243 proj = pn_ia32_Call_stack;
/* NOTE(review): this branch looks unreachable - pn_be_Call_M_regular
 * already returned above.  Elided lines make this uncertain; verify. */
5244 } else if (proj == pn_be_Call_M_regular) {
5245 proj = pn_ia32_Call_M;
5247 arch_register_req_t const *const req = arch_get_register_req_out(node);
5248 int const n_outs = arch_irn_get_n_outs(new_call);
5251 assert(proj >= pn_be_Call_first_res);
5252 assert(req->type & arch_register_req_type_limited);
/* search the transformed call's outputs for the same limited register */
5254 for (i = 0; i < n_outs; ++i) {
5255 arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);
5257 if (!(new_req->type & arch_register_req_type_limited) ||
5258 new_req->cls != req->cls ||
5259 *new_req->limited != *req->limited)
5268 res = new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
5270 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
/* pin the fixed-register outputs to their registers */
5272 case pn_ia32_Call_stack:
5273 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5276 case pn_ia32_Call_fpcw:
5277 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5285 * Transform the Projs from a Cmp.
5287 static ir_node *gen_Proj_Cmp(ir_node *node)
/* A Proj(Cmp) must never reach the backend transformer: mode_b values are
 * expected to have been lowered (into Mux/Cond patterns) beforehand, so
 * hitting this is a hard error. */
5289 /* this probably means not all mode_b nodes were lowered... */
5290 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5295 * Transform the Projs from a Bound.
5297 static ir_node *gen_Proj_Bound(ir_node *node)
/* Transform Proj(Bound): the Bound check becomes a Jcc, so the regular
 * control flow maps to the Jcc true Proj and the exception flow to the
 * false Proj; the result value of a Bound is simply its index operand.
 * (The pn_Bound_M / pn_Bound_res case labels are elided in this excerpt.) */
5299 ir_node *new_node, *block;
5300 ir_node *pred = get_Proj_pred(node);
5302 switch (get_Proj_proj(node)) {
/* memory Proj: forward the transformed Bound memory */
5304 return be_transform_node(get_Bound_mem(pred));
5305 case pn_Bound_X_regular:
5306 new_node = be_transform_node(pred);
5307 block = get_nodes_block(new_node);
5308 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
5309 case pn_Bound_X_except:
5310 new_node = be_transform_node(pred);
5311 block = get_nodes_block(new_node);
5312 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
/* result Proj: the checked index itself */
5314 return be_transform_node(get_Bound_index(pred));
5316 panic("unsupported Proj from Bound");
5320 static ir_node *gen_Proj_ASM(ir_node *node)
/* Transform Proj(ASM): recompute the output position on the transformed
 * ASM node depending on the Proj's mode.  (The pos assignments of the
 * int/reference and float branches are elided in this excerpt.) */
5322 ir_mode *mode = get_irn_mode(node);
5323 ir_node *pred = get_Proj_pred(node);
5324 ir_node *new_pred = be_transform_node(pred);
5325 ir_node *block = get_nodes_block(new_pred);
5326 long pos = get_Proj_proj(node);
5328 if (mode == mode_M) {
/* memory output lives behind all register outputs; the "+ 1" presumably
 * skips one extra reserved slot - TODO confirm against the full source */
5329 pos = arch_irn_get_n_outs(new_pred) + 1;
5330 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5332 } else if (mode_is_float(mode)) {
5335 panic("unexpected proj mode at ASM");
5338 return new_r_Proj(current_ir_graph, block, new_pred, mode, pos);
5342 * Transform and potentially renumber Proj nodes.
5344 static ir_node *gen_Proj(ir_node *node)
/* Transform and potentially renumber Proj nodes: dispatch on the opcode
 * of the Proj's predecessor to the specialized Proj transformer.  (Most
 * case labels of the switch are elided in this excerpt; each return
 * belongs to the matching iro_* case.) */
5346 ir_node *pred = get_Proj_pred(node);
5349 switch (get_irn_opcode(pred)) {
/* Store: only the memory Proj exists; the transformed Store is the
 * memory value itself */
5351 proj = get_Proj_proj(node);
5352 if (proj == pn_Store_M) {
5353 return be_transform_node(pred);
5355 panic("No idea how to transform proj->Store");
5358 return gen_Proj_Load(node);
5360 return gen_Proj_ASM(node);
5362 return gen_Proj_Builtin(node);
5366 return gen_Proj_DivMod(node);
5368 return gen_Proj_CopyB(node);
5370 return gen_Proj_Quot(node);
5372 return gen_Proj_be_SubSP(node);
5374 return gen_Proj_be_AddSP(node);
5376 return gen_Proj_be_Call(node);
5378 return gen_Proj_Cmp(node);
5380 return gen_Proj_Bound(node);
/* Start Projs */
5382 proj = get_Proj_proj(node);
5384 case pn_Start_X_initial_exec: {
5385 ir_node *block = get_nodes_block(pred);
5386 ir_node *new_block = be_transform_node(block);
5387 dbg_info *dbgi = get_irn_dbg_info(node);
5388 /* we exchange the ProjX with a jump */
5389 ir_node *jump = new_rd_Jmp(dbgi, current_ir_graph, new_block);
5394 case pn_Start_P_tls:
5395 return gen_Proj_tls(node);
/* default: Projs of already-lowered or generic nodes */
5400 if (is_ia32_l_FloattoLL(pred)) {
5401 return gen_Proj_l_FloattoLL(node);
5403 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5407 ir_mode *mode = get_irn_mode(node);
5408 if (ia32_mode_needs_gp_reg(mode)) {
5409 ir_node *new_pred = be_transform_node(pred);
5410 ir_node *block = be_transform_node(get_nodes_block(node));
5411 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
5412 mode_Iu, get_Proj_proj(node));
/* keep the original node number for debugging */
5413 new_proj->node_nr = node->node_nr;
5418 return be_duplicate_node(node);
5422 * Enters all transform functions into the generic pointer
5424 static void register_transformers(void)
/* Enters all transform functions into the generic function pointer of
 * their opcode: GEN(x) installs gen_x as the transformer for op_x, BAD(x)
 * installs bad_transform for opcodes that must no longer occur at this
 * stage.  (The bulk of the GEN/BAD list is elided in this excerpt.) */
5426 /* first clear the generic function pointer for all ops */
5427 clear_irp_opcodes_generic_func();
5429 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5430 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5469 /* transform ops from intrinsic lowering */
5481 GEN(ia32_l_LLtoFloat);
5482 GEN(ia32_l_FloattoLL);
5488 /* we should never see these nodes */
5503 /* handle builtins */
5506 /* handle generic backend nodes */
5520 * Pre-transform all unknown and noreg nodes.
5522 static void ia32_pretransform_node(void)
/* Pre-transform the backend-global Unknown/NoReg placeholder nodes of all
 * register classes, and cache the frequently used nomem and noreg_GP
 * nodes for the transformer functions above. */
5524 ia32_code_gen_t *cg = env_cg;
5526 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5527 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5528 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5529 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5530 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5531 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
/* cache the no-memory node and the GP no-register node */
5533 nomem = get_irg_no_mem(current_ir_graph);
5534 noreg_GP = ia32_new_NoReg_gp(cg);
5540 * Walker, checks if all ia32 nodes producing more than one result have their
5541 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
5543 static void add_missing_keep_walker(ir_node *node, void *data)
/* Walker: for every multi-result (mode_T) ia32 node, collect which
 * outputs already have a Proj in a bitmask, then create a Proj plus a
 * be_Keep for every unused output (except flags) so the register
 * allocator treats those results as live.  Consecutive missing outputs
 * share one Keep node via be_Keep_add_node. */
5546 unsigned found_projs = 0;
5547 const ir_edge_t *edge;
5548 ir_mode *mode = get_irn_mode(node);
/* only ia32 nodes are of interest */
5553 if (!is_ia32_irn(node))
5556 n_outs = arch_irn_get_n_outs(node);
/* SwitchJmp is excluded (its X-projs are control flow, not values) -
 * TODO confirm the rationale against the full source */
5559 if (is_ia32_SwitchJmp(node))
/* the bitmask below only has sizeof(unsigned)*8 bits */
5562 assert(n_outs < (int) sizeof(unsigned) * 8);
5563 foreach_out_edge(node, edge) {
5564 ir_node *proj = get_edge_src_irn(edge);
5567 /* The node could be kept */
/* memory Projs need no keep */
5571 if (get_irn_mode(proj) == mode_M)
5574 pn = get_Proj_proj(proj);
5575 assert(pn < n_outs);
5576 found_projs |= 1 << pn;
5580 /* are keeps missing? */
5582 for (i = 0; i < n_outs; ++i) {
5585 const arch_register_req_t *req;
5586 const arch_register_class_t *cls;
/* output already consumed by a Proj */
5588 if (found_projs & (1 << i)) {
5592 req = get_ia32_out_req(node, i);
/* flags results never need keeping */
5597 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
5601 block = get_nodes_block(node);
5602 in[0] = new_r_Proj(current_ir_graph, block, node,
5603 arch_register_class_mode(cls), i);
/* reuse the Keep created for a previous missing output of this node */
5604 if (last_keep != NULL) {
5605 be_Keep_add_node(last_keep, cls, in[0]);
5607 last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
/* keep the schedule valid if the node is already scheduled */
5608 if (sched_is_scheduled(node)) {
5609 sched_add_after(node, last_keep);
5616 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
5619 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5621 ir_graph *irg = be_get_birg_irg(cg->birg);
5622 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5626 * Post-process all calls if we are in SSE mode.
5627 * The ABI requires that the results are in st0, copy them
5628 * to a xmm register.
5630 static void postprocess_fp_call_results(void) {
/* SSE-mode ABI fixup: calls return floating point values in x87 st(0),
 * but SSE code expects them in xmm registers.  For every float result of
 * every recorded call, either patch an xStore user directly into a vfst
 * (store from st(0)), or spill st(0) to the frame with vfst and reload it
 * into an xmm register with xLoad, rerouting value and memory users. */
5633 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5634 ir_node *call = call_list[i];
5635 ir_type *mtp = call_types[i];
5638 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5639 ir_type *res_tp = get_method_res_type(mtp, j);
5640 ir_node *res, *new_res;
5641 const ir_edge_t *edge, *next;
5644 if (! is_atomic_type(res_tp)) {
5645 /* no floating point return */
5648 mode = get_type_mode(res_tp);
5649 if (! mode_is_float(mode)) {
5650 /* no floating point return */
/* the x87 result Proj of the transformed call */
5654 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5657 /* now patch the users */
5658 foreach_out_edge_safe(res, edge, next) {
5659 ir_node *succ = get_edge_src_irn(edge);
/* Keeps need no rewiring */
5662 if (be_is_Keep(succ))
5665 if (is_ia32_xStore(succ)) {
5666 /* an xStore can be patched into an vfst */
5667 dbg_info *db = get_irn_dbg_info(succ);
5668 ir_node *block = get_nodes_block(succ);
5669 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5670 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5671 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5672 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5673 ir_mode *mode = get_ia32_ls_mode(succ);
5675 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
/* copy the address-mode attributes from the old store */
5676 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5677 if (is_ia32_use_frame(succ))
5678 set_ia32_use_frame(st);
5679 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5680 set_irn_pinned(st, get_irn_pinned(succ));
5681 set_ia32_op_type(st, ia32_AddrModeD);
/* otherwise build the spill/reload sequence once (lazily) */
5685 if (new_res == NULL) {
5686 dbg_info *db = get_irn_dbg_info(call);
5687 ir_node *block = get_nodes_block(call);
5688 ir_node *frame = get_irg_frame(current_ir_graph);
5689 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5690 ir_node *call_mem = new_r_Proj(current_ir_graph, block, call, mode_M, pn_ia32_Call_M);
5691 ir_node *vfst, *xld, *new_mem;
5693 /* store st(0) on stack */
5694 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5695 set_ia32_op_type(vfst, ia32_AddrModeD);
5696 set_ia32_use_frame(vfst);
5698 /* load into SSE register */
5699 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5700 set_ia32_op_type(xld, ia32_AddrModeS);
5701 set_ia32_use_frame(xld);
5703 new_res = new_r_Proj(current_ir_graph, block, xld, mode, pn_ia32_xLoad_res);
5704 new_mem = new_r_Proj(current_ir_graph, block, xld, mode_M, pn_ia32_xLoad_M);
/* route all memory users behind the new load */
5706 if (old_mem != NULL) {
5707 edges_reroute(old_mem, new_mem, current_ir_graph);
/* finally let the user consume the xmm value */
5711 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5718 /* do the transformation */
5719 void ia32_transform_graph(ia32_code_gen_t *cg)
/* Main driver of the firm -> ia32 transformation: registers the opcode
 * transformers, precomputes heights and non-address-mode nodes, runs the
 * generic transform pass, post-processes float call results in SSE mode,
 * and frees the auxiliary data afterwards. */
5723 register_transformers();
5725 initial_fpcw = NULL;
5728 BE_TIMER_PUSH(t_heights);
5729 heights = heights_new(cg->irg);
5730 BE_TIMER_POP(t_heights);
5731 ia32_calculate_non_address_mode_nodes(cg->birg);
5733 /* the transform phase is not safe for CSE (yet) because several nodes get
5734 * attributes set after their creation */
/* save the CSE setting; it is restored below (the call disabling CSE is
 * elided in this excerpt - presumably set_opt_cse(0); verify) */
5735 cse_last = get_opt_cse();
/* collect calls and their types for postprocess_fp_call_results() */
5738 call_list = NEW_ARR_F(ir_node *, 0);
5739 call_types = NEW_ARR_F(ir_type *, 0);
5740 be_transform_graph(cg->birg, ia32_pretransform_node);
5742 if (ia32_cg_config.use_sse2)
5743 postprocess_fp_call_results();
5744 DEL_ARR_F(call_types);
5745 DEL_ARR_F(call_list);
5747 set_opt_cse(cse_last);
5749 ia32_free_non_address_mode_nodes();
5750 heights_free(heights);
5754 void ia32_init_transform(void)
5756 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");