2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode_t.h"
52 #include "../besched.h"
54 #include "../beutil.h"
55 #include "../beirg_t.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
95 static ir_node *initial_fpcw = NULL;
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* its enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
/* 0.0 and 1.0 have dedicated x87 instructions (fldz/fld1), so no
 * constant-pool entry is needed for them */
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
167 * returns true if constant can be created with a simple float command
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
/* 0.0 can always be produced with xorps/pxor */
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
/* assemble the low 32 bits of the double from its byte representation
 * (little-endian) to detect doubles whose lower half is zero */
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
199 * Transforms a Const.
 *
 * Float constants are materialized either via special SSE/x87 instruction
 * sequences (zero, one, 32bit-representable values) or via a rematerializable
 * load from a constant-pool entity; integer constants become ia32_Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
210 if (mode_is_float(mode)) {
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
217 if (tarval_is_null(tv)) {
/* 0.0: cheap xor-style zeroing, no memory access needed */
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
222 } else if (tarval_is_one(tv)) {
/* construct 1.0 by shifting an all-ones pattern so that only the
 * exponent bits encoding 1.0 remain (shift counts differ for
 * float/double) */
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
/* shift the 32bit pattern into the upper half of the double */
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
/* fallback: load the SSE constant from a constant-pool entity */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
/* the load reads immutable data, so the spiller may recompute it
 * instead of spilling */
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(current_ir_graph, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0/1.0, constant-pool load otherwise */
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
292 floatent = create_float_const_entity(node);
293 /* create_float_const_ent is smart and sometimes creates
295 ls_mode = get_type_mode(get_entity_type(floatent));
297 load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
299 set_ia32_op_type(load, ia32_AddrModeS);
300 set_ia32_am_sc(load, floatent);
301 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
302 res = new_r_Proj(current_ir_graph, block, load, mode_vfp, pn_ia32_vfld_res);
305 #ifdef CONSTRUCT_SSE_CONST
307 #endif /* CONSTRUCT_SSE_CONST */
308 SET_IA32_ORIG_NODE(load, node);
310 be_dep_on_frame(load);
312 } else { /* non-float mode */
314 tarval *tv = get_Const_tarval(node);
/* normalize to 32bit unsigned so get_tarval_long() below is safe */
317 tv = tarval_convert_to(tv, mode_Iu);
319 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
321 panic("couldn't convert constant tarval (%+F)", node);
323 val = get_tarval_long(tv);
325 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
326 SET_IA32_ORIG_NODE(cnst, node);
328 be_dep_on_frame(cnst);
334 * Transforms a SymConst.
 *
 * Float symconsts become loads from the entity's address; integer/address
 * symconsts become an ia32_Const carrying the entity. Only
 * symconst_addr_ent is supported by this backend.
336 static ir_node *gen_SymConst(ir_node *node)
338 ir_node *old_block = get_nodes_block(node);
339 ir_node *block = be_transform_node(old_block);
340 dbg_info *dbgi = get_irn_dbg_info(node);
341 ir_mode *mode = get_irn_mode(node);
344 if (mode_is_float(mode)) {
/* NOTE(review): both loads use mode_E here regardless of the symconst's
 * actual float mode — presumably intentional for x87 precision; verify */
345 if (ia32_cg_config.use_sse2)
346 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
348 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
354 if (get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
361 SET_IA32_ORIG_NODE(cnst, node);
363 be_dep_on_frame(cnst);
368 * Create a float type for the given mode and cache it.
370 * @param mode the mode for the float type (might be integer mode for SSE2 types)
371 * @param align alignment
373 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
379 if (mode == mode_Iu) {
380 static ir_type *int_Iu[16] = {NULL, };
382 if (int_Iu[align] == NULL) {
383 snprintf(buf, sizeof(buf), "int_Iu_%u", align);
384 int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
385 /* set the specified alignment */
386 set_type_alignment_bytes(tp, align);
388 return int_Iu[align];
389 } else if (mode == mode_Lu) {
390 static ir_type *int_Lu[16] = {NULL, };
392 if (int_Lu[align] == NULL) {
393 snprintf(buf, sizeof(buf), "int_Lu_%u", align);
394 int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
395 /* set the specified alignment */
396 set_type_alignment_bytes(tp, align);
398 return int_Lu[align];
399 } else if (mode == mode_F) {
400 static ir_type *float_F[16] = {NULL, };
402 if (float_F[align] == NULL) {
403 snprintf(buf, sizeof(buf), "float_F_%u", align);
404 float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
405 /* set the specified alignment */
406 set_type_alignment_bytes(tp, align);
408 return float_F[align];
409 } else if (mode == mode_D) {
410 static ir_type *float_D[16] = {NULL, };
412 if (float_D[align] == NULL) {
413 snprintf(buf, sizeof(buf), "float_D_%u", align);
414 float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return float_D[align];
420 static ir_type *float_E[16] = {NULL, };
422 if (float_E[align] == NULL) {
423 snprintf(buf, sizeof(buf), "float_E_%u", align);
424 float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
425 /* set the specified alignment */
426 set_type_alignment_bytes(tp, align);
428 return float_E[align];
433 * Create a float[2] array type for the given atomic type.
435 * @param tp the atomic type
437 static ir_type *ia32_create_float_array(ir_type *tp) {
439 ir_mode *mode = get_type_mode(tp);
440 unsigned align = get_type_alignment_bytes(tp);
445 if (mode == mode_F) {
446 static ir_type *float_F[16] = {NULL, };
448 if (float_F[align] != NULL)
449 return float_F[align];
450 snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
451 arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
452 } else if (mode == mode_D) {
453 static ir_type *float_D[16] = {NULL, };
455 if (float_D[align] != NULL)
456 return float_D[align];
457 snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
458 arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
460 static ir_type *float_E[16] = {NULL, };
462 if (float_E[align] != NULL)
463 return float_E[align];
464 snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
465 arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
467 set_type_alignment_bytes(arr, align);
468 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
469 set_type_state(arr, layout_fixed);
473 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
474 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
476 static const struct {
477 const char *ent_name;
478 const char *cnst_str;
481 } names [ia32_known_const_max] = {
482 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
483 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
484 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
485 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
486 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
/* one cached entity per known-constant kind; built lazily below */
488 static ir_entity *ent_cache[ia32_known_const_max];
490 const char *ent_name, *cnst_str;
496 ent_name = names[kct].ent_name;
497 if (! ent_cache[kct]) {
498 cnst_str = names[kct].cnst_str;
/* the 'mode' table field encodes which tarval mode to parse with */
500 switch (names[kct].mode) {
501 case 0: mode = mode_Iu; break;
502 case 1: mode = mode_Lu; break;
503 default: mode = mode_F; break;
505 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
506 tp = ia32_create_float_type(mode, names[kct].align);
/* ULLBIAS is stored as a 2-element array: { 0, bias } */
508 if (kct == ia32_ULLBIAS)
509 tp = ia32_create_float_array(tp);
510 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
512 set_entity_ld_ident(ent, get_entity_ident(ent));
513 set_entity_visibility(ent, visibility_local);
514 set_entity_variability(ent, variability_constant);
515 set_entity_allocation(ent, allocation_static);
517 if (kct == ia32_ULLBIAS) {
518 ir_initializer_t *initializer = create_initializer_compound(2);
520 set_initializer_compound_value(initializer, 0,
521 create_initializer_tarval(get_tarval_null(mode)));
522 set_initializer_compound_value(initializer, 1,
523 create_initializer_tarval(tv));
525 set_entity_initializer(ent, initializer);
527 set_entity_initializer(ent, create_initializer_tarval(tv));
530 /* cache the entry */
531 ent_cache[kct] = ent;
534 return ent_cache[kct];
538 * return true if the node is a Proj(Load) and could be used in source address
539 * mode for another node. Will return only true if the @p other node is not
540 * dependent on the memory of the Load (for binary operations use the other
541 * input here, for unary operations use NULL).
543 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
544 ir_node *other, ir_node *other2, match_flags_t flags)
549 /* float constants are always available */
550 if (is_Const(node)) {
551 ir_mode *mode = get_irn_mode(node);
552 if (mode_is_float(mode)) {
553 if (ia32_cg_config.use_sse2) {
554 if (is_simple_sse_Const(node))
557 if (is_simple_x87_Const(node))
/* multi-user float constants: better to keep them in a register */
560 if (get_irn_n_edges(node) > 1)
568 load = get_Proj_pred(node);
569 pn = get_Proj_proj(node);
/* only the result Proj of a Load in the same block qualifies */
570 if (!is_Load(load) || pn != pn_Load_res)
572 if (get_nodes_block(load) != block)
574 /* we only use address mode if we're the only user of the load */
575 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
577 /* in some edge cases with address mode we might reach the load normally
578 * and through some AM sequence, if it is already materialized then we
579 * can't create an AM node from it */
580 if (be_is_transformed(node))
583 /* don't do AM if other node inputs depend on the load (via mem-proj) */
584 if (other != NULL && prevents_AM(block, load, other))
587 if (other2 != NULL && prevents_AM(block, load, other2))
/* Result of operand matching: the selected address mode plus flags that
 * describe how the matched operands must be used. */
593 typedef struct ia32_address_mode_t ia32_address_mode_t;
594 struct ia32_address_mode_t {
/* whether the node uses normal register operands or source address mode */
599 ia32_op_type_t op_type;
/* operation is commutative, operands may be swapped by later phases */
603 unsigned commutative : 1;
/* operands were swapped during matching (relevant for non-commutative
 * x87 operations which have reversed variants) */
604 unsigned ins_permuted : 1;
/** Build an ia32 address from an explicit pointer + memory input,
 *  transforming base/index into the new graph (noreg_GP if absent). */
607 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
609 /* construct load address */
610 memset(addr, 0, sizeof(addr[0]));
611 ia32_create_address_mode(addr, ptr, 0);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
615 addr->mem = be_transform_node(mem);
/** Fill @p am with a source address mode for @p node, which is either a
 *  float Const (turned into a constant-pool access) or a Proj(Load). */
618 static void build_address(ia32_address_mode_t *am, ir_node *node,
619 ia32_create_am_flags_t flags)
621 ia32_address_t *addr = &am->addr;
/* float constants are accessed through their constant-pool entity */
627 if (is_Const(node)) {
628 ir_entity *entity = create_float_const_entity(node);
629 addr->base = noreg_GP;
630 addr->index = noreg_GP;
632 addr->symconst_ent = entity;
634 am->ls_mode = get_type_mode(get_entity_type(entity));
635 am->pinned = op_pin_state_floats;
/* otherwise node is a Proj(Load): fold the load into the address mode */
639 load = get_Proj_pred(node);
640 ptr = get_Load_ptr(load);
641 mem = get_Load_mem(load);
642 new_mem = be_transform_node(mem);
643 am->pinned = get_irn_pinned(load);
644 am->ls_mode = get_Load_mode(load);
/* remember the memory Proj so fix_mem_proj() can reattach it later */
645 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
648 /* construct load address */
649 ia32_create_address_mode(addr, ptr, flags);
651 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
652 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/** Copy the components of an ia32 address into the attributes of @p node. */
656 static void set_address(ir_node *node, const ia32_address_t *addr)
658 set_ia32_am_scale(node, addr->scale);
659 set_ia32_am_sc(node, addr->symconst_ent);
660 set_ia32_am_offs_int(node, addr->offset);
661 if (addr->symconst_sign)
662 set_ia32_am_sc_sign(node);
/* NOTE(review): presumably guarded by addr->use_frame in the complete
 * source — confirm before relying on unconditional frame usage here */
664 set_ia32_use_frame(node);
665 set_ia32_frame_ent(node, addr->frame_entity);
669 * Apply attributes of a given address mode to a node.
671 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
673 set_address(node, &am->addr);
675 set_ia32_op_type(node, am->op_type);
676 set_ia32_ls_mode(node, am->ls_mode);
677 if (am->pinned == op_pin_state_pinned) {
678 /* beware: some nodes are already pinned and did not allow to change the state */
679 if (get_irn_pinned(node) != op_pin_state_pinned)
680 set_irn_pinned(node, op_pin_state_pinned);
/* NOTE(review): presumably guarded by am->commutative in the complete
 * source; confirm before assuming every node is marked commutative */
683 set_ia32_commutative(node);
687 * Check, if a given node is a Down-Conv, ie. a integer Conv
688 * from a mode with a mode with more bits to a mode with lesser bits.
689 * Moreover, we return only true if the node has not more than 1 user.
691 * @param node the node
692 * @return non-zero if node is a Down-Conv
694 static int is_downconv(const ir_node *node)
702 /* we only want to skip the conv when we're the only user
703 * (not optimal but for now...)
705 if (get_irn_n_edges(node) > 1)
708 src_mode = get_irn_mode(get_Conv_op(node));
709 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the target must not be wider */
711 ia32_mode_needs_gp_reg(src_mode) &&
712 ia32_mode_needs_gp_reg(dest_mode) &&
713 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
716 /* Skip all Down-Conv's on a given node and return the resulting node. */
717 ir_node *ia32_skip_downconv(ir_node *node)
719 while (is_downconv(node))
720 node = get_Conv_op(node);
/** Widen @p node to a 32bit GP value via an I2I Conv, choosing a signed or
 *  unsigned target mode depending on the signedness of @p node's mode.
 *  @p orig_node is used for debug attribution of the created nodes. */
725 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
727 ir_mode *mode = get_irn_mode(node);
732 if (mode_is_signed(mode)) {
737 block = get_nodes_block(node);
738 dbgi = get_irn_dbg_info(node);
740 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
744 * matches operands of a node into ia32 addressing/operand modes. This covers
745 * usage of source address mode, immediates, operations with non 32-bit modes,
747 * The resulting data is filled into the @p am struct. block is the block
748 * of the node whose arguments are matched. op1, op2 are the first and second
749 * input that are matched (op1 may be NULL). other_op is another unrelated
750 * input that is not matched! but which is needed sometimes to check if AM
751 * for op1/op2 is legal.
752 * @p flags describes the supported modes of the operation in detail.
754 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
755 ir_node *op1, ir_node *op2, ir_node *other_op,
758 ia32_address_t *addr = &am->addr;
759 ir_mode *mode = get_irn_mode(op2);
760 int mode_bits = get_mode_size_bits(mode);
761 ir_node *new_op1, *new_op2;
763 unsigned commutative;
764 int use_am_and_immediates;
767 memset(am, 0, sizeof(am[0]));
/* decode the match flags into individual booleans */
769 commutative = (flags & match_commutative) != 0;
770 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
771 use_am = (flags & match_am) != 0;
772 use_immediate = (flags & match_immediate) != 0;
773 assert(!use_am_and_immediates || use_immediate);
776 assert(!commutative || op1 != NULL);
777 assert(use_am || !(flags & match_8bit_am));
778 assert(use_am || !(flags & match_16bit_am));
/* 8/16 bit operands are only allowed in AM when explicitly permitted */
780 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
781 (mode_bits == 16 && !(flags & match_16bit_am))) {
785 /* we can simply skip downconvs for mode neutral nodes: the upper bits
786 * can be random for these operations */
787 if (flags & match_mode_neutral) {
788 op2 = ia32_skip_downconv(op2);
790 op1 = ia32_skip_downconv(op1);
794 /* match immediates. firm nodes are normalized: constants are always on the
797 if (!(flags & match_try_am) && use_immediate) {
798 new_op2 = try_create_Immediate(op2, 0);
/* case 1: op2 can be folded as a source-address-mode operand */
801 if (new_op2 == NULL &&
802 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
803 build_address(am, op2, 0);
804 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
805 if (mode_is_float(mode)) {
806 new_op2 = ia32_new_NoReg_vfp(env_cg);
810 am->op_type = ia32_AddrModeS;
/* case 2: commutative op, try folding op1 as the memory operand instead */
811 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
813 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
815 build_address(am, op1, 0);
817 if (mode_is_float(mode)) {
818 noreg = ia32_new_NoReg_vfp(env_cg);
823 if (new_op2 != NULL) {
826 new_op1 = be_transform_node(op2);
/* operands were swapped; record it for ops with reversed variants */
828 am->ins_permuted = 1;
830 am->op_type = ia32_AddrModeS;
/* case 3: no address mode possible, plain register operands */
832 am->op_type = ia32_Normal;
834 if (flags & match_try_am) {
840 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
842 new_op2 = be_transform_node(op2);
844 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
/* normalize unset address parts so every consumer sees valid inputs */
846 if (addr->base == NULL)
847 addr->base = noreg_GP;
848 if (addr->index == NULL)
849 addr->index = noreg_GP;
850 if (addr->mem == NULL)
853 am->new_op1 = new_op1;
854 am->new_op2 = new_op2;
855 am->commutative = commutative;
859 * "Fixes" a node that uses address mode by turning it into mode_T
860 * and returning a pn_ia32_res Proj.
862 * @param node the node
863 * @param am its address mode
865 * @return a Proj(pn_ia32_res) if a memory address mode is used,
868 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do unless a Load was folded into the node's address mode */
873 if (am->mem_proj == NULL)
876 /* we have to create a mode_T so the old MemProj can attach to us */
877 mode = get_irn_mode(node);
878 load = get_Proj_pred(am->mem_proj);
/* redirect users of the folded Load to the new AM node */
880 be_set_transformed_node(load, node);
882 if (mode != mode_T) {
883 set_irn_mode(node, mode_T);
884 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
891 * Construct a standard binary operation, set AM and immediate if required.
893 * @param node The original node for which the binop is created
894 * @param op1 The first operand
895 * @param op2 The second operand
896 * @param func The node constructor function
897 * @return The constructed ia32 node.
899 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
900 construct_binop_func *func, match_flags_t flags)
903 ir_node *block, *new_block, *new_node;
904 ia32_address_mode_t am;
905 ia32_address_t *addr = &am.addr;
907 block = get_nodes_block(node);
908 match_arguments(&am, block, op1, op2, NULL, flags);
910 dbgi = get_irn_dbg_info(node);
911 new_block = be_transform_node(block);
912 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
913 am.new_op1, am.new_op2);
914 set_am_attributes(new_node, &am);
915 /* we can't use source address mode anymore when using immediates */
916 if (!(flags & match_am_and_immediates) &&
917 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
918 set_ia32_am_support(new_node, ia32_am_none);
919 SET_IA32_ORIG_NODE(new_node, node);
/* reattach the memory Proj if a Load was folded into the node */
921 new_node = fix_mem_proj(new_node, &am);
927 * Generic names for the inputs of an ia32 binary op.
930 n_ia32_l_binop_left, /**< ia32 left input */
931 n_ia32_l_binop_right, /**< ia32 right input */
932 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* verify that the generic input numbering matches the generated Adc/Sbb
 * node layouts, so gen_binop_flags() can address inputs uniformly */
934 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
935 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
936 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
937 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
938 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
939 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
942 * Construct a binary operation which also consumes the eflags.
944 * @param node The node to transform
945 * @param func The node constructor function
946 * @param flags The match flags
947 * @return The constructor ia32 node
949 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
952 ir_node *src_block = get_nodes_block(node);
953 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
954 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
955 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
957 ir_node *block, *new_node, *new_eflags;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded Load */
961 match_arguments(&am, src_block, op1, op2, eflags, flags);
963 dbgi = get_irn_dbg_info(node);
964 block = be_transform_node(src_block);
965 new_eflags = be_transform_node(eflags);
966 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2, new_eflags);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
/** Return the (lazily transformed, cached) initial x87 FPU control word. */
980 static ir_node *get_fpcw(void)
983 if (initial_fpcw != NULL)
/* first request: fetch the ABI's ignore node for the fpcw register */
986 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
987 &ia32_fp_cw_regs[REG_FPCW]);
988 initial_fpcw = be_transform_node(fpcw);
994 * Construct a standard binary operation, set AM and immediate if required.
996 * @param op1 The first operand
997 * @param op2 The second operand
998 * @param func The node constructor function
999 * @return The constructed ia32 node.
1001 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1002 construct_binop_float_func *func)
1004 ir_mode *mode = get_irn_mode(node);
1006 ir_node *block, *new_block, *new_node;
1007 ia32_address_mode_t am;
1008 ia32_address_t *addr = &am.addr;
1009 ia32_x87_attr_t *attr;
1010 /* All operations are considered commutative, because there are reverse
1012 match_flags_t flags = match_commutative;
1014 /* happens for div nodes... */
1016 mode = get_divop_resmod(node);
1018 /* cannot use address mode with long double on x87 */
1019 if (get_mode_size_bits(mode) <= 64)
1022 block = get_nodes_block(node);
1023 match_arguments(&am, block, op1, op2, NULL, flags);
1025 dbgi = get_irn_dbg_info(node);
1026 new_block = be_transform_node(block);
/* x87 ops additionally take the FPU control word as input */
1027 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1028 am.new_op1, am.new_op2, get_fpcw());
1029 set_am_attributes(new_node, &am);
/* remember operand swaps so emission can pick the reversed instruction */
1031 attr = get_ia32_x87_attr(new_node);
1032 attr->attr.data.ins_permuted = am.ins_permuted;
1034 SET_IA32_ORIG_NODE(new_node, node);
1036 new_node = fix_mem_proj(new_node, &am);
1042 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1044 * @param op1 The first operand
1045 * @param op2 The second operand
1046 * @param func The node constructor function
1047 * @return The constructed ia32 node.
1049 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1050 construct_shift_func *func,
1051 match_flags_t flags)
1054 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1056 assert(! mode_is_float(get_irn_mode(node)));
1057 assert(flags & match_immediate);
1058 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* the shifted value: either skip narrowing convs (mode neutral) or
 * widen sub-32bit values to a full register */
1060 if (flags & match_mode_neutral) {
1061 op1 = ia32_skip_downconv(op1);
1062 new_op1 = be_transform_node(op1);
1063 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1064 new_op1 = create_upconv(op1, node);
1066 new_op1 = be_transform_node(op1);
1069 /* the shift amount can be any mode that is bigger than 5 bits, since all
1070 * other bits are ignored anyway */
1071 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1072 ir_node *const op = get_Conv_op(op2);
1073 if (mode_is_float(get_irn_mode(op)))
1076 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1078 new_op2 = create_immediate_or_transform(op2, 0);
1080 dbgi = get_irn_dbg_info(node);
1081 block = get_nodes_block(node);
1082 new_block = be_transform_node(block);
1083 new_node = func(dbgi, new_block, new_op1, new_op2);
1084 SET_IA32_ORIG_NODE(new_node, node);
1086 /* lowered shift instruction may have a dependency operand, handle it here */
1087 if (get_irn_arity(node) == 3) {
1088 /* we have a dependency */
1089 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1090 add_irn_dep(new_node, new_dep);
1098 * Construct a standard unary operation, set AM and immediate if required.
1100 * @param op The operand
1101 * @param func The node constructor function
1102 * @return The constructed ia32 node.
1104 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1105 match_flags_t flags)
1108 ir_node *block, *new_block, *new_op, *new_node;
/* unary ops support no immediates/AM: only mode_neutral is meaningful */
1110 assert(flags == 0 || flags == match_mode_neutral);
1111 if (flags & match_mode_neutral) {
1112 op = ia32_skip_downconv(op);
1115 new_op = be_transform_node(op);
1116 dbgi = get_irn_dbg_info(node);
1117 block = get_nodes_block(node);
1118 new_block = be_transform_node(block);
1119 new_node = func(dbgi, new_block, new_op);
1121 SET_IA32_ORIG_NODE(new_node, node);
/** Build an ia32 Lea node computing the given address; missing base/index
 *  inputs are transformed lazily (NULL parts handled in the elided lines). */
1126 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1127 ia32_address_t *addr)
1129 ir_node *base, *index, *res;
1135 base = be_transform_node(base);
1138 index = addr->index;
1139 if (index == NULL) {
1142 index = be_transform_node(index);
1145 res = new_bd_ia32_Lea(dbgi, block, base, index);
/* copy offset/symconst/frame information onto the Lea */
1146 set_address(res, addr);
1152 * Returns non-zero if a given address mode has a symbolic or
1153 * numerical offset != 0.
1155 static int am_has_immediates(const ia32_address_t *addr)
1157 return addr->offset != 0 || addr->symconst_ent != NULL
1158 || addr->frame_entity || addr->use_frame;
1162 * Creates an ia32 Add.
1164 * @return the created ia32 Add node
1166 static ir_node *gen_Add(ir_node *node)
1168 ir_mode *mode = get_irn_mode(node);
1169 ir_node *op1 = get_Add_left(node);
1170 ir_node *op2 = get_Add_right(node);
1172 ir_node *block, *new_block, *new_node, *add_immediate_op;
1173 ia32_address_t addr;
1174 ia32_address_mode_t am;
/* float adds go through the generic SSE/x87 binop path */
1176 if (mode_is_float(mode)) {
1177 if (ia32_cg_config.use_sse2)
1178 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1179 match_commutative | match_am);
1181 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1184 ia32_mark_non_am(node);
1186 op2 = ia32_skip_downconv(op2);
1187 op1 = ia32_skip_downconv(op1);
1191 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1192 * 1. Add with immediate -> Lea
1193 * 2. Add with possible source address mode -> Add
1194 * 3. Otherwise -> Lea
/* force the whole Add tree into an address-mode description first */
1196 memset(&addr, 0, sizeof(addr));
1197 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1198 add_immediate_op = NULL;
1200 dbgi = get_irn_dbg_info(node);
1201 block = get_nodes_block(node);
1202 new_block = be_transform_node(block);
/* case 0: everything folded into immediates -> a single Const */
1205 if (addr.base == NULL && addr.index == NULL) {
1206 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1207 addr.symconst_sign, 0, addr.offset);
1208 be_dep_on_frame(new_node);
1209 SET_IA32_ORIG_NODE(new_node, node);
1212 /* add with immediate? */
1213 if (addr.index == NULL) {
1214 add_immediate_op = addr.base;
1215 } else if (addr.base == NULL && addr.scale == 0) {
1216 add_immediate_op = addr.index;
1219 if (add_immediate_op != NULL) {
/* Add x,0: no immediate part at all, just pass the operand through */
1220 if (!am_has_immediates(&addr)) {
1221 #ifdef DEBUG_libfirm
1222 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1225 return be_transform_node(add_immediate_op);
/* case 1: operand + immediate -> Lea */
1228 new_node = create_lea_from_address(dbgi, new_block, &addr);
1229 SET_IA32_ORIG_NODE(new_node, node);
1233 /* test if we can use source address mode */
1234 match_arguments(&am, block, op1, op2, NULL, match_commutative
1235 | match_mode_neutral | match_am | match_immediate | match_try_am);
1237 /* construct an Add with source address mode */
1238 if (am.op_type == ia32_AddrModeS) {
1239 ia32_address_t *am_addr = &am.addr;
1240 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1241 am_addr->index, am_addr->mem, am.new_op1,
1243 set_am_attributes(new_node, &am);
1244 SET_IA32_ORIG_NODE(new_node, node);
1246 new_node = fix_mem_proj(new_node, &am);
1251 /* otherwise construct a lea */
1252 new_node = create_lea_from_address(dbgi, new_block, &addr);
1253 SET_IA32_ORIG_NODE(new_node, node);
1258 * Creates an ia32 Mul.
1260 * @return the created ia32 Mul node
1262 static ir_node *gen_Mul(ir_node *node)
1264 ir_node *op1 = get_Mul_left(node);
1265 ir_node *op2 = get_Mul_right(node);
1266 ir_mode *mode = get_irn_mode(node);
/* float multiplication: SSE xMul or x87 vfmul */
1268 if (mode_is_float(mode)) {
1269 if (ia32_cg_config.use_sse2)
1270 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1271 match_commutative | match_am);
1273 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul supports address mode and immediates,
 * even simultaneously (match_am_and_immediates) */
1275 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1276 match_commutative | match_am | match_mode_neutral |
1277 match_immediate | match_am_and_immediates);
1281 * Creates an ia32 Mulh.
1282 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1283 * this result while Mul returns the lower 32 bit.
1285 * @return the created ia32 Mulh node
1287 static ir_node *gen_Mulh(ir_node *node)
1289 ir_node *block = get_nodes_block(node);
1290 ir_node *new_block = be_transform_node(block);
1291 dbg_info *dbgi = get_irn_dbg_info(node);
1292 ir_node *op1 = get_Mulh_left(node);
1293 ir_node *op2 = get_Mulh_right(node);
1294 ir_mode *mode = get_irn_mode(node);
1296 ir_node *proj_res_high;
/* choose signed (imul, one-operand form) vs. unsigned (mul) and
 * project out the high 32 bits of the 64-bit product */
1298 if (mode_is_signed(mode)) {
1299 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1300 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1301 mode_Iu, pn_ia32_IMul1OP_res_high);
1303 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1304 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1305 mode_Iu, pn_ia32_Mul_res_high);
1307 return proj_res_high;
1311 * Creates an ia32 And.
1313 * @return The created ia32 And node
1315 static ir_node *gen_And(ir_node *node)
1317 ir_node *op1 = get_And_left(node);
1318 ir_node *op2 = get_And_right(node);
1319 assert(! mode_is_float(get_irn_mode(node)));
1321 /* is it a zero extension? */
1322 if (is_Const(op2)) {
1323 tarval *tv = get_Const_tarval(op2);
1324 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is a zero extension from 8/16 bit: emit a
 * Conv to mode_Iu instead of an And instruction */
1326 if (v == 0xFF || v == 0xFFFF) {
1327 dbg_info *dbgi = get_irn_dbg_info(node);
1328 ir_node *block = get_nodes_block(node);
1335 assert(v == 0xFFFF);
1338 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1343 return gen_binop(node, op1, op2, new_bd_ia32_And,
1344 match_commutative | match_mode_neutral | match_am | match_immediate);
1350 * Creates an ia32 Or.
1352 * @return The created ia32 Or node
1354 static ir_node *gen_Or(ir_node *node)
1356 ir_node *op1 = get_Or_left(node);
1357 ir_node *op2 = get_Or_right(node);
/* integer only: float Or has no meaning and is rejected */
1359 assert (! mode_is_float(get_irn_mode(node)));
1360 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1361 | match_mode_neutral | match_am | match_immediate);
1367 * Creates an ia32 Eor.
1369 * @return The created ia32 Eor node
1371 static ir_node *gen_Eor(ir_node *node)
1373 ir_node *op1 = get_Eor_left(node);
1374 ir_node *op2 = get_Eor_right(node);
/* integer only; firm Eor maps to the ia32 Xor instruction */
1376 assert(! mode_is_float(get_irn_mode(node)));
1377 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1378 | match_mode_neutral | match_am | match_immediate);
1383 * Creates an ia32 Sub.
1385 * @return The created ia32 Sub node
1387 static ir_node *gen_Sub(ir_node *node)
1389 ir_node *op1 = get_Sub_left(node);
1390 ir_node *op2 = get_Sub_right(node);
1391 ir_mode *mode = get_irn_mode(node);
/* float subtraction: SSE xSub or x87 vfsub (not commutative, no
 * match_commutative flag) */
1393 if (mode_is_float(mode)) {
1394 if (ia32_cg_config.use_sse2)
1395 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1397 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,Const should have been normalized to Add x,-Const earlier;
 * warn so the missing normalisation can be investigated */
1400 if (is_Const(op2)) {
1401 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1405 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1406 | match_am | match_immediate);
/* Computes the memory input for a node that consumed a load via address
 * mode: combines the node's original memory with the address-mode memory
 * while avoiding cycles through the consumed load's own memory Proj.
 * Returns a (possibly new) Sync or a single transformed memory node. */
1409 static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
1410 ir_node *const src_val,
1411 ir_node *const src_mem,
1412 ir_node *const am_mem)
/* no address-mode memory: just use the node's own memory */
1414 if (is_NoMem(am_mem)) {
1415 return be_transform_node(src_mem);
1416 } else if (is_Proj(src_val) &&
1418 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1419 /* avoid memory loop */
1421 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1422 ir_node *const ptr_pred = get_Proj_pred(src_val);
1423 int const arity = get_Sync_n_preds(src_mem);
/* rebuild the Sync, dropping the predecessor that comes from the
 * consumed load and appending the address-mode memory */
1428 NEW_ARR_A(ir_node*, ins, arity + 1);
1430 /* NOTE: This sometimes produces dead-code because the old sync in
1431 * src_mem might not be used anymore, we should detect this case
1432 * and kill the sync... */
1433 for (i = arity - 1; i >= 0; --i) {
1434 ir_node *const pred = get_Sync_pred(src_mem, i);
1436 /* avoid memory loop */
1437 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1440 ins[n++] = be_transform_node(pred);
1445 return new_r_Sync(irg, block, n, ins);
/* fallback: Sync of the node's memory and the address-mode memory */
1449 ins[0] = be_transform_node(src_mem);
1451 return new_r_Sync(irg, block, 2, ins);
1456 * Create a 32bit to 64bit signed extension.
1458 * @param dbgi debug info
1459 * @param block the block where node nodes should be placed
1460 * @param val the value to extend
1461 * @param orig the original node
1463 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1464 ir_node *val, const ir_node *orig)
/* short form: cltd sign-extends eax into edx (ProduceVal models the
 * implicit eax input for the register allocator) */
1469 if (ia32_cg_config.use_short_sex_eax) {
1470 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1471 be_dep_on_frame(pval);
1472 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* generic form: arithmetic shift right by 31 replicates the sign bit */
1474 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1475 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1477 SET_IA32_ORIG_NODE(res, orig);
1482 * Generates an ia32 DivMod with additional infrastructure for the
1483 * register allocator if needed.
1485 static ir_node *create_Div(ir_node *node)
1487 dbg_info *dbgi = get_irn_dbg_info(node);
1488 ir_node *block = get_nodes_block(node);
1489 ir_node *new_block = be_transform_node(block);
1496 ir_node *sign_extension;
1497 ia32_address_mode_t am;
1498 ia32_address_t *addr = &am.addr;
1500 /* the upper bits have random contents for smaller modes */
/* shared transformer for Div, Mod and DivMod: fetch the operands,
 * memory and result mode from the matching accessors */
1501 switch (get_irn_opcode(node)) {
1503 op1 = get_Div_left(node);
1504 op2 = get_Div_right(node);
1505 mem = get_Div_mem(node);
1506 mode = get_Div_resmode(node);
1509 op1 = get_Mod_left(node);
1510 op2 = get_Mod_right(node);
1511 mem = get_Mod_mem(node);
1512 mode = get_Mod_resmode(node);
1515 op1 = get_DivMod_left(node);
1516 op2 = get_DivMod_right(node);
1517 mem = get_DivMod_mem(node);
1518 mode = get_DivMod_resmode(node);
1521 panic("invalid divmod node %+F", node);
1524 match_arguments(&am, block, op1, op2, NULL, match_am);
1526 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1527 is the memory of the consumed address. We can have only the second op as address
1528 in Div nodes, so check only op2. */
1529 new_mem = transform_AM_mem(current_ir_graph, block, op2, mem, addr->mem);
/* signed: sign-extend the dividend into edx (cdq/sar) and use idiv;
 * unsigned: the high part is a zero constant and div is used */
1531 if (mode_is_signed(mode)) {
1532 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1533 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1534 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1536 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1537 be_dep_on_frame(sign_extension);
1539 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1540 addr->index, new_mem, am.new_op2,
1541 am.new_op1, sign_extension);
/* division can trap, keep the original pinned state */
1544 set_irn_pinned(new_node, get_irn_pinned(node));
1546 set_am_attributes(new_node, &am);
1547 SET_IA32_ORIG_NODE(new_node, node);
1549 new_node = fix_mem_proj(new_node, &am);
1555 * Generates an ia32 Mod.
/* thin wrapper: Mod shares the Div/Mod/DivMod transformer */
1557 static ir_node *gen_Mod(ir_node *node)
1559 return create_Div(node);
1563 * Generates an ia32 Div.
/* thin wrapper: Div shares the Div/Mod/DivMod transformer */
1565 static ir_node *gen_Div(ir_node *node)
1567 return create_Div(node);
1571 * Generates an ia32 DivMod.
/* thin wrapper: DivMod shares the Div/Mod/DivMod transformer */
1573 static ir_node *gen_DivMod(ir_node *node)
1575 return create_Div(node);
1581 * Creates an ia32 floating Div.
1583 * @return The created ia32 xDiv node
1585 static ir_node *gen_Quot(ir_node *node)
1587 ir_node *op1 = get_Quot_left(node);
1588 ir_node *op2 = get_Quot_right(node);
/* SSE divide or x87 vfdiv, depending on the codegen configuration */
1590 if (ia32_cg_config.use_sse2) {
1591 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1593 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1599 * Creates an ia32 Shl.
1601 * @return The created ia32 Shl node
1603 static ir_node *gen_Shl(ir_node *node)
1605 ir_node *left = get_Shl_left(node);
1606 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral (upper bits irrelevant) */
1608 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1609 match_mode_neutral | match_immediate);
1613 * Creates an ia32 Shr.
1615 * @return The created ia32 Shr node
1617 static ir_node *gen_Shr(ir_node *node)
1619 ir_node *left = get_Shr_left(node);
1620 ir_node *right = get_Shr_right(node);
/* logical right shift: NOT mode-neutral, the upper bits matter */
1622 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1628 * Creates an ia32 Sar.
1630 * @return The created ia32 Shrs node
1632 static ir_node *gen_Shrs(ir_node *node)
1634 ir_node *left = get_Shrs_left(node);
1635 ir_node *right = get_Shrs_right(node);
/* special case: arithmetic shift by a constant (presumably 31, the
 * check itself is elided here) is a pure 32->64 sign extension */
1637 if (is_Const(right)) {
1638 tarval *tv = get_Const_tarval(right);
1639 long val = get_tarval_long(tv);
1641 /* this is a sign extension */
1642 dbg_info *dbgi = get_irn_dbg_info(node);
1643 ir_node *block = be_transform_node(get_nodes_block(node));
1644 ir_node *new_op = be_transform_node(left);
1646 return create_sex_32_64(dbgi, block, new_op, node);
1650 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, c), c) with c == 16 or 24 sign-extends the low 16/8
 * bits: replace the shift pair with a Conv */
1651 if (is_Const(right) && is_Shl(left)) {
1652 ir_node *shl_left = get_Shl_left(left);
1653 ir_node *shl_right = get_Shl_right(left);
1654 if (is_Const(shl_right)) {
1655 tarval *tv1 = get_Const_tarval(right);
1656 tarval *tv2 = get_Const_tarval(shl_right);
1657 if (tv1 == tv2 && tarval_is_long(tv1)) {
1658 long val = get_tarval_long(tv1);
1659 if (val == 16 || val == 24) {
1660 dbg_info *dbgi = get_irn_dbg_info(node);
1661 ir_node *block = get_nodes_block(node);
1671 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: ia32 Sar */
1680 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1686 * Creates an ia32 Rol.
1688 * @param op1 The first operator
1689 * @param op2 The second operator
1690 * @return The created ia32 RotL node
1692 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1694 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1700 * Creates an ia32 Ror.
1701 * NOTE: There is no RotR with immediate because this would always be a RotL
1702 * "imm-mode_size_bits" which can be pre-calculated.
1704 * @param op1 The first operator
1705 * @param op2 The second operator
1706 * @return The created ia32 RotR node
1708 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1710 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1716 * Creates an ia32 RotR or RotL (depending on the found pattern).
1718 * @return The created ia32 RotL or RotR node
1720 static ir_node *gen_Rotl(ir_node *node)
1722 ir_node *rotate = NULL;
1723 ir_node *op1 = get_Rotl_left(node);
1724 ir_node *op2 = get_Rotl_right(node);
1726 /* Firm has only RotL, so we are looking for a right (op2)
1727 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1728 that means we can create a RotR instead of an Add and a RotL */
/* match op2 == Add(Minus(e), bits): RotL(x, bits - e) == RotR(x, e) */
1732 ir_node *left = get_Add_left(add);
1733 ir_node *right = get_Add_right(add);
1734 if (is_Const(right)) {
1735 tarval *tv = get_Const_tarval(right);
1736 ir_mode *mode = get_irn_mode(node);
1737 long bits = get_mode_size_bits(mode);
1739 if (is_Minus(left) &&
1740 tarval_is_long(tv) &&
1741 get_tarval_long(tv) == bits &&
1744 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1745 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* pattern did not match: fall back to a plain RotL */
1750 if (rotate == NULL) {
1751 rotate = gen_Rol(node, op1, op2);
1760 * Transforms a Minus node.
1762 * @return The created ia32 Minus node
1764 static ir_node *gen_Minus(ir_node *node)
1766 ir_node *op = get_Minus_op(node);
1767 ir_node *block = be_transform_node(get_nodes_block(node));
1768 dbg_info *dbgi = get_irn_dbg_info(node);
1769 ir_mode *mode = get_irn_mode(node);
1774 if (mode_is_float(mode)) {
1775 ir_node *new_op = be_transform_node(op);
/* SSE has no negate: xor the sign bit with a constant from memory */
1776 if (ia32_cg_config.use_sse2) {
1777 /* TODO: non-optimal... if we have many xXors, then we should
1778 * rather create a load for the const and use that instead of
1779 * several AM nodes... */
1780 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1782 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1783 nomem, new_op, noreg_xmm);
/* pick the single- or double-precision sign-mask constant */
1785 size = get_mode_size_bits(mode);
1786 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1788 set_ia32_am_sc(new_node, ent);
1789 set_ia32_op_type(new_node, ia32_AddrModeS);
1790 set_ia32_ls_mode(new_node, mode);
/* x87 has a change-sign instruction (fchs) */
1792 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1795 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1798 SET_IA32_ORIG_NODE(new_node, node);
1804 * Transforms a Not node.
1806 * @return The created ia32 Not node
1808 static ir_node *gen_Not(ir_node *node)
1810 ir_node *op = get_Not_op(node);
/* boolean Nots must have been lowered to Cmp/Mux earlier;
 * float Not has no meaning */
1812 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1813 assert (! mode_is_float(get_irn_mode(node)));
1815 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1821 * Transforms an Abs node.
1823 * @return The created ia32 Abs node
1825 static ir_node *gen_Abs(ir_node *node)
1827 ir_node *block = get_nodes_block(node);
1828 ir_node *new_block = be_transform_node(block);
1829 ir_node *op = get_Abs_op(node);
1830 dbg_info *dbgi = get_irn_dbg_info(node);
1831 ir_mode *mode = get_irn_mode(node);
1837 if (mode_is_float(mode)) {
1838 new_op = be_transform_node(op);
/* SSE: clear the sign bit by And-ing with an abs-mask constant */
1840 if (ia32_cg_config.use_sse2) {
1841 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1842 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1843 nomem, new_op, noreg_fp);
1845 size = get_mode_size_bits(mode);
1846 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1848 set_ia32_am_sc(new_node, ent);
1850 SET_IA32_ORIG_NODE(new_node, node);
1852 set_ia32_op_type(new_node, ia32_AddrModeS);
1853 set_ia32_ls_mode(new_node, mode);
/* x87: fabs instruction */
1855 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1856 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs via the branchless idiom:
 * s = x >> 31 (all sign bits); abs = (x ^ s) - s */
1859 ir_node *xor, *sign_extension;
1861 if (get_mode_size_bits(mode) == 32) {
1862 new_op = be_transform_node(op);
1864 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1867 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1869 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1870 nomem, new_op, sign_extension);
1871 SET_IA32_ORIG_NODE(xor, node);
1873 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1874 nomem, xor, sign_extension);
1875 SET_IA32_ORIG_NODE(new_node, node);
1882 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1884 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1886 dbg_info *dbgi = get_irn_dbg_info(cmp);
1887 ir_node *block = get_nodes_block(cmp);
1888 ir_node *new_block = be_transform_node(block);
1889 ir_node *op1 = be_transform_node(x);
1890 ir_node *op2 = be_transform_node(n);
/* bt copies the tested bit into the carry flag */
1892 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1896 * Transform a node returning a "flag" result.
1898 * @param node the node to transform
1899 * @param pnc_out the compare mode to use
1901 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1908 /* we have a Cmp as input */
1909 if (is_Proj(node)) {
1910 ir_node *pred = get_Proj_pred(node);
1912 pn_Cmp pnc = get_Proj_proj(node);
/* try to turn Cmp(x & (1 << n), 0) (in)equality tests into a bt
 * instruction when the architecture profits from it */
1913 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1914 ir_node *l = get_Cmp_left(pred);
1915 ir_node *r = get_Cmp_right(pred);
1917 ir_node *la = get_And_left(l);
1918 ir_node *ra = get_And_right(l);
/* pattern with the shift on the left And operand */
1920 ir_node *c = get_Shl_left(la);
1921 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1922 /* (1 << n) & ra) */
1923 ir_node *n = get_Shl_right(la);
1924 flags = gen_bt(pred, ra, n);
1925 /* we must generate a Jc/Jnc jump */
1926 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1929 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* symmetric pattern with the shift on the right And operand */
1934 ir_node *c = get_Shl_left(ra);
1935 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1936 /* la & (1 << n)) */
1937 ir_node *n = get_Shl_right(ra);
1938 flags = gen_bt(pred, la, n);
1939 /* we must generate a Jc/Jnc jump */
1940 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1943 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* plain Cmp: simply transform the predecessor */
1949 flags = be_transform_node(pred);
1955 /* a mode_b value, we have to compare it against 0 */
1956 dbgi = get_irn_dbg_info(node);
1957 new_block = be_transform_node(get_nodes_block(node));
1958 new_op = be_transform_node(node);
1959 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1960 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1961 *pnc_out = pn_Cmp_Lg;
1966 * Transforms a Load.
1968 * @return the created ia32 Load node
1970 static ir_node *gen_Load(ir_node *node)
1972 ir_node *old_block = get_nodes_block(node);
1973 ir_node *block = be_transform_node(old_block);
1974 ir_node *ptr = get_Load_ptr(node);
1975 ir_node *mem = get_Load_mem(node);
1976 ir_node *new_mem = be_transform_node(mem);
1979 dbg_info *dbgi = get_irn_dbg_info(node);
1980 ir_mode *mode = get_Load_mode(node);
1983 ia32_address_t addr;
1985 /* construct load address */
1986 memset(&addr, 0, sizeof(addr));
1987 ia32_create_address_mode(&addr, ptr, 0);
1994 base = be_transform_node(base);
1997 if (index == NULL) {
2000 index = be_transform_node(index);
/* float loads: SSE xLoad or x87 vfld */
2003 if (mode_is_float(mode)) {
2004 if (ia32_cg_config.use_sse2) {
2005 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2007 res_mode = mode_xmm;
2009 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2011 res_mode = mode_vfp;
2014 assert(mode != mode_b);
2016 /* create a conv node with address mode for smaller modes */
2017 if (get_mode_size_bits(mode) < 32) {
2018 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2019 new_mem, noreg_GP, mode);
2021 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2026 set_irn_pinned(new_node, get_irn_pinned(node));
2027 set_ia32_op_type(new_node, ia32_AddrModeS);
2028 set_ia32_ls_mode(new_node, mode);
2029 set_address(new_node, &addr);
/* floating (non-pinned) loads may be rematerialized by the spiller */
2031 if (get_irn_pinned(node) == op_pin_state_floats) {
2032 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2033 && pn_ia32_vfld_res == pn_ia32_Load_res
2034 && pn_ia32_Load_res == pn_ia32_res);
2035 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2038 SET_IA32_ORIG_NODE(new_node, node);
2040 be_dep_on_frame(new_node);
/* Tests whether a value (a Load result Proj) may be folded into a store
 * as destination address mode. Requires: the load result has a single
 * user, load and store live in the same block, both access the same
 * pointer, and no other input of the operation depends on the load. */
2044 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2045 ir_node *ptr, ir_node *other)
2052 /* we only use address mode if we're the only user of the load */
2053 if (get_irn_n_edges(node) > 1)
2056 load = get_Proj_pred(node);
2059 if (get_nodes_block(load) != block)
2062 /* store should have the same pointer as the load */
2063 if (get_Load_ptr(load) != ptr)
2066 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2067 if (other != NULL &&
2068 get_nodes_block(other) == block &&
2069 heights_reachable_in_block(heights, other, load)) {
2073 if (prevents_AM(block, load, mem))
2075 /* Store should be attached to the load via mem */
2076 assert(heights_reachable_in_block(heights, mem, load));
/* Builds a read-modify-write binary operation (e.g. AddMem) when one of
 * the operands can be consumed as destination address mode; returns the
 * new node or (in the elided fallthrough) NULL when folding is illegal.
 * func8bit is used for 8-bit modes, func otherwise. */
2081 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2082 ir_node *mem, ir_node *ptr, ir_mode *mode,
2083 construct_binop_dest_func *func,
2084 construct_binop_dest_func *func8bit,
2085 match_flags_t flags)
2087 ir_node *src_block = get_nodes_block(node);
2095 ia32_address_mode_t am;
2096 ia32_address_t *addr = &am.addr;
2097 memset(&am, 0, sizeof(am));
2099 assert(flags & match_immediate); /* there is no destam node without... */
2100 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is the foldable load; for commutative ops
 * both orders are tried, the other operand becomes an immediate or
 * register value */
2102 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2103 build_address(&am, op1, ia32_create_am_double_use);
2104 new_op = create_immediate_or_transform(op2, 0);
2105 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2106 build_address(&am, op2, ia32_create_am_double_use);
2107 new_op = create_immediate_or_transform(op1, 0);
/* fill missing address parts with the no-register placeholder */
2112 if (addr->base == NULL)
2113 addr->base = noreg_GP;
2114 if (addr->index == NULL)
2115 addr->index = noreg_GP;
2116 if (addr->mem == NULL)
2119 dbgi = get_irn_dbg_info(node);
2120 block = be_transform_node(src_block);
2121 new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
2123 if (get_mode_size_bits(mode) == 8) {
2124 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2126 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2128 set_address(new_node, addr);
2129 set_ia32_op_type(new_node, ia32_AddrModeD);
2130 set_ia32_ls_mode(new_node, mode);
2131 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory users to the new RMW node */
2133 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2134 mem_proj = be_transform_node(am.mem_proj);
2135 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Builds a read-modify-write unary operation (e.g. NotMem, NegMem) with
 * destination address mode; returns NULL (via the elided early exit)
 * when the operand cannot be folded. */
2140 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2141 ir_node *ptr, ir_mode *mode,
2142 construct_unop_dest_func *func)
2144 ir_node *src_block = get_nodes_block(node);
2150 ia32_address_mode_t am;
2151 ia32_address_t *addr = &am.addr;
2153 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2156 memset(&am, 0, sizeof(am));
2157 build_address(&am, op, ia32_create_am_double_use);
2159 dbgi = get_irn_dbg_info(node);
2160 block = be_transform_node(src_block);
2161 new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
2162 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2163 set_address(new_node, addr);
2164 set_ia32_op_type(new_node, ia32_AddrModeD);
2165 set_ia32_ls_mode(new_node, mode);
2166 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory users to the new RMW node */
2168 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2169 mem_proj = be_transform_node(am.mem_proj);
2170 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Tries to transform Store(Mux(sel, 1, 0)) into a SetMem (setcc with a
 * memory destination). Only 8-bit stores of a 0/1 Mux qualify; the
 * inverted Mux(sel, 0, 1) form is handled via the negated flag. */
2175 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2177 ir_mode *mode = get_irn_mode(node);
2178 ir_node *mux_true = get_Mux_true(node);
2179 ir_node *mux_false = get_Mux_false(node);
2189 ia32_address_t addr;
/* setcc writes exactly one byte */
2191 if (get_mode_size_bits(mode) != 8)
2194 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2196 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2202 build_address_ptr(&addr, ptr, mem);
2204 dbgi = get_irn_dbg_info(node);
2205 block = get_nodes_block(node);
2206 new_block = be_transform_node(block);
2207 cond = get_Mux_sel(node);
/* evaluate the selector to a flags value + compare code */
2208 flags = get_flags_node(cond, &pnc);
2209 new_mem = be_transform_node(mem);
2210 new_node = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2211 addr.index, addr.mem, flags, pnc, negated);
2212 set_address(new_node, &addr);
2213 set_ia32_op_type(new_node, ia32_AddrModeD);
2214 set_ia32_ls_mode(new_node, mode);
2215 SET_IA32_ORIG_NODE(new_node, node);
/* Tries to transform a Store(op(...)) into a single read-modify-write
 * instruction with destination address mode (AddMem, OrMem, ShlMem, ...).
 * Returns the new node or NULL when no destination-AM form applies. */
2220 static ir_node *try_create_dest_am(ir_node *node)
2222 ir_node *val = get_Store_value(node);
2223 ir_node *mem = get_Store_mem(node);
2224 ir_node *ptr = get_Store_ptr(node);
2225 ir_mode *mode = get_irn_mode(val);
2226 unsigned bits = get_mode_size_bits(mode);
2231 /* handle only GP modes for now... */
2232 if (!ia32_mode_needs_gp_reg(mode))
2236 /* store must be the only user of the val node */
2237 if (get_irn_n_edges(val) > 1)
2239 /* skip pointless convs */
2241 ir_node *conv_op = get_Conv_op(val);
2242 ir_mode *pred_mode = get_irn_mode(conv_op);
2243 if (!ia32_mode_needs_gp_reg(pred_mode))
2245 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2253 /* value must be in the same block */
2254 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the stored operation */
2257 switch (get_irn_opcode(val)) {
2259 op1 = get_Add_left(val);
2260 op2 = get_Add_right(val);
/* Add +/-1 becomes inc/dec when the target prefers them */
2261 if (ia32_cg_config.use_incdec) {
2262 if (is_Const_1(op2)) {
2263 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2265 } else if (is_Const_Minus_1(op2)) {
2266 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2270 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2271 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2272 match_commutative | match_immediate);
2275 op1 = get_Sub_left(val);
2276 op2 = get_Sub_right(val);
2277 if (is_Const(op2)) {
2278 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2280 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2281 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2285 op1 = get_And_left(val);
2286 op2 = get_And_right(val);
2287 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2288 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2289 match_commutative | match_immediate);
2292 op1 = get_Or_left(val);
2293 op2 = get_Or_right(val);
2294 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2295 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2296 match_commutative | match_immediate);
2299 op1 = get_Eor_left(val);
2300 op2 = get_Eor_right(val);
2301 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2302 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2303 match_commutative | match_immediate);
/* the shift/rotate memory forms have no separate 8-bit constructor,
 * the same constructor is passed twice */
2306 op1 = get_Shl_left(val);
2307 op2 = get_Shl_right(val);
2308 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2309 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2313 op1 = get_Shr_left(val);
2314 op2 = get_Shr_right(val);
2315 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2316 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2320 op1 = get_Shrs_left(val);
2321 op2 = get_Shrs_right(val);
2322 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2323 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2327 op1 = get_Rotl_left(val);
2328 op2 = get_Rotl_right(val);
2329 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2330 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2333 /* TODO: match ROR patterns... */
2335 new_node = try_create_SetMem(val, ptr, mem);
2338 op1 = get_Minus_op(val);
2339 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2342 /* should be lowered already */
2343 assert(mode != mode_b);
2344 op1 = get_Not_op(val);
2345 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* inherit the pinned state of the original store */
2351 if (new_node != NULL) {
2352 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2353 get_irn_pinned(node) == op_pin_state_pinned) {
2354 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns true if an integer mode can be the result of a float->int
 * conversion done by fist: must be signed and 16 or 32 bits wide. */
2361 static bool possible_int_mode_for_fp(ir_mode *mode)
2365 if (!mode_is_signed(mode))
2367 size = get_mode_size_bits(mode);
2368 if (size != 16 && size != 32)
/* Returns non-zero if the node is a Conv from a float mode to an
 * integer mode that a fist instruction can produce directly. */
2373 static int is_float_to_int_conv(const ir_node *node)
2375 ir_mode *mode = get_irn_mode(node);
2379 if (!possible_int_mode_for_fp(mode))
2384 conv_op = get_Conv_op(node);
2385 conv_mode = get_irn_mode(conv_op);
2387 if (!mode_is_float(conv_mode))
2394 * Transform a Store(floatConst) into a sequence of
2397 * @return the created ia32 Store node
2399 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2401 ir_mode *mode = get_irn_mode(cns);
2402 unsigned size = get_mode_size_bytes(mode);
2403 tarval *tv = get_Const_tarval(cns);
2404 ir_node *block = get_nodes_block(node);
2405 ir_node *new_block = be_transform_node(block);
2406 ir_node *ptr = get_Store_ptr(node);
2407 ir_node *mem = get_Store_mem(node);
2408 dbg_info *dbgi = get_irn_dbg_info(node);
2412 ia32_address_t addr;
/* the constant is emitted in 32-bit chunks, so its size must be a
 * multiple of 4 bytes (float: 1 store, double: 2 stores) */
2414 assert(size % 4 == 0);
2417 build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32-bit word of the constant */
2421 get_tarval_sub_bits(tv, ofs) |
2422 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2423 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2424 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2425 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2427 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2428 addr.index, addr.mem, imm);
2430 set_irn_pinned(new_node, get_irn_pinned(node));
2431 set_ia32_op_type(new_node, ia32_AddrModeD);
2432 set_ia32_ls_mode(new_node, mode_Iu);
2433 set_address(new_node, &addr);
2434 SET_IA32_ORIG_NODE(new_node, node);
/* collect the partial stores; the loop advances the address offset
 * (elided here) until the whole constant is written */
2437 ins[i++] = new_node;
2442 } while (size != 0);
/* combine the partial stores' memories into one Sync */
2445 return new_rd_Sync(dbgi, current_ir_graph, new_block, i, ins);
2452 * Generate a vfist or vfisttp instruction.
2454 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2455 ir_node *mem, ir_node *val, ir_node **fist)
2459 if (ia32_cg_config.use_fisttp) {
2460 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2461 if other users exists */
2462 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2463 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2464 ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
/* the Keep prevents the popped value from being lost to the RA */
2465 be_new_Keep(reg_class, irg, block, 1, &value);
2467 new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* no fisttp: classic fist needs the FPU control word set to
 * truncation mode around the store */
2470 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2473 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2479 * Transforms a general (no special case) Store.
2481 * @return the created ia32 Store node
2483 static ir_node *gen_general_Store(ir_node *node)
2485 ir_node *val = get_Store_value(node);
2486 ir_mode *mode = get_irn_mode(val);
2487 ir_node *block = get_nodes_block(node);
2488 ir_node *new_block = be_transform_node(block);
2489 ir_node *ptr = get_Store_ptr(node);
2490 ir_node *mem = get_Store_mem(node);
2491 dbg_info *dbgi = get_irn_dbg_info(node);
2492 ir_node *new_val, *new_node, *store;
2493 ia32_address_t addr;
2495 /* check for destination address mode */
2496 new_node = try_create_dest_am(node);
2497 if (new_node != NULL)
2500 /* construct store address */
2501 memset(&addr, 0, sizeof(addr));
2502 ia32_create_address_mode(&addr, ptr, 0);
2504 if (addr.base == NULL) {
2505 addr.base = noreg_GP;
2507 addr.base = be_transform_node(addr.base);
2510 if (addr.index == NULL) {
2511 addr.index = noreg_GP;
2513 addr.index = be_transform_node(addr.index);
2515 addr.mem = be_transform_node(mem);
2517 if (mode_is_float(mode)) {
2518 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2520 while (is_Conv(val) && mode == get_irn_mode(val)) {
2521 ir_node *op = get_Conv_op(val);
2522 if (!mode_is_float(get_irn_mode(op)))
/* float store: SSE xStore or x87 vfst */
2526 new_val = be_transform_node(val);
2527 if (ia32_cg_config.use_sse2) {
2528 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2529 addr.index, addr.mem, new_val);
2531 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2532 addr.index, addr.mem, new_val, mode);
/* Store(Conv(float)) on x87: fist converts and stores in one go */
2535 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2536 val = get_Conv_op(val);
2538 /* TODO: is this optimisation still necessary at all (middleend)? */
2539 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2540 while (is_Conv(val)) {
2541 ir_node *op = get_Conv_op(val);
2542 if (!mode_is_float(get_irn_mode(op)))
2544 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2548 new_val = be_transform_node(val);
2549 new_node = gen_vfist(dbgi, current_ir_graph, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store; 8-bit stores need the dedicated Store8Bit node
 * (only a/b/c/d registers have byte subregisters) */
2551 new_val = create_immediate_or_transform(val, 0);
2552 assert(mode != mode_b);
2554 if (get_mode_size_bits(mode) == 8) {
2555 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2556 addr.index, addr.mem, new_val);
2558 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2559 addr.index, addr.mem, new_val);
2564 set_irn_pinned(store, get_irn_pinned(node));
2565 set_ia32_op_type(store, ia32_AddrModeD);
2566 set_ia32_ls_mode(store, mode);
2568 set_address(store, &addr);
2569 SET_IA32_ORIG_NODE(store, node);
2575 * Transforms a Store.
2577 * @return the created ia32 Store node
/* Transform a Store node: dispatch float-constant stores to a special
 * integer-store sequence, everything else to gen_general_Store. */
2579 static ir_node *gen_Store(ir_node *node)
2581 ir_node *val = get_Store_value(node);
2582 ir_mode *mode = get_irn_mode(val);
2584 if (mode_is_float(mode) && is_Const(val)) {
2585 /* We can transform every floating const store
2586 into a sequence of integer stores.
2587 If the constant is already in a register,
2588 it would be better to use it, but we don't
2589 have this information here. */
2590 return gen_float_const_Store(node, val);
2592 return gen_general_Store(node);
2596 * Transforms a Switch.
2598 * @return the created ia32 SwitchJmp node
/* Transform a switch-Cond into an ia32 SwitchJmp.
 * Scans all case Projs to find the min/max case value, rejects tables
 * larger than 256000 entries, and biases the selector by -switch_min
 * (via a Lea) when the smallest case is not 0. */
2600 static ir_node *create_Switch(ir_node *node)
2602 dbg_info *dbgi = get_irn_dbg_info(node);
2603 ir_node *block = be_transform_node(get_nodes_block(node));
2604 ir_node *sel = get_Cond_selector(node);
2605 ir_node *new_sel = be_transform_node(sel);
2606 long switch_min = LONG_MAX;
2607 long switch_max = LONG_MIN;
2608 long default_pn = get_Cond_default_proj(node);
2610 const ir_edge_t *edge;
2612 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2614 /* determine the smallest switch case value */
2615 foreach_out_edge(node, edge) {
2616 ir_node *proj = get_edge_src_irn(edge);
2617 long pn = get_Proj_proj(proj);
/* default proj does not participate in the min/max computation */
2618 if (pn == default_pn)
2621 if (pn < switch_min)
2623 if (pn > switch_max)
/* unsigned subtraction avoids overflow when min is very negative */
2627 if ((unsigned long) (switch_max - switch_min) > 256000) {
2628 panic("Size of switch %+F bigger than 256000", node);
2631 if (switch_min != 0) {
2632 /* if smallest switch case is not 0 we need an additional sub */
2633 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2634 add_ia32_am_offs_int(new_sel, -switch_min);
2635 set_ia32_op_type(new_sel, ia32_AddrModeS);
2637 SET_IA32_ORIG_NODE(new_sel, node);
2640 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2641 SET_IA32_ORIG_NODE(new_node, node);
2647 * Transform a Cond node.
/* Transform a Cond node: non-mode_b selectors are switches; otherwise
 * obtain a flags-producing node for the selector and emit a Jcc. */
2649 static ir_node *gen_Cond(ir_node *node)
2651 ir_node *block = get_nodes_block(node);
2652 ir_node *new_block = be_transform_node(block);
2653 dbg_info *dbgi = get_irn_dbg_info(node);
2654 ir_node *sel = get_Cond_selector(node);
2655 ir_mode *sel_mode = get_irn_mode(sel);
2656 ir_node *flags = NULL;
2660 if (sel_mode != mode_b) {
2661 return create_Switch(node);
2664 /* we get flags from a Cmp */
2665 flags = get_flags_node(sel, &pnc);
2667 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2668 SET_IA32_ORIG_NODE(new_node, node);
2674 * Transform a be_Copy.
/* Transform a be_Copy: duplicate it and normalize GP-register modes
 * to mode_Iu so the backend only sees one integer mode. */
2676 static ir_node *gen_be_Copy(ir_node *node)
2678 ir_node *new_node = be_duplicate_node(node);
2679 ir_mode *mode = get_irn_mode(new_node);
2681 if (ia32_mode_needs_gp_reg(mode)) {
2682 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare for a Cmp node. Prefers vFucomi (flags
 * directly) when available; otherwise vFtstFnstsw for compares against 0,
 * or vFucomFnstsw, both followed by Sahf to move FPU status into EFLAGS. */
2688 static ir_node *create_Fucom(ir_node *node)
2690 dbg_info *dbgi = get_irn_dbg_info(node);
2691 ir_node *block = get_nodes_block(node);
2692 ir_node *new_block = be_transform_node(block);
2693 ir_node *left = get_Cmp_left(node);
2694 ir_node *new_left = be_transform_node(left);
2695 ir_node *right = get_Cmp_right(node);
2699 if (ia32_cg_config.use_fucomi) {
2700 new_right = be_transform_node(right);
2701 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2703 set_ia32_commutative(new_node);
2704 SET_IA32_ORIG_NODE(new_node, node);
/* No fucomi: emit a compare that stores the FPU status word ... */
2706 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2707 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2709 new_right = be_transform_node(right);
2710 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2713 set_ia32_commutative(new_node);
2715 SET_IA32_ORIG_NODE(new_node, node);
/* ... then copy AH into EFLAGS with sahf */
2717 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2718 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE Ucomi compare for a Cmp node, using address-mode
 * matching so one operand may come directly from memory. */
2724 static ir_node *create_Ucomi(ir_node *node)
2726 dbg_info *dbgi = get_irn_dbg_info(node);
2727 ir_node *src_block = get_nodes_block(node);
2728 ir_node *new_block = be_transform_node(src_block);
2729 ir_node *left = get_Cmp_left(node);
2730 ir_node *right = get_Cmp_right(node);
2732 ia32_address_mode_t am;
2733 ia32_address_t *addr = &am.addr;
2735 match_arguments(&am, src_block, left, right, NULL,
2736 match_commutative | match_am);
2738 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2739 addr->mem, am.new_op1, am.new_op2,
2741 set_am_attributes(new_node, &am);
2743 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possible memory Proj of the folded load */
2745 new_node = fix_mem_proj(new_node, &am);
2751 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2752 * to fold an and into a test node
/* Returns true iff all users of the Cmp are Eq/Lg Projs, which is the
 * precondition for folding an And into a Test instruction (Test only
 * yields valid equality/inequality flags). */
2754 static bool can_fold_test_and(ir_node *node)
2756 const ir_edge_t *edge;
2758 /** we can only have eq and lg projs */
2759 foreach_out_edge(node, edge) {
2760 ir_node *proj = get_edge_src_irn(edge);
2761 pn_Cmp pnc = get_Proj_proj(proj);
2762 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2770 * returns true if it is assured, that the upper bits of a node are "clean"
2771 * which means for a 16 or 8 bit value, that the upper bits in the register
2772 * are 0 for unsigned and a copy of the last significant bit for signed
/* Returns true if the bits of transformed_node above get_mode_size_bits(mode)
 * are known "clean": zero for unsigned modes, sign-copies for signed ones.
 * Used to widen 8/16 bit compares to the smaller-encoded 32 bit form. */
2775 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2777 assert(ia32_mode_needs_gp_reg(mode));
/* 32-bit (or wider) values have no upper bits to worry about */
2778 if (get_mode_size_bits(mode) >= 32)
2781 if (is_Proj(transformed_node))
2782 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2784 switch (get_ia32_irn_opcode(transformed_node)) {
/* A Conv from a smaller mode of the same signedness zero/sign-extends. */
2785 case iro_ia32_Conv_I2I:
2786 case iro_ia32_Conv_I2I8Bit: {
2787 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2788 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2790 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* Shr by a big enough constant clears the upper bits (unsigned only). */
2797 if (mode_is_signed(mode)) {
2798 return false; /* TODO handle signed modes */
2800 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2801 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2802 const ia32_immediate_attr_t *attr
2803 = get_ia32_immediate_attr_const(right);
2804 if (attr->symconst == 0 &&
2805 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2809 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
/* Sar preserves cleanliness of its value operand. */
2813 /* TODO too conservative if shift amount is constant */
2814 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand suffices (it masks). */
2817 if (!mode_is_signed(mode)) {
2819 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2820 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2822 /* TODO if one is known to be zero extended, then || is sufficient */
/* Generic binary ops need both operands clean. */
2827 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2828 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
/* Constants: check whether the value fits the smaller mode. */
2830 case iro_ia32_Const:
2831 case iro_ia32_Immediate: {
2832 const ia32_immediate_attr_t *attr =
2833 get_ia32_immediate_attr_const(transformed_node);
2834 if (mode_is_signed(mode)) {
/* all upper bits must equal the sign bit: shifted is 0 or -1 */
2835 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2836 return shifted == 0 || shifted == -1;
2838 unsigned long shifted = (unsigned long)attr->offset;
2839 shifted >>= get_mode_size_bits(mode);
2840 return shifted == 0;
2850 * Generate code for a Cmp.
/* Generate code for a Cmp node. Floats go to Ucomi (SSE) or Fucom (x87).
 * Integer compares against 0 of a single-use And become Test; otherwise
 * a regular Cmp is built. 8/16 bit compares are widened to 32 bit when
 * the upper bits of both operands are known clean (smaller encoding). */
2852 static ir_node *gen_Cmp(ir_node *node)
2854 dbg_info *dbgi = get_irn_dbg_info(node);
2855 ir_node *block = get_nodes_block(node);
2856 ir_node *new_block = be_transform_node(block);
2857 ir_node *left = get_Cmp_left(node);
2858 ir_node *right = get_Cmp_right(node);
2859 ir_mode *cmp_mode = get_irn_mode(left);
2861 ia32_address_mode_t am;
2862 ia32_address_t *addr = &am.addr;
2865 if (mode_is_float(cmp_mode)) {
2866 if (ia32_cg_config.use_sse2) {
2867 return create_Ucomi(node);
2869 return create_Fucom(node);
2873 assert(ia32_mode_needs_gp_reg(cmp_mode));
2875 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2876 cmp_unsigned = !mode_is_signed(cmp_mode);
/* left must be a single-use And and all Cmp users Eq/Lg projections */
2877 if (is_Const_0(right) &&
2879 get_irn_n_edges(left) == 1 &&
2880 can_fold_test_and(node)) {
2881 /* Test(and_left, and_right) */
2882 ir_node *and_left = get_And_left(left);
2883 ir_node *and_right = get_And_right(left);
2885 /* matze: code here used mode instead of cmd_mode, I think it is always
2886 * the same as cmp_mode, but I leave this here to see if this is really
2889 assert(get_irn_mode(and_left) == cmp_mode);
2891 match_arguments(&am, block, and_left, and_right, NULL,
2893 match_am | match_8bit_am | match_16bit_am |
2894 match_am_and_immediates | match_immediate);
2896 /* use 32bit compare mode if possible since the opcode is smaller */
2897 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2898 upper_bits_clean(am.new_op2, cmp_mode)) {
2899 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2902 if (get_mode_size_bits(cmp_mode) == 8) {
2903 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2904 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2907 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2908 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2911 /* Cmp(left, right) */
2912 match_arguments(&am, block, left, right, NULL,
2913 match_commutative | match_am | match_8bit_am |
2914 match_16bit_am | match_am_and_immediates |
2916 /* use 32bit compare mode if possible since the opcode is smaller */
2917 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2918 upper_bits_clean(am.new_op2, cmp_mode)) {
2919 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2922 if (get_mode_size_bits(cmp_mode) == 8) {
2923 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2924 addr->index, addr->mem, am.new_op1,
2925 am.new_op2, am.ins_permuted,
2928 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2929 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
/* common epilogue: apply AM attributes and fix the memory Proj */
2932 set_am_attributes(new_node, &am);
2933 set_ia32_ls_mode(new_node, cmp_mode);
2935 SET_IA32_ORIG_NODE(new_node, node);
2937 new_node = fix_mem_proj(new_node, &am);
/* Create an ia32 CMov from a Mux node, given the (transformed) flags
 * node and the condition code. Requires CMOV support and GP modes. */
2942 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2945 dbg_info *dbgi = get_irn_dbg_info(node);
2946 ir_node *block = get_nodes_block(node);
2947 ir_node *new_block = be_transform_node(block);
2948 ir_node *val_true = get_Mux_true(node);
2949 ir_node *val_false = get_Mux_false(node);
2951 ia32_address_mode_t am;
2952 ia32_address_t *addr;
2954 assert(ia32_cg_config.use_cmov);
2955 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
/* untransformed flags is passed so matching can respect the dependency */
2959 match_arguments(&am, block, val_false, val_true, flags,
2960 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2962 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2963 addr->mem, am.new_op1, am.new_op2, new_flags,
2964 am.ins_permuted, pnc);
2965 set_am_attributes(new_node, &am);
2967 SET_IA32_ORIG_NODE(new_node, node);
2969 new_node = fix_mem_proj(new_node, &am);
2975 * Creates a ia32 Setcc instruction.
/* Create an ia32 Set (setcc) producing 0/1 from flags, widened with a
 * Conv from 8 bit when the original node's mode is larger. */
2977 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2978 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2981 ir_mode *mode = get_irn_mode(orig_node);
2984 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2985 SET_IA32_ORIG_NODE(new_node, orig_node);
2987 /* we might need to conv the result up */
2988 if (get_mode_size_bits(mode) > 8) {
/* zero-extend the 8-bit setcc result (mode_Bu) */
2989 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
2990 nomem, new_node, mode_Bu);
2991 SET_IA32_ORIG_NODE(new_node, orig_node);
2998 * Create instruction for an unsigned Difference or Zero.
/* Create an unsigned "difference or zero" (Doz): computes a-b if a>=b
 * else 0, branch-free, as Sub; Sbb0 (all-ones on borrow, else 0,
 * inverted by the And); And(sub_result, sbb_mask). */
3000 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
3002 ir_graph *irg = current_ir_graph;
3003 ir_mode *mode = get_irn_mode(psi);
3004 ir_node *new_node, *sub, *sbb, *eflags, *block;
3008 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3009 match_mode_neutral | match_am | match_immediate | match_two_users);
3011 block = get_nodes_block(new_node);
/* recover the Sub node itself (gen_binop may have returned a Proj) */
3013 if (is_Proj(new_node)) {
3014 sub = get_Proj_pred(new_node);
3015 assert(is_ia32_Sub(sub));
/* make the Sub multi-output so we can also project its flags */
3018 set_irn_mode(sub, mode_T);
3019 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
3021 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3023 dbgi = get_irn_dbg_info(psi);
3024 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3026 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3027 set_ia32_commutative(new_node);
3032 * Create an const array of two float consts.
3034 * @param c0 the first constant
3035 * @param c1 the second constant
3036 * @param new_mode IN/OUT for the mode of the constants, if NULL
3037 * smallest possible mode will be used
/* Create a static, read-only entity holding a two-element float array
 * built from the Const nodes c0 and c1. *new_mode receives the mode
 * actually used; the smallest lossless IEEE mode is chosen (when the
 * incoming mode permits — see the conversion checks below). */
3039 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3041 ir_mode *mode = *new_mode;
3043 ir_initializer_t *initializer;
3044 tarval *tv0 = get_Const_tarval(c0);
3045 tarval *tv1 = get_Const_tarval(c1);
3048 /* detect the best mode for the constants */
3049 mode = get_tarval_mode(tv0);
/* try single precision first, then double, if both values convert losslessly */
3051 if (mode != mode_F) {
3052 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3053 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3055 tv0 = tarval_convert_to(tv0, mode);
3056 tv1 = tarval_convert_to(tv1, mode);
3057 } else if (mode != mode_D) {
3058 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3059 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3061 tv0 = tarval_convert_to(tv0, mode);
3062 tv1 = tarval_convert_to(tv1, mode);
/* build a local, constant, statically-allocated global entity */
3069 tp = ia32_create_float_type(mode, 4);
3070 tp = ia32_create_float_array(tp);
3072 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3074 set_entity_ld_ident(ent, get_entity_ident(ent));
3075 set_entity_visibility(ent, visibility_local);
3076 set_entity_variability(ent, variability_constant);
3077 set_entity_allocation(ent, allocation_static);
3079 initializer = create_initializer_compound(2);
3081 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3082 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3084 set_entity_initializer(ent, initializer);
3091 * Transforms a Mux node into some code sequence.
3093 * @return The transformed node.
/* Transform a Mux node. Special cases handled:
 *  - float + SSE: min/max patterns -> xMin/xMax
 *  - float + two Consts: setcc-indexed load from a 2-element const array
 *  - unsigned GP: Mux(a>=b, a-b, 0) (and mirrored) -> Doz
 *  - GP Const 0/1 pairs -> Set(cc); everything else -> CMov.
 * NOTE(review): several source lines are missing from this excerpt. */
3095 static ir_node *gen_Mux(ir_node *node)
3097 dbg_info *dbgi = get_irn_dbg_info(node);
3098 ir_node *block = get_nodes_block(node);
3099 ir_node *new_block = be_transform_node(block);
3100 ir_node *mux_true = get_Mux_true(node);
3101 ir_node *mux_false = get_Mux_false(node);
3102 ir_node *cond = get_Mux_sel(node);
3103 ir_mode *mode = get_irn_mode(node);
3108 assert(get_irn_mode(cond) == mode_b);
3110 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3111 if (mode_is_float(mode)) {
3112 ir_node *cmp = get_Proj_pred(cond);
3113 ir_node *cmp_left = get_Cmp_left(cmp);
3114 ir_node *cmp_right = get_Cmp_right(cmp);
3115 pn_Cmp pnc = get_Proj_proj(cond);
3117 if (ia32_cg_config.use_sse2) {
3118 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3119 if (cmp_left == mux_true && cmp_right == mux_false) {
3120 /* Mux(a <= b, a, b) => MIN */
3121 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3122 match_commutative | match_am | match_two_users);
3123 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3124 /* Mux(a <= b, b, a) => MAX */
3125 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3126 match_commutative | match_am | match_two_users);
3128 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3129 if (cmp_left == mux_true && cmp_right == mux_false) {
3130 /* Mux(a >= b, a, b) => MAX */
3131 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3132 match_commutative | match_am | match_two_users);
3133 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3134 /* Mux(a >= b, b, a) => MIN */
3135 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3136 match_commutative | match_am | match_two_users);
/* Float Mux of two constants: materialize both in a const array and
 * load the selected one, using the 0/1 setcc result (scaled) as index. */
3140 if (is_Const(mux_true) && is_Const(mux_false)) {
3141 ia32_address_mode_t am;
3146 flags = get_flags_node(cond, &pnc);
3147 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3149 if (ia32_cg_config.use_sse2) {
3150 /* cannot load from different mode on SSE */
3153 /* x87 can load any mode */
3157 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index to the element size; Lea doubles the value */
3159 switch (get_mode_size_bytes(new_mode)) {
3169 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3170 set_ia32_am_scale(new_node, 2);
3175 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3176 set_ia32_am_scale(new_node, 1);
3179 /* arg, shift 16 NOT supported */
3181 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3184 panic("Unsupported constant size");
/* build the address-mode load of the selected array element */
3187 am.ls_mode = new_mode;
3188 am.addr.base = noreg_GP;
3189 am.addr.index = new_node;
3190 am.addr.mem = nomem;
3192 am.addr.scale = scale;
3193 am.addr.use_frame = 0;
3194 am.addr.frame_entity = NULL;
3195 am.addr.symconst_sign = 0;
3196 am.mem_proj = am.addr.mem;
3197 am.op_type = ia32_AddrModeS;
3200 am.pinned = op_pin_state_floats;
3202 am.ins_permuted = 0;
3204 if (ia32_cg_config.use_sse2)
3205 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3207 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3208 set_am_attributes(load, &am);
3210 return new_rd_Proj(NULL, current_ir_graph, block, load, mode_vfp, pn_ia32_res);
3212 panic("cannot transform floating point Mux");
/* integer/pointer Mux from here on */
3215 assert(ia32_mode_needs_gp_reg(mode));
3217 if (is_Proj(cond)) {
3218 ir_node *cmp = get_Proj_pred(cond);
3220 ir_node *cmp_left = get_Cmp_left(cmp);
3221 ir_node *cmp_right = get_Cmp_right(cmp);
3222 pn_Cmp pnc = get_Proj_proj(cond);
3224 /* check for unsigned Doz first */
3225 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3226 is_Const_0(mux_false) && is_Sub(mux_true) &&
3227 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3228 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3229 return create_Doz(node, cmp_left, cmp_right);
3230 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3231 is_Const_0(mux_true) && is_Sub(mux_false) &&
3232 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3233 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3234 return create_Doz(node, cmp_left, cmp_right);
3239 flags = get_flags_node(cond, &pnc);
3241 if (is_Const(mux_true) && is_Const(mux_false)) {
3242 /* both are const, good */
3243 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3244 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3245 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
/* swapped 0/1: reuse Set with permuted inputs */
3246 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
3248 /* Not that simple. */
3253 new_node = create_CMov(node, cond, flags, pnc);
3261 * Create a conversion from x87 state register to general purpose.
/* Create a conversion from an x87 stack register to a general purpose
 * register: fist(t)p the value to a frame slot, then load it back.
 * Unsigned 32 bit values are stored as a 64 bit signed integer and only
 * the lower half is loaded. */
3263 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3265 ir_node *block = be_transform_node(get_nodes_block(node));
3266 ir_node *op = get_Conv_op(node);
3267 ir_node *new_op = be_transform_node(op);
3268 ir_graph *irg = current_ir_graph;
3269 dbg_info *dbgi = get_irn_dbg_info(node);
3270 ir_mode *mode = get_irn_mode(node);
3271 ir_node *fist, *load, *mem;
3273 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3274 set_irn_pinned(fist, op_pin_state_floats);
3275 set_ia32_use_frame(fist);
3276 set_ia32_op_type(fist, ia32_AddrModeD);
3278 assert(get_mode_size_bits(mode) <= 32);
3279 /* exception we can only store signed 32 bit integers, so for unsigned
3280 we store a 64bit (signed) integer and load the lower bits */
3281 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3282 set_ia32_ls_mode(fist, mode_Ls);
3284 set_ia32_ls_mode(fist, mode_Is);
3286 SET_IA32_ORIG_NODE(fist, node);
/* load the (lower 32 bits of the) stored integer back from the frame */
3289 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3291 set_irn_pinned(load, op_pin_state_floats);
3292 set_ia32_use_frame(load);
3293 set_ia32_op_type(load, ia32_AddrModeS);
3294 set_ia32_ls_mode(load, mode_Is);
/* request a frame entity wide enough for what the fist stored */
3295 if (get_ia32_ls_mode(fist) == mode_Ls) {
3296 ia32_attr_t *attr = get_ia32_attr(load);
3297 attr->data.need_64bit_stackent = 1;
3299 ia32_attr_t *attr = get_ia32_attr(load);
3300 attr->data.need_32bit_stackent = 1;
3302 SET_IA32_ORIG_NODE(load, node);
3304 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3308 * Creates a x87 strict Conv by placing a Store and a Load
/* Create an x87 strict Conv: force rounding to tgt_mode precision by
 * storing the value to the frame and loading it back. */
3310 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3312 ir_node *block = get_nodes_block(node);
3313 ir_graph *irg = current_ir_graph;
3314 dbg_info *dbgi = get_irn_dbg_info(node);
3315 ir_node *frame = get_irg_frame(irg);
3316 ir_node *store, *load;
3319 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3320 set_ia32_use_frame(store);
3321 set_ia32_op_type(store, ia32_AddrModeD);
3322 SET_IA32_ORIG_NODE(store, node);
3324 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3325 set_ia32_use_frame(load);
3326 set_ia32_op_type(load, ia32_AddrModeS);
3327 SET_IA32_ORIG_NODE(load, node);
3329 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
/* Create an integer-to-integer Conv node, picking the 8 bit variant
 * when the source mode is 8 bits wide. */
3333 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3334 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3336 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3338 func = get_mode_size_bits(mode) == 8 ?
3339 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3340 return func(dbgi, block, base, index, mem, val, mode);
3344 * Create a conversion from general purpose to x87 register
/* Create a conversion from a general purpose register to an x87 register
 * via fild. Tries source address mode first (signed 16/32 bit integers);
 * otherwise spills the value to the frame and filds it. Unsigned 32 bit
 * values get a zero upper half appended and are loaded as 64 bit. */
3346 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3348 ir_node *src_block = get_nodes_block(node);
3349 ir_node *block = be_transform_node(src_block);
3350 ir_graph *irg = current_ir_graph;
3351 dbg_info *dbgi = get_irn_dbg_info(node);
3352 ir_node *op = get_Conv_op(node);
3353 ir_node *new_op = NULL;
3355 ir_mode *store_mode;
3360 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3361 if (possible_int_mode_for_fp(src_mode)) {
3362 ia32_address_mode_t am;
3364 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3365 if (am.op_type == ia32_AddrModeS) {
3366 ia32_address_t *addr = &am.addr;
3368 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3369 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3371 set_am_attributes(fild, &am);
3372 SET_IA32_ORIG_NODE(fild, node);
3374 fix_mem_proj(fild, &am);
/* AM matching failed: fall back to transforming the operand normally */
3379 if (new_op == NULL) {
3380 new_op = be_transform_node(op);
3383 mode = get_irn_mode(op);
3385 /* first convert to 32 bit signed if necessary */
3386 if (get_mode_size_bits(src_mode) < 32) {
3387 if (!upper_bits_clean(new_op, src_mode)) {
3388 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3389 SET_IA32_ORIG_NODE(new_op, node);
3394 assert(get_mode_size_bits(mode) == 32);
/* spill the GP value to the frame so fild can read it from memory */
3397 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3399 set_ia32_use_frame(store);
3400 set_ia32_op_type(store, ia32_AddrModeD);
3401 set_ia32_ls_mode(store, mode_Iu);
3403 /* exception for 32bit unsigned, do a 64bit spill+load */
3404 if (!mode_is_signed(mode)) {
/* store a zero upper word at offset 4 so the 64bit value is positive */
3407 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3409 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3410 noreg_GP, nomem, zero_const);
3412 set_ia32_use_frame(zero_store);
3413 set_ia32_op_type(zero_store, ia32_AddrModeD);
3414 add_ia32_am_offs_int(zero_store, 4);
3415 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both stores: synchronize them */
3420 store = new_rd_Sync(dbgi, irg, block, 2, in);
3421 store_mode = mode_Ls;
3423 store_mode = mode_Is;
3427 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3429 set_ia32_use_frame(fild);
3430 set_ia32_op_type(fild, ia32_AddrModeS);
3431 set_ia32_ls_mode(fild, store_mode);
3433 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3439 * Create a conversion from one integer mode into another one
/* Create a conversion between two integer modes. The Conv is performed
 * in the smaller of the two modes; it is omitted entirely when the upper
 * bits of the operand are already known clean. */
3441 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3442 dbg_info *dbgi, ir_node *block, ir_node *op,
3445 ir_node *new_block = be_transform_node(block);
3447 ir_mode *smaller_mode;
3448 ia32_address_mode_t am;
3449 ia32_address_t *addr = &am.addr;
3452 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3453 smaller_mode = src_mode;
3455 smaller_mode = tgt_mode;
/* a Conv directly after a constant should have been folded already */
3458 #ifdef DEBUG_libfirm
3460 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3465 match_arguments(&am, block, NULL, op, NULL,
3466 match_am | match_8bit_am | match_16bit_am);
3468 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3469 /* unnecessary conv. in theory it shouldn't have been AM */
3470 assert(is_ia32_NoReg_GP(addr->base));
3471 assert(is_ia32_NoReg_GP(addr->index));
3472 assert(is_NoMem(addr->mem));
3473 assert(am.addr.offset == 0);
3474 assert(am.addr.symconst_ent == NULL);
3478 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3479 addr->mem, am.new_op2, smaller_mode);
3480 set_am_attributes(new_node, &am);
3481 /* match_arguments assume that out-mode = in-mode, this isn't true here
3483 set_ia32_ls_mode(new_node, smaller_mode);
3484 SET_IA32_ORIG_NODE(new_node, node);
3485 new_node = fix_mem_proj(new_node, &am);
3490 * Transforms a Conv node.
3492 * @return The created ia32 Conv node
/* Transform a Conv node, dispatching on source/target mode class:
 * bool -> int is a no-op, float<->float uses SSE Conv_FP2FP or x87
 * strict store/load, float->int uses Conv_FP2I or the x87 fist path,
 * int->float uses Conv_I2FP or the x87 fild path, int->int goes to
 * create_I2I_Conv. NOTE(review): several source lines are missing here. */
3494 static ir_node *gen_Conv(ir_node *node)
3496 ir_node *block = get_nodes_block(node);
3497 ir_node *new_block = be_transform_node(block);
3498 ir_node *op = get_Conv_op(node);
3499 ir_node *new_op = NULL;
3500 dbg_info *dbgi = get_irn_dbg_info(node);
3501 ir_mode *src_mode = get_irn_mode(op);
3502 ir_mode *tgt_mode = get_irn_mode(node);
3503 int src_bits = get_mode_size_bits(src_mode);
3504 int tgt_bits = get_mode_size_bits(tgt_mode);
3505 ir_node *res = NULL;
3507 assert(!mode_is_int(src_mode) || src_bits <= 32);
3508 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3510 if (src_mode == mode_b) {
3511 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3512 /* nothing to do, we already model bools as 0/1 ints */
3513 return be_transform_node(op);
/* same-mode Convs can be dropped (strict ones only in SSE mode) */
3516 if (src_mode == tgt_mode) {
3517 if (get_Conv_strict(node)) {
3518 if (ia32_cg_config.use_sse2) {
3519 /* when we are in SSE mode, we can kill all strict no-op conversion */
3520 return be_transform_node(op);
3523 /* this should be optimized already, but who knows... */
3524 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3525 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3526 return be_transform_node(op);
3530 if (mode_is_float(src_mode)) {
3531 new_op = be_transform_node(op);
3532 /* we convert from float ... */
3533 if (mode_is_float(tgt_mode)) {
3535 /* Matze: I'm a bit unsure what the following is for? seems wrong
3537 if (src_mode == mode_E && tgt_mode == mode_D
3538 && !get_Conv_strict(node)) {
3539 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3545 if (ia32_cg_config.use_sse2) {
3546 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3547 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3549 set_ia32_ls_mode(res, tgt_mode);
3551 if (get_Conv_strict(node)) {
3552 /* if fp_no_float_fold is not set then we assume that we
3553 * don't have any float operations in a non
3554 * mode_float_arithmetic mode and can skip strict upconvs */
3555 if (src_bits < tgt_bits
3556 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3557 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* strict down-conv on x87: force rounding via store/load */
3560 res = gen_x87_strict_conv(tgt_mode, new_op);
3561 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3565 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* ... float to int */
3570 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3571 if (ia32_cg_config.use_sse2) {
3572 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3574 set_ia32_ls_mode(res, src_mode);
3576 return gen_x87_fp_to_gp(node);
3580 /* we convert from int ... */
3581 if (mode_is_float(tgt_mode)) {
3583 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3584 if (ia32_cg_config.use_sse2) {
3585 new_op = be_transform_node(op);
3586 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3588 set_ia32_ls_mode(res, tgt_mode);
3590 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3591 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3592 res = gen_x87_gp_to_fp(node, src_mode);
3594 /* we need a strict-Conv, if the int mode has more bits than the
3596 if (float_mantissa < int_mantissa) {
3597 res = gen_x87_strict_conv(tgt_mode, res);
3598 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3602 } else if (tgt_mode == mode_b) {
3603 /* mode_b lowering already took care that we only have 0/1 values */
3604 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3605 src_mode, tgt_mode));
3606 return be_transform_node(op);
/* int to int: same width is a no-op, otherwise a real Conv */
3609 if (src_bits == tgt_bits) {
3610 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3611 src_mode, tgt_mode));
3612 return be_transform_node(op);
3615 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to express node as an ia32 Immediate (respecting the given asm
 * constraint type); fall back to a normal transform otherwise. */
3623 static ir_node *create_immediate_or_transform(ir_node *node,
3624 char immediate_constraint_type)
3626 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3627 if (new_node == NULL) {
3628 new_node = be_transform_node(node);
3634 * Transforms a FrameAddr into an ia32 Add.
/* Transform a be_FrameAddr into an ia32 Lea on the frame pointer with
 * the frame entity attached. */
3636 static ir_node *gen_be_FrameAddr(ir_node *node)
3638 ir_node *block = be_transform_node(get_nodes_block(node));
3639 ir_node *op = be_get_FrameAddr_frame(node);
3640 ir_node *new_op = be_transform_node(op);
3641 dbg_info *dbgi = get_irn_dbg_info(node);
3644 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3645 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3646 set_ia32_use_frame(new_node);
3648 SET_IA32_ORIG_NODE(new_node, node);
3654 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Transform a be_Return. With SSE, a float return value lives in XMM0
 * but the ABI returns floats on the x87 stack, so the value is stored
 * to the frame, reloaded via vfld, and a new Barrier is built whose
 * value/memory inputs are rerouted through that store/load pair.
 * All other returns are just duplicated. */
3656 static ir_node *gen_be_Return(ir_node *node)
3658 ir_graph *irg = current_ir_graph;
3659 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3660 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3661 ir_entity *ent = get_irg_entity(irg);
3662 ir_type *tp = get_entity_type(ent);
3667 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3668 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3670 int pn_ret_val, pn_ret_mem, arity, i;
3672 assert(ret_val != NULL);
/* fast path: nothing to fix up without SSE or without a return value */
3673 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3674 return be_duplicate_node(node);
3677 res_type = get_method_res_type(tp, 0);
3679 if (! is_Primitive_type(res_type)) {
3680 return be_duplicate_node(node);
3683 mode = get_type_mode(res_type);
3684 if (! mode_is_float(mode)) {
3685 return be_duplicate_node(node);
3688 assert(get_method_n_ress(tp) == 1);
3690 pn_ret_val = get_Proj_proj(ret_val);
3691 pn_ret_mem = get_Proj_proj(ret_mem);
3693 /* get the Barrier */
3694 barrier = get_Proj_pred(ret_val);
3696 /* get result input of the Barrier */
3697 ret_val = get_irn_n(barrier, pn_ret_val);
3698 new_ret_val = be_transform_node(ret_val);
3700 /* get memory input of the Barrier */
3701 ret_mem = get_irn_n(barrier, pn_ret_mem);
3702 new_ret_mem = be_transform_node(ret_mem);
3704 frame = get_irg_frame(irg);
3706 dbgi = get_irn_dbg_info(barrier);
3707 block = be_transform_node(get_nodes_block(barrier));
3709 /* store xmm0 onto stack */
3710 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3711 new_ret_mem, new_ret_val);
3712 set_ia32_ls_mode(sse_store, mode);
3713 set_ia32_op_type(sse_store, ia32_AddrModeD);
3714 set_ia32_use_frame(sse_store);
3716 /* load into x87 register */
3717 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3718 set_ia32_op_type(fld, ia32_AddrModeS);
3719 set_ia32_use_frame(fld);
3721 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3722 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3724 /* create a new barrier */
3725 arity = get_irn_arity(barrier);
3726 in = ALLOCAN(ir_node*, arity);
3727 for (i = 0; i < arity; ++i) {
/* value/memory inputs are replaced by the reload result / its memory */
3730 if (i == pn_ret_val) {
3732 } else if (i == pn_ret_mem) {
3735 ir_node *in = get_irn_n(barrier, i);
3736 new_in = be_transform_node(in);
3741 new_barrier = new_ir_node(dbgi, irg, block,
3742 get_irn_op(barrier), get_irn_mode(barrier),
3744 copy_node_attr(barrier, new_barrier);
3745 be_duplicate_deps(barrier, new_barrier);
3746 be_set_transformed_node(barrier, new_barrier);
3748 /* transform normally */
3749 return be_duplicate_node(node);
3753 * Transform a be_AddSP into an ia32_SubSP.
3755 static ir_node *gen_be_AddSP(ir_node *node)
3757 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3758 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3760 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3761 match_am | match_immediate);
3765 * Transform a be_SubSP into an ia32_AddSP
3767 static ir_node *gen_be_SubSP(ir_node *node)
3769 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3770 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3772 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3773 match_am | match_immediate);
3777 * Change some phi modes
3779 static ir_node *gen_Phi(ir_node *node)
3781 ir_node *block = be_transform_node(get_nodes_block(node));
3782 ir_graph *irg = current_ir_graph;
3783 dbg_info *dbgi = get_irn_dbg_info(node);
3784 ir_mode *mode = get_irn_mode(node);
/* NOTE(review): lines are missing from this extraction; presumably the
 * dropped branches rewrite `mode` to the backend register-class mode
 * (gp / xmm / vfp) — verify against the upstream source. */
3787 if (ia32_mode_needs_gp_reg(mode)) {
3788 /* we shouldn't have any 64bit stuff around anymore */
3789 assert(get_mode_size_bits(mode) <= 32);
3790 /* all integer operations are on 32bit registers now */
3792 } else if (mode_is_float(mode)) {
3793 if (ia32_cg_config.use_sse2) {
3800 /* phi nodes allow loops, so we use the old arguments for now
3801 * and fix this later */
3802 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3803 get_irn_in(node) + 1);
3804 copy_node_attr(node, phi);
3805 be_duplicate_deps(node, phi);
/* predecessors are fixed up later by the transform driver */
3807 be_enqueue_preds(node);
3815 static ir_node *gen_IJmp(ir_node *node)
3817 ir_node *block = get_nodes_block(node);
3818 ir_node *new_block = be_transform_node(block);
3819 dbg_info *dbgi = get_irn_dbg_info(node);
3820 ir_node *op = get_IJmp_target(node);
3822 ia32_address_mode_t am;
3823 ia32_address_t *addr = &am.addr;
3825 assert(get_irn_mode(op) == mode_P);
3827 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3829 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3830 addr->mem, am.new_op2);
3831 set_am_attributes(new_node, &am);
3832 SET_IA32_ORIG_NODE(new_node, node);
3834 new_node = fix_mem_proj(new_node, &am);
3840 * Transform a Bound node.
/* NOTE(review): extraction dropped lines here (e.g. the creation of `sub`
 * in the !is_Proj branch and the else-branch structure); code kept verbatim. */
3842 static ir_node *gen_Bound(ir_node *node)
3845 ir_node *lower = get_Bound_lower(node);
3846 dbg_info *dbgi = get_irn_dbg_info(node);
/* only lower bound == 0 is handled: index < upper (unsigned) covers both checks */
3848 if (is_Const_0(lower)) {
3849 /* typical case for Java */
3850 ir_node *sub, *res, *flags, *block;
3851 ir_graph *irg = current_ir_graph;
3853 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3854 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3856 block = get_nodes_block(res);
3857 if (! is_Proj(res)) {
3859 set_irn_mode(sub, mode_T);
3860 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3862 sub = get_Proj_pred(res);
/* unsigned "below" comparison: index < upper also rejects negative indices */
3864 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3865 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3866 SET_IA32_ORIG_NODE(new_node, node);
3868 panic("generic Bound not supported in ia32 Backend");
3874 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3876 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3877 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3879 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3880 match_immediate | match_mode_neutral);
3883 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3885 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3886 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3887 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
3891 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3893 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3894 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3895 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
3899 static ir_node *gen_ia32_l_Add(ir_node *node)
3901 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3902 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3903 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3904 match_commutative | match_am | match_immediate |
3905 match_mode_neutral);
3907 if (is_Proj(lowered)) {
3908 lowered = get_Proj_pred(lowered);
3910 assert(is_ia32_Add(lowered));
3911 set_irn_mode(lowered, mode_T);
3917 static ir_node *gen_ia32_l_Adc(ir_node *node)
3919 return gen_binop_flags(node, new_bd_ia32_Adc,
3920 match_commutative | match_am | match_immediate |
3921 match_mode_neutral);
3925 * Transforms a l_MulS into a "real" MulS node.
3927 * @return the created ia32 Mul node
3929 static ir_node *gen_ia32_l_Mul(ir_node *node)
3931 ir_node *left = get_binop_left(node);
3932 ir_node *right = get_binop_right(node);
3934 return gen_binop(node, left, right, new_bd_ia32_Mul,
3935 match_commutative | match_am | match_mode_neutral);
3939 * Transforms a l_IMulS into a "real" IMul1OPS node.
3941 * @return the created ia32 IMul1OP node
3943 static ir_node *gen_ia32_l_IMul(ir_node *node)
3945 ir_node *left = get_binop_left(node);
3946 ir_node *right = get_binop_right(node);
3948 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3949 match_commutative | match_am | match_mode_neutral);
3952 static ir_node *gen_ia32_l_Sub(ir_node *node)
3954 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3955 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3956 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3957 match_am | match_immediate | match_mode_neutral);
3959 if (is_Proj(lowered)) {
3960 lowered = get_Proj_pred(lowered);
3962 assert(is_ia32_Sub(lowered));
3963 set_irn_mode(lowered, mode_T);
3969 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3971 return gen_binop_flags(node, new_bd_ia32_Sbb,
3972 match_am | match_immediate | match_mode_neutral);
3976 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3977 * op1 - target to be shifted
3978 * op2 - contains bits to be shifted into target
3980 * Only op3 can be an immediate.
/* NOTE(review): declarations of new_count/new_node and the trailing
 * arguments/return were dropped by the extraction; code kept verbatim. */
3982 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3983 ir_node *low, ir_node *count)
3985 ir_node *block = get_nodes_block(node);
3986 ir_node *new_block = be_transform_node(block);
3987 dbg_info *dbgi = get_irn_dbg_info(node);
3988 ir_node *new_high = be_transform_node(high);
3989 ir_node *new_low = be_transform_node(low);
3993 /* the shift amount can be any mode that is bigger than 5 bits, since all
3994 * other bits are ignored anyway */
/* skip single-user integer Convs on the count: only the low 5 bits matter */
3995 while (is_Conv(count) &&
3996 get_irn_n_edges(count) == 1 &&
3997 mode_is_int(get_irn_mode(count))) {
3998 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3999 count = get_Conv_op(count);
4001 new_count = create_immediate_or_transform(count, 0);
4003 if (is_ia32_l_ShlD(node)) {
4004 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4007 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4010 SET_IA32_ORIG_NODE(new_node, node);
4015 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4017 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4018 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4019 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4020 return gen_lowered_64bit_shifts(node, high, low, count);
4023 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4025 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4026 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4027 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4028 return gen_lowered_64bit_shifts(node, high, low, count);
/* Converts a 64bit integer (low/high 32bit pair) to a float via the x87
 * fild instruction: spill both halves to the frame, fild the 64bit slot.
 * NOTE(review): several lines (store value operands, the in[] array setup,
 * some am fields) were dropped by the extraction; code kept verbatim. */
4031 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4033 ir_node *src_block = get_nodes_block(node);
4034 ir_node *block = be_transform_node(src_block);
4035 ir_graph *irg = current_ir_graph;
4036 dbg_info *dbgi = get_irn_dbg_info(node);
4037 ir_node *frame = get_irg_frame(irg);
4038 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4039 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4040 ir_node *new_val_low = be_transform_node(val_low);
4041 ir_node *new_val_high = be_transform_node(val_high);
4043 ir_node *sync, *fild, *res;
4044 ir_node *store_low, *store_high;
4046 if (ia32_cg_config.use_sse2) {
4047 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both 32bit halves into one 64bit frame slot */
4051 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4053 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4055 SET_IA32_ORIG_NODE(store_low, node);
4056 SET_IA32_ORIG_NODE(store_high, node);
4058 set_ia32_use_frame(store_low);
4059 set_ia32_use_frame(store_high);
4060 set_ia32_op_type(store_low, ia32_AddrModeD);
4061 set_ia32_op_type(store_high, ia32_AddrModeD);
4062 set_ia32_ls_mode(store_low, mode_Iu);
4063 set_ia32_ls_mode(store_high, mode_Is);
4064 add_ia32_am_offs_int(store_high, 4);
4068 sync = new_rd_Sync(dbgi, irg, block, 2, in);
/* do a fild of the 64bit slot */
4071 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4073 set_ia32_use_frame(fild);
4074 set_ia32_op_type(fild, ia32_AddrModeS);
4075 set_ia32_ls_mode(fild, mode_Ls);
4077 SET_IA32_ORIG_NODE(fild, node);
4079 res = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/* fild interprets the slot as signed; for unsigned inputs with the sign
 * bit set, add the 2^64 bias constant to correct the result */
4081 if (! mode_is_signed(get_irn_mode(val_high))) {
4082 ia32_address_mode_t am;
4084 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
/* index = sign bit of the high word (0 or 1) selects the bias entry */
4087 am.addr.base = noreg_GP;
4088 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4089 am.addr.mem = nomem;
4092 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4093 am.addr.use_frame = 0;
4094 am.addr.frame_entity = NULL;
4095 am.addr.symconst_sign = 0;
4096 am.ls_mode = mode_F;
4097 am.mem_proj = nomem;
4098 am.op_type = ia32_AddrModeS;
4100 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4101 am.pinned = op_pin_state_floats;
4103 am.ins_permuted = 0;
4105 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4106 am.new_op1, am.new_op2, get_fpcw());
4107 set_am_attributes(fadd, &am);
4109 set_irn_mode(fadd, mode_T);
4110 res = new_rd_Proj(NULL, irg, block, fadd, mode_vfp, pn_ia32_res);
4115 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4117 ir_node *src_block = get_nodes_block(node);
4118 ir_node *block = be_transform_node(src_block);
4119 ir_graph *irg = current_ir_graph;
4120 dbg_info *dbgi = get_irn_dbg_info(node);
4121 ir_node *frame = get_irg_frame(irg);
4122 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4123 ir_node *new_val = be_transform_node(val);
4124 ir_node *fist, *mem;
4126 mem = gen_vfist(dbgi, irg, block, frame, noreg_GP, nomem, new_val, &fist);
4127 SET_IA32_ORIG_NODE(fist, node);
4128 set_ia32_use_frame(fist);
4129 set_ia32_op_type(fist, ia32_AddrModeD);
4130 set_ia32_ls_mode(fist, mode_Ls);
4136 * the BAD transformer.
4138 static ir_node *bad_transform(ir_node *node)
4140 panic("No transform function for %+F available.", node);
/* Reads one 32bit half of the 64bit fist result back from the frame slot
 * created by gen_ia32_l_FloattoLL.
 * NOTE(review): declarations of load/proj/attr and some structure lines
 * were dropped by the extraction; code kept verbatim. */
4144 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4146 ir_graph *irg = current_ir_graph;
4147 ir_node *block = be_transform_node(get_nodes_block(node));
4148 ir_node *pred = get_Proj_pred(node);
4149 ir_node *new_pred = be_transform_node(pred);
4150 ir_node *frame = get_irg_frame(irg);
4151 dbg_info *dbgi = get_irn_dbg_info(node);
4152 long pn = get_Proj_proj(node);
4157 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4158 SET_IA32_ORIG_NODE(load, node);
4159 set_ia32_use_frame(load);
4160 set_ia32_op_type(load, ia32_AddrModeS);
4161 set_ia32_ls_mode(load, mode_Iu);
4162 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4163 * 32 bit from it with this particular load */
4164 attr = get_ia32_attr(load);
4165 attr->data.need_64bit_stackent = 1;
/* high half lives at offset +4 (little endian) */
4167 if (pn == pn_ia32_l_FloattoLL_res_high) {
4168 add_ia32_am_offs_int(load, 4);
4170 assert(pn == pn_ia32_l_FloattoLL_res_low);
4173 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4179 * Transform the Projs of an AddSP.
4181 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4183 ir_node *block = be_transform_node(get_nodes_block(node));
4184 ir_node *pred = get_Proj_pred(node);
4185 ir_node *new_pred = be_transform_node(pred);
4186 ir_graph *irg = current_ir_graph;
4187 dbg_info *dbgi = get_irn_dbg_info(node);
4188 long proj = get_Proj_proj(node);
4190 if (proj == pn_be_AddSP_sp) {
4191 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4192 pn_ia32_SubSP_stack);
4193 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4195 } else if (proj == pn_be_AddSP_res) {
4196 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4197 pn_ia32_SubSP_addr);
4198 } else if (proj == pn_be_AddSP_M) {
4199 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4202 panic("No idea how to transform proj->AddSP");
4206 * Transform the Projs of a SubSP.
4208 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4210 ir_node *block = be_transform_node(get_nodes_block(node));
4211 ir_node *pred = get_Proj_pred(node);
4212 ir_node *new_pred = be_transform_node(pred);
4213 ir_graph *irg = current_ir_graph;
4214 dbg_info *dbgi = get_irn_dbg_info(node);
4215 long proj = get_Proj_proj(node);
4217 if (proj == pn_be_SubSP_sp) {
4218 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4219 pn_ia32_AddSP_stack);
4220 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4222 } else if (proj == pn_be_SubSP_M) {
4223 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4226 panic("No idea how to transform proj->SubSP");
4230 * Transform and renumber the Projs from a Load.
/* NOTE(review): switch headers/case labels were dropped by the extraction;
 * code kept verbatim. */
4232 static ir_node *gen_Proj_Load(ir_node *node)
4235 ir_node *block = be_transform_node(get_nodes_block(node));
4236 ir_node *pred = get_Proj_pred(node);
4237 ir_graph *irg = current_ir_graph;
4238 dbg_info *dbgi = get_irn_dbg_info(node);
4239 long proj = get_Proj_proj(node);
4241 /* loads might be part of source address mode matches, so we don't
4242 * transform the ProjMs yet (with the exception of loads whose result is
4245 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4248 /* this is needed, because sometimes we have loops that are only
4249 reachable through the ProjM */
4250 be_enqueue_preds(node);
4251 /* do it in 2 steps, to silence firm verifier */
4252 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4253 set_Proj_proj(res, pn_ia32_mem);
4257 /* renumber the proj */
4258 new_pred = be_transform_node(pred);
4259 if (is_ia32_Load(new_pred)) {
4262 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4264 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4265 case pn_Load_X_regular:
4266 return new_rd_Jmp(dbgi, irg, block);
4267 case pn_Load_X_except:
4268 /* This Load might raise an exception. Mark it. */
4269 set_ia32_exc_label(new_pred, 1);
4270 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the load may have been folded into a Conv during matching */
4274 } else if (is_ia32_Conv_I2I(new_pred) ||
4275 is_ia32_Conv_I2I8Bit(new_pred)) {
4276 set_irn_mode(new_pred, mode_T);
4277 if (proj == pn_Load_res) {
4278 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4279 } else if (proj == pn_Load_M) {
4280 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4282 } else if (is_ia32_xLoad(new_pred)) {
4285 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4287 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4288 case pn_Load_X_regular:
4289 return new_rd_Jmp(dbgi, irg, block);
4290 case pn_Load_X_except:
4291 /* This Load might raise an exception. Mark it. */
4292 set_ia32_exc_label(new_pred, 1);
4293 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4297 } else if (is_ia32_vfld(new_pred)) {
4300 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4302 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4303 case pn_Load_X_regular:
4304 return new_rd_Jmp(dbgi, irg, block);
4305 case pn_Load_X_except:
4306 /* This Load might raise an exception. Mark it. */
4307 set_ia32_exc_label(new_pred, 1);
/* NOTE(review): this vfld branch uses pn_ia32_xLoad_X_exc — looks like a
 * copy-paste from the xLoad branch; verify it should be pn_ia32_vfld_X_exc */
4308 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4313 /* can happen for ProJMs when source address mode happened for the
4316 /* however it should not be the result proj, as that would mean the
4317 load had multiple users and should not have been used for
4319 if (proj != pn_Load_M) {
4320 panic("internal error: transformed node not a Load");
4322 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4325 panic("No idea how to transform proj");
4329 * Transform and renumber the Projs from a DivMod like instruction.
/* NOTE(review): the outer case labels (Div/Mod/DivMod) and inner switch
 * headers were dropped by the extraction; code kept verbatim. */
4331 static ir_node *gen_Proj_DivMod(ir_node *node)
4333 ir_node *block = be_transform_node(get_nodes_block(node));
4334 ir_node *pred = get_Proj_pred(node);
4335 ir_node *new_pred = be_transform_node(pred);
4336 ir_graph *irg = current_ir_graph;
4337 dbg_info *dbgi = get_irn_dbg_info(node);
4338 long proj = get_Proj_proj(node);
/* Div, Mod and DivMod all map onto one ia32 Div/IDiv instruction */
4340 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4342 switch (get_irn_opcode(pred)) {
4346 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4348 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4349 case pn_Div_X_regular:
4350 return new_rd_Jmp(dbgi, irg, block);
4351 case pn_Div_X_except:
4352 set_ia32_exc_label(new_pred, 1);
4353 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4361 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4363 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4364 case pn_Mod_X_except:
4365 set_ia32_exc_label(new_pred, 1);
4366 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4374 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4375 case pn_DivMod_res_div:
4376 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4377 case pn_DivMod_res_mod:
4378 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4379 case pn_DivMod_X_regular:
4380 return new_rd_Jmp(dbgi, irg, block);
4381 case pn_DivMod_X_except:
4382 set_ia32_exc_label(new_pred, 1);
4383 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4392 panic("No idea how to transform proj->DivMod");
4396 * Transform and renumber the Projs from a CopyB.
/* NOTE(review): the switch header and default/break lines were dropped
 * by the extraction; code kept verbatim. */
4398 static ir_node *gen_Proj_CopyB(ir_node *node)
4400 ir_node *block = be_transform_node(get_nodes_block(node));
4401 ir_node *pred = get_Proj_pred(node);
4402 ir_node *new_pred = be_transform_node(pred);
4403 ir_graph *irg = current_ir_graph;
4404 dbg_info *dbgi = get_irn_dbg_info(node);
4405 long proj = get_Proj_proj(node);
4408 case pn_CopyB_M_regular:
/* CopyB_i is the fixed-size variant, CopyB the rep-movs variant */
4409 if (is_ia32_CopyB_i(new_pred)) {
4410 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4411 } else if (is_ia32_CopyB(new_pred)) {
4412 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4419 panic("No idea how to transform proj->CopyB");
4423 * Transform and renumber the Projs from a Quot.
/* NOTE(review): the switch header and some case labels were dropped by
 * the extraction; code kept verbatim. */
4425 static ir_node *gen_Proj_Quot(ir_node *node)
4427 ir_node *block = be_transform_node(get_nodes_block(node));
4428 ir_node *pred = get_Proj_pred(node);
4429 ir_node *new_pred = be_transform_node(pred);
4430 ir_graph *irg = current_ir_graph;
4431 dbg_info *dbgi = get_irn_dbg_info(node);
4432 long proj = get_Proj_proj(node);
/* SSE2 divisions become xDiv, x87 divisions become vfdiv */
4436 if (is_ia32_xDiv(new_pred)) {
4437 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4438 } else if (is_ia32_vfdiv(new_pred)) {
4439 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4443 if (is_ia32_xDiv(new_pred)) {
4444 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4445 } else if (is_ia32_vfdiv(new_pred)) {
4446 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4449 case pn_Quot_X_regular:
4450 case pn_Quot_X_except:
4455 panic("No idea how to transform proj->Quot");
/* Transform a be_Call into an ia32_Call: the call target may be folded
 * into an address mode; register parameters are routed to eax/ecx/edx.
 * NOTE(review): declarations (call, fpcw, mem, i, ...) and the trailing
 * return were dropped by the extraction; code kept verbatim. */
4458 static ir_node *gen_be_Call(ir_node *node)
4460 dbg_info *const dbgi = get_irn_dbg_info(node);
4461 ir_graph *const irg = current_ir_graph;
4462 ir_node *const src_block = get_nodes_block(node);
4463 ir_node *const block = be_transform_node(src_block);
4464 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4465 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4466 ir_node *const sp = be_transform_node(src_sp);
4467 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4468 ia32_address_mode_t am;
4469 ia32_address_t *const addr = &am.addr;
4474 ir_node * eax = noreg_GP;
4475 ir_node * ecx = noreg_GP;
4476 ir_node * edx = noreg_GP;
4477 unsigned const pop = be_Call_get_pop(node);
4478 ir_type *const call_tp = be_Call_get_type(node);
4479 int old_no_pic_adjust;
4481 /* Run the x87 simulator if the call returns a float value */
4482 if (get_method_n_ress(call_tp) > 0) {
4483 ir_type *const res_type = get_method_res_type(call_tp, 0);
4484 ir_mode *const res_mode = get_type_mode(res_type);
4486 if (res_mode != NULL && mode_is_float(res_mode)) {
4487 env_cg->do_x87_sim = 1;
4491 /* We do not want be_Call direct calls */
4492 assert(be_Call_get_entity(node) == NULL);
4494 /* special case for PIC trampoline calls */
4495 old_no_pic_adjust = no_pic_adjust;
4496 no_pic_adjust = env_cg->birg->main_env->options->pic;
4498 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4499 match_am | match_immediate);
4501 no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw, the ones before (down to first_arg) are the
 * register parameters */
4503 i = get_irn_arity(node) - 1;
4504 fpcw = be_transform_node(get_irn_n(node, i--));
4505 for (; i >= be_pos_Call_first_arg; --i) {
4506 arch_register_req_t const *const req = arch_get_register_req(node, i);
4507 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4509 assert(req->type == arch_register_req_type_limited);
4510 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4512 switch (*req->limited) {
4513 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4514 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4515 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4516 default: panic("Invalid GP register for register parameter");
4520 mem = transform_AM_mem(irg, block, src_ptr, src_mem, addr->mem);
4521 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4522 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4523 set_am_attributes(call, &am);
4524 call = fix_mem_proj(call, &am);
4526 if (get_irn_pinned(node) == op_pin_state_pinned)
4527 set_irn_pinned(call, op_pin_state_pinned);
4529 SET_IA32_ORIG_NODE(call, node);
4531 if (ia32_cg_config.use_sse2) {
4532 /* remember this call for post-processing */
4533 ARR_APP1(ir_node *, call_list, call);
4534 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4541 * Transform Builtin trap
4543 static ir_node *gen_trap(ir_node *node) {
4544 dbg_info *dbgi = get_irn_dbg_info(node);
4545 ir_node *block = be_transform_node(get_nodes_block(node));
4546 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4548 return new_bd_ia32_UD2(dbgi, block, mem);
4552 * Transform Builtin debugbreak
4554 static ir_node *gen_debugbreak(ir_node *node) {
4555 dbg_info *dbgi = get_irn_dbg_info(node);
4556 ir_node *block = be_transform_node(get_nodes_block(node));
4557 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4559 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4563 * Transform Builtin return_address
/* NOTE(review): declarations (load, ...) and the frame-climbing guard were
 * dropped by the extraction; code kept verbatim. */
4565 static ir_node *gen_return_address(ir_node *node) {
4566 ir_node *param = get_Builtin_param(node, 0);
4567 ir_node *frame = get_Builtin_param(node, 1);
4568 dbg_info *dbgi = get_irn_dbg_info(node);
4569 tarval *tv = get_Const_tarval(param);
4570 unsigned long value = get_tarval_long(tv);
4572 ir_node *block = be_transform_node(get_nodes_block(node));
4573 ir_node *ptr = be_transform_node(frame);
/* walk `value` frames up the frame-pointer chain */
4577 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4578 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4579 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4582 /* load the return address from this frame */
4583 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4585 set_irn_pinned(load, get_irn_pinned(node));
4586 set_ia32_op_type(load, ia32_AddrModeS);
4587 set_ia32_ls_mode(load, mode_Iu);
4589 set_ia32_am_offs_int(load, 0);
4590 set_ia32_use_frame(load);
4591 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4593 if (get_irn_pinned(node) == op_pin_state_floats) {
4594 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4595 && pn_ia32_vfld_res == pn_ia32_Load_res
4596 && pn_ia32_Load_res == pn_ia32_res);
4597 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4600 SET_IA32_ORIG_NODE(load, node);
4601 return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
4605 * Transform Builtin frame_address
/* NOTE(review): declarations (load, ent, ...) and the frame-climbing guard
 * were dropped by the extraction; code kept verbatim. */
4607 static ir_node *gen_frame_address(ir_node *node) {
4608 ir_node *param = get_Builtin_param(node, 0);
4609 ir_node *frame = get_Builtin_param(node, 1);
4610 dbg_info *dbgi = get_irn_dbg_info(node);
4611 tarval *tv = get_Const_tarval(param);
4612 unsigned long value = get_tarval_long(tv);
4614 ir_node *block = be_transform_node(get_nodes_block(node));
4615 ir_node *ptr = be_transform_node(frame);
/* walk `value` frames up the frame-pointer chain */
4620 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4621 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4622 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4625 /* load the frame address from this frame */
4626 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4628 set_irn_pinned(load, get_irn_pinned(node));
4629 set_ia32_op_type(load, ia32_AddrModeS);
4630 set_ia32_ls_mode(load, mode_Iu);
4632 ent = ia32_get_frame_address_entity();
4634 set_ia32_am_offs_int(load, 0);
4635 set_ia32_use_frame(load);
4636 set_ia32_frame_ent(load, ent);
4638 /* will fail anyway, but gcc does this: */
4639 set_ia32_am_offs_int(load, 0);
4642 if (get_irn_pinned(node) == op_pin_state_floats) {
4643 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4644 && pn_ia32_vfld_res == pn_ia32_Load_res
4645 && pn_ia32_Load_res == pn_ia32_res);
4646 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4649 SET_IA32_ORIG_NODE(load, node);
4650 return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
4654 * Transform Builtin prefetch
/* (header comment fixed: it was copy-pasted from frame_address)
 * NOTE(review): declarations (tv, rw, locality, dbgi) and the locality
 * switch header were dropped by the extraction; code kept verbatim. */
4656 static ir_node *gen_prefetch(ir_node *node) {
4658 ir_node *ptr, *block, *mem, *base, *index;
4659 ir_node *param, *new_node;
4662 ia32_address_t addr;
4664 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4665 /* no prefetch at all, route memory */
4666 return be_transform_node(get_Builtin_mem(node));
/* param 1: 0 = read prefetch, 1 = write prefetch */
4669 param = get_Builtin_param(node, 1);
4670 tv = get_Const_tarval(param);
4671 rw = get_tarval_long(tv);
4673 /* construct load address */
4674 memset(&addr, 0, sizeof(addr));
4675 ptr = get_Builtin_param(node, 0);
4676 ia32_create_address_mode(&addr, ptr, 0);
4683 base = be_transform_node(base);
4686 if (index == NULL) {
4689 index = be_transform_node(index);
4692 dbgi = get_irn_dbg_info(node);
4693 block = be_transform_node(get_nodes_block(node));
4694 mem = be_transform_node(get_Builtin_mem(node));
4696 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4697 /* we have 3DNow!, this was already checked above */
4698 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4699 } else if (ia32_cg_config.use_sse_prefetch) {
4700 /* note: rw == 1 is IGNORED in that case */
4701 param = get_Builtin_param(node, 2);
4702 tv = get_Const_tarval(param);
4703 locality = get_tarval_long(tv);
4705 /* SSE style prefetch */
/* locality 0..3 selects prefetchnta / t2 / t1 / t0 */
4708 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4711 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4714 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4717 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4721 assert(ia32_cg_config.use_3dnow_prefetch);
4722 /* 3DNow! style prefetch */
4723 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4726 set_irn_pinned(new_node, get_irn_pinned(node));
4727 set_ia32_op_type(new_node, ia32_AddrModeS);
4728 set_ia32_ls_mode(new_node, mode_Bu);
4729 set_address(new_node, &addr);
4731 SET_IA32_ORIG_NODE(new_node, node);
4733 be_dep_on_frame(new_node);
4734 return new_r_Proj(current_ir_graph, block, new_node, mode_M, pn_ia32_Prefetch_M);
4738 * Transform bsf like node
4740 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4742 ir_node *param = get_Builtin_param(node, 0);
4743 dbg_info *dbgi = get_irn_dbg_info(node);
4745 ir_node *block = get_nodes_block(node);
4746 ir_node *new_block = be_transform_node(block);
4748 ia32_address_mode_t am;
4749 ia32_address_t *addr = &am.addr;
4752 match_arguments(&am, block, NULL, param, NULL, match_am);
4754 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4755 set_am_attributes(cnt, &am);
4756 set_ia32_ls_mode(cnt, get_irn_mode(param));
4758 SET_IA32_ORIG_NODE(cnt, node);
4759 return fix_mem_proj(cnt, &am);
4763 * Transform builtin ffs.
4765 static ir_node *gen_ffs(ir_node *node)
4767 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4768 ir_node *real = skip_Proj(bsf);
4769 dbg_info *dbgi = get_irn_dbg_info(real);
4770 ir_node *block = get_nodes_block(real);
4771 ir_node *flag, *set, *conv, *neg, *or;
4774 if (get_irn_mode(real) != mode_T) {
4775 set_irn_mode(real, mode_T);
4776 bsf = new_r_Proj(current_ir_graph, block, real, mode_Iu, pn_ia32_res);
4779 flag = new_r_Proj(current_ir_graph, block, real, mode_b, pn_ia32_flags);
4782 set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
4783 SET_IA32_ORIG_NODE(set, node);
4786 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4787 SET_IA32_ORIG_NODE(conv, node);
4790 neg = new_bd_ia32_Neg(dbgi, block, conv);
4793 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4794 set_ia32_commutative(or);
4797 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4801 * Transform builtin clz.
4803 static ir_node *gen_clz(ir_node *node)
4805 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4806 ir_node *real = skip_Proj(bsr);
4807 dbg_info *dbgi = get_irn_dbg_info(real);
4808 ir_node *block = get_nodes_block(real);
4809 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4811 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
4815 * Transform builtin ctz.
4817 static ir_node *gen_ctz(ir_node *node)
4819 return gen_unop_AM(node, new_bd_ia32_Bsf);
/**
 * Transform builtin parity.
 *
 * Compares the operand against 0 purely to make the CPU compute the
 * parity flag, then materializes that flag with Set and widens the
 * 8-bit result to a full register.
 * NOTE(review): the x86 parity flag only reflects the low byte of the
 * result — this presumably assumes the operand's parity was already
 * folded into its low byte by earlier lowering; confirm with callers.
 *
 * @param node  the Builtin node to transform
 * @return      the node producing the parity value
 */
4825 static ir_node *gen_parity(ir_node *node)
4827 ir_node *param = get_Builtin_param(node, 0);
4828 dbg_info *dbgi = get_irn_dbg_info(node);
4830 ir_node *block = get_nodes_block(node);
4832 ir_node *new_block = be_transform_node(block);
4833 ir_node *imm, *cmp, *new_node;
4835 ia32_address_mode_t am;
4836 ia32_address_t *addr = &am.addr;
/* allow the operand to be folded as a memory address mode */
4840 match_arguments(&am, block, NULL, param, NULL, match_am);
4841 imm = ia32_create_Immediate(NULL, 0, 0);
/* cmp x, 0 — only executed for its flags side effect (PF) */
4842 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4843 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4844 set_am_attributes(cmp, &am);
4845 set_ia32_ls_mode(cmp, mode_Iu);
4847 SET_IA32_ORIG_NODE(cmp, node);
4849 cmp = fix_mem_proj(cmp, &am);
/* materialize the parity flag as a 0/1 byte */
4852 new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
4853 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the byte to a full register */
4856 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4857 nomem, new_node, mode_Bu);
4858 SET_IA32_ORIG_NODE(new_node, node);
4863 * Transform builtin popcount
4865 static ir_node *gen_popcount(ir_node *node) {
4866 ir_node *param = get_Builtin_param(node, 0);
4867 dbg_info *dbgi = get_irn_dbg_info(node);
4869 ir_node *block = get_nodes_block(node);
4870 ir_node *new_block = be_transform_node(block);
4873 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4875 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4876 if (ia32_cg_config.use_popcnt) {
4877 ia32_address_mode_t am;
4878 ia32_address_t *addr = &am.addr;
4881 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4883 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4884 set_am_attributes(cnt, &am);
4885 set_ia32_ls_mode(cnt, get_irn_mode(param));
4887 SET_IA32_ORIG_NODE(cnt, node);
4888 return fix_mem_proj(cnt, &am);
4891 new_param = be_transform_node(param);
4893 /* do the standard popcount algo */
4895 /* m1 = x & 0x55555555 */
4896 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4897 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4900 simm = ia32_create_Immediate(NULL, 0, 1);
4901 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4903 /* m2 = s1 & 0x55555555 */
4904 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4907 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4909 /* m4 = m3 & 0x33333333 */
4910 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4911 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4914 simm = ia32_create_Immediate(NULL, 0, 2);
4915 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4917 /* m5 = s2 & 0x33333333 */
4918 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4921 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4923 /* m7 = m6 & 0x0F0F0F0F */
4924 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4925 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4928 simm = ia32_create_Immediate(NULL, 0, 4);
4929 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4931 /* m8 = s3 & 0x0F0F0F0F */
4932 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4935 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4937 /* m10 = m9 & 0x00FF00FF */
4938 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4939 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4942 simm = ia32_create_Immediate(NULL, 0, 8);
4943 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4945 /* m11 = s4 & 0x00FF00FF */
4946 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4948 /* m12 = m10 + m11 */
4949 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4951 /* m13 = m12 & 0x0000FFFF */
4952 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4953 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4955 /* s5 = m12 >> 16 */
4956 simm = ia32_create_Immediate(NULL, 0, 16);
4957 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4959 /* res = m13 + s5 */
4960 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4964 * Transform builtin byte swap.
4966 static ir_node *gen_bswap(ir_node *node) {
4967 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4968 dbg_info *dbgi = get_irn_dbg_info(node);
4970 ir_node *block = get_nodes_block(node);
4971 ir_node *new_block = be_transform_node(block);
4972 ir_mode *mode = get_irn_mode(param);
4973 unsigned size = get_mode_size_bits(mode);
4974 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
4978 if (ia32_cg_config.use_i486) {
4979 /* swap available */
4980 return new_bd_ia32_Bswap(dbgi, new_block, param);
4982 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4983 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4985 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
4986 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
4988 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4990 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
4991 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
4993 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4994 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
4997 /* swap16 always available */
4998 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5001 panic("Invalid bswap size (%d)", size);
/**
 * Transform builtin outport (write a value to an x86 I/O port).
 *
 * @param node  the Builtin node to transform
 * @return      the ia32 Outport node
 */
5008 static ir_node *gen_outport(ir_node *node) {
/* the port number may become an immediate of the out instruction */
5009 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5010 ir_node *oldv = get_Builtin_param(node, 1);
5011 ir_mode *mode = get_irn_mode(oldv);
5012 ir_node *value = be_transform_node(oldv);
5013 ir_node *block = be_transform_node(get_nodes_block(node));
5014 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5015 dbg_info *dbgi = get_irn_dbg_info(node);
5017 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* the access width is taken from the written value's mode */
5018 set_ia32_ls_mode(res, mode);
/**
 * Transform builtin inport (read a value from an x86 I/O port).
 *
 * @param node  the Builtin node to transform
 * @return      the ia32 Inport node (results are picked via Projs)
 */
5025 static ir_node *gen_inport(ir_node *node) {
5026 ir_type *tp = get_Builtin_type(node);
5027 ir_type *rstp = get_method_res_type(tp, 0);
/* the access width comes from the builtin's declared result type */
5028 ir_mode *mode = get_type_mode(rstp);
5029 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5030 ir_node *block = be_transform_node(get_nodes_block(node));
5031 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5032 dbg_info *dbgi = get_irn_dbg_info(node);
5034 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5035 set_ia32_ls_mode(res, mode);
5037 /* check for missing Result Proj */
/**
 * Transform a builtin inner trampoline.
 *
 * Writes a small machine-code stub to memory at <ptr>:
 *   B9 <env>      mov ecx, <env>       (static chain register)
 *   E9 <rel32>    jmp rel32 to <callee>
 * The jump displacement is callee - (trampoline + 10), 10 being the
 * stub's total length.
 * NOTE(review): the stores below all reuse &addr — presumably the
 * address offset is advanced between them (not visible here); confirm.
 *
 * @param node  the Builtin node to transform
 * @return      a Tuple holding the memory result and the trampoline address
 */
5044 static ir_node *gen_inner_trampoline(ir_node *node) {
5045 ir_node *ptr = get_Builtin_param(node, 0);
5046 ir_node *callee = get_Builtin_param(node, 1);
5047 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5048 ir_node *mem = get_Builtin_mem(node);
5049 ir_node *block = get_nodes_block(node);
5050 ir_node *new_block = be_transform_node(block);
5054 ir_node *trampoline;
5056 dbg_info *dbgi = get_irn_dbg_info(node);
5057 ia32_address_t addr;
5059 /* construct store address */
5060 memset(&addr, 0, sizeof(addr));
5061 ia32_create_address_mode(&addr, ptr, 0);
5063 if (addr.base == NULL) {
5064 addr.base = noreg_GP;
5066 addr.base = be_transform_node(addr.base);
5069 if (addr.index == NULL) {
5070 addr.index = noreg_GP;
5072 addr.index = be_transform_node(addr.index);
5074 addr.mem = be_transform_node(mem);
5076 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5077 val = ia32_create_Immediate(NULL, 0, 0xB9);
5078 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5079 addr.index, addr.mem, val);
5080 set_irn_pinned(store, get_irn_pinned(node));
5081 set_ia32_op_type(store, ia32_AddrModeD);
5082 set_ia32_ls_mode(store, mode_Bu);
5083 set_address(store, &addr);
/* store the 32-bit immediate operand: the environment pointer */
5087 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5088 addr.index, addr.mem, env);
5089 set_irn_pinned(store, get_irn_pinned(node));
5090 set_ia32_op_type(store, ia32_AddrModeD);
5091 set_ia32_ls_mode(store, mode_Iu);
5092 set_address(store, &addr);
5096 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5097 val = ia32_create_Immediate(NULL, 0, 0xE9);
5098 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5099 addr.index, addr.mem, val);
5100 set_irn_pinned(store, get_irn_pinned(node));
5101 set_ia32_op_type(store, ia32_AddrModeD);
5102 set_ia32_ls_mode(store, mode_Bu);
5103 set_address(store, &addr);
5107 trampoline = be_transform_node(ptr);
5109 /* the callee is typically an immediate */
5110 if (is_SymConst(callee)) {
/* fold "callee - 10" into the Const; -10 accounts for the stub length */
5111 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5113 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
/* rel = (callee - 10) - trampoline: the jmp's rel32 displacement */
5115 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5117 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5118 addr.index, addr.mem, rel);
5119 set_irn_pinned(store, get_irn_pinned(node));
5120 set_ia32_op_type(store, ia32_AddrModeD);
5121 set_ia32_ls_mode(store, mode_Iu);
5122 set_address(store, &addr);
/* Tuple: [0] = memory result, [1] = trampoline address */
5127 return new_r_Tuple(current_ir_graph, new_block, 2, in);
/**
 * Transform a Builtin node: dispatch on the builtin kind to the
 * specific gen_* transformer.
 *
 * @param node  the Builtin node to transform
 * @return      the transformed replacement node
 */
5133 static ir_node *gen_Builtin(ir_node *node) {
5134 ir_builtin_kind kind = get_Builtin_kind(node);
5138 return gen_trap(node);
5139 case ir_bk_debugbreak:
5140 return gen_debugbreak(node);
5141 case ir_bk_return_address:
5142 return gen_return_address(node);
5143 case ir_bk_frame_address:
5144 return gen_frame_address(node);
5145 case ir_bk_prefetch:
5146 return gen_prefetch(node);
5148 return gen_ffs(node);
5150 return gen_clz(node);
5152 return gen_ctz(node);
5154 return gen_parity(node);
5155 case ir_bk_popcount:
5156 return gen_popcount(node);
5158 return gen_bswap(node);
5160 return gen_outport(node);
5162 return gen_inport(node);
5163 case ir_bk_inner_trampoline:
5164 return gen_inner_trampoline(node);
/* any kind not handled above is unsupported on ia32 */
5166 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform Proj(Builtin) node: map the generic Builtin proj numbers
 * onto the projs (or Tuple entries) of the already-transformed node.
 *
 * @param proj  the Proj node to transform
 * @return      the transformed Proj (or the new node itself)
 */
5172 static ir_node *gen_Proj_Builtin(ir_node *proj) {
5173 ir_node *node = get_Proj_pred(proj);
5174 ir_node *new_node = be_transform_node(node);
5175 ir_builtin_kind kind = get_Builtin_kind(node);
/* value-producing builtins: the transformed node IS the result */
5178 case ir_bk_return_address:
5179 case ir_bk_frame_address:
5184 case ir_bk_popcount:
5186 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* memory-only builtins: the transformed node IS the memory result */
5189 case ir_bk_debugbreak:
5190 case ir_bk_prefetch:
5192 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport has distinct result/memory projs on the ia32 node */
5195 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5196 return new_r_Proj(current_ir_graph, get_nodes_block(new_node),
5197 new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5199 assert(get_Proj_proj(proj) == pn_Builtin_M);
5200 return new_r_Proj(current_ir_graph, get_nodes_block(new_node),
5201 new_node, mode_M, pn_ia32_Inport_M);
/* inner_trampoline returns a Tuple: [0] = mem, [1] = address */
5203 case ir_bk_inner_trampoline:
5204 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5205 return get_Tuple_pred(new_node, 1);
5207 assert(get_Proj_proj(proj) == pn_Builtin_M);
5208 return get_Tuple_pred(new_node, 0);
5211 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform be_IncSP: duplicate the node and mark it as clobbering the
 * flags (stack pointer adjustments are presumably emitted as add/sub,
 * which modify eflags — NOTE(review): confirm against the emitter).
 */
5214 static ir_node *gen_be_IncSP(ir_node *node)
5216 ir_node *res = be_duplicate_node(node);
5217 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
/**
 * Transform the Projs from a be_Call: renumber them onto the ia32_Call
 * and re-attach the register constraints/assignments.
 *
 * @param node  the Proj node to transform
 * @return      the Proj on the transformed call
 */
5225 static ir_node *gen_Proj_be_Call(ir_node *node)
5227 ir_node *block = be_transform_node(get_nodes_block(node));
5228 ir_node *call = get_Proj_pred(node);
5229 ir_node *new_call = be_transform_node(call);
5230 ir_graph *irg = current_ir_graph;
5231 dbg_info *dbgi = get_irn_dbg_info(node);
5232 long proj = get_Proj_proj(node);
5233 ir_mode *mode = get_irn_mode(node);
5236 if (proj == pn_be_Call_M_regular) {
5237 return new_rd_Proj(dbgi, irg, block, new_call, mode_M, n_ia32_Call_mem);
5239 /* transform call modes */
5240 if (mode_is_data(mode)) {
5241 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5245 /* Map from be_Call to ia32_Call proj number */
5246 if (proj == pn_be_Call_sp) {
5247 proj = pn_ia32_Call_stack;
5248 } else if (proj == pn_be_Call_M_regular) {
5249 proj = pn_ia32_Call_M;
/* result projs: locate the ia32_Call output with the same limited
 * (single-register) requirement as the be_Call result */
5251 arch_register_req_t const *const req = arch_get_register_req_out(node);
5252 int const n_outs = arch_irn_get_n_outs(new_call);
5255 assert(proj >= pn_be_Call_first_res);
5256 assert(req->type & arch_register_req_type_limited);
5258 for (i = 0; i < n_outs; ++i) {
5259 arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);
/* skip outputs whose constraint does not match the wanted register */
5261 if (!(new_req->type & arch_register_req_type_limited) ||
5262 new_req->cls != req->cls ||
5263 *new_req->limited != *req->limited)
5272 res = new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
5274 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5276 case pn_ia32_Call_stack:
5277 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5280 case pn_ia32_Call_fpcw:
5281 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
/**
 * Transform the Projs from a Cmp.
 *
 * Never legitimately reached: mode_b lowering must have removed all
 * direct Proj(Cmp) users before the backend transformation runs.
 */
5291 static ir_node *gen_Proj_Cmp(ir_node *node)
5293 /* this probably means not all mode_b nodes were lowered... */
5294 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/**
 * Transform the Projs from a Bound node.
 *
 * The bounds check itself becomes a conditional jump, so the X projs
 * map to the Jcc's true/false outputs; memory and the index value pass
 * through untouched.
 *
 * @param node  the Proj node to transform
 * @return      the transformed replacement node
 */
5301 static ir_node *gen_Proj_Bound(ir_node *node)
5303 ir_node *new_node, *block;
5304 ir_node *pred = get_Proj_pred(node);
5306 switch (get_Proj_proj(node)) {
/* Bound does not touch memory: forward its memory operand */
5308 return be_transform_node(get_Bound_mem(pred));
5309 case pn_Bound_X_regular:
5310 new_node = be_transform_node(pred);
5311 block = get_nodes_block(new_node);
5312 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
5313 case pn_Bound_X_except:
5314 new_node = be_transform_node(pred);
5315 block = get_nodes_block(new_node);
5316 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
/* the checked index is returned unchanged as the result */
5318 return be_transform_node(get_Bound_index(pred));
5320 panic("unsupported Proj from Bound");
/**
 * Transform a Proj from an ASM node: renumber it according to the
 * transformed ASM node's output layout (memory goes after all the
 * register outputs).
 *
 * @param node  the Proj node to transform
 * @return      the Proj on the transformed ASM node
 */
5324 static ir_node *gen_Proj_ASM(ir_node *node)
5326 ir_mode *mode = get_irn_mode(node);
5327 ir_node *pred = get_Proj_pred(node);
5328 ir_node *new_pred = be_transform_node(pred);
5329 ir_node *block = get_nodes_block(new_pred);
5330 long pos = get_Proj_proj(node);
/* the memory output sits behind all register outputs */
5332 if (mode == mode_M) {
5333 pos = arch_irn_get_n_outs(new_pred) + 1;
5334 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5336 } else if (mode_is_float(mode)) {
5339 panic("unexpected proj mode at ASM");
5342 return new_r_Proj(current_ir_graph, block, new_pred, mode, pos);
/**
 * Transform and potentially renumber Proj nodes: dispatch on the
 * predecessor's opcode to the specialized Proj transformers.
 *
 * @param node  the Proj node to transform
 * @return      the transformed replacement node
 */
5348 static ir_node *gen_Proj(ir_node *node)
5350 ir_node *pred = get_Proj_pred(node);
5353 switch (get_irn_opcode(pred)) {
/* a Store has only a memory result: the transformed Store stands in */
5355 proj = get_Proj_proj(node);
5356 if (proj == pn_Store_M) {
5357 return be_transform_node(pred);
5359 panic("No idea how to transform proj->Store");
5362 return gen_Proj_Load(node);
5364 return gen_Proj_ASM(node);
5366 return gen_Proj_Builtin(node);
5370 return gen_Proj_DivMod(node);
5372 return gen_Proj_CopyB(node);
5374 return gen_Proj_Quot(node);
5376 return gen_Proj_be_SubSP(node);
5378 return gen_Proj_be_AddSP(node);
5380 return gen_Proj_be_Call(node);
5382 return gen_Proj_Cmp(node);
5384 return gen_Proj_Bound(node);
5386 proj = get_Proj_proj(node);
5388 case pn_Start_X_initial_exec: {
5389 ir_node *block = get_nodes_block(pred);
5390 ir_node *new_block = be_transform_node(block);
5391 dbg_info *dbgi = get_irn_dbg_info(node);
5392 /* we exchange the ProjX with a jump */
5393 ir_node *jump = new_rd_Jmp(dbgi, current_ir_graph, new_block);
5398 case pn_Start_P_tls:
5399 return gen_Proj_tls(node);
5404 if (is_ia32_l_FloattoLL(pred)) {
5405 return gen_Proj_l_FloattoLL(node);
5407 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5411 ir_mode *mode = get_irn_mode(node);
5412 if (ia32_mode_needs_gp_reg(mode)) {
5413 ir_node *new_pred = be_transform_node(pred);
5414 ir_node *block = be_transform_node(get_nodes_block(node));
5415 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
5416 mode_Iu, get_Proj_proj(node));
/* preserve the node number for debugging/visualization purposes */
5417 new_proj->node_nr = node->node_nr;
5422 return be_duplicate_node(node);
/**
 * Enters all transform functions into the generic function pointer of
 * each opcode: GEN(op) installs gen_op as transformer, BAD(op) installs
 * bad_transform for nodes that must not appear at this stage.
 */
5428 static void register_transformers(void)
5430 /* first clear the generic function pointer for all ops */
5431 clear_irp_opcodes_generic_func();
5433 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5434 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5473 /* transform ops from intrinsic lowering */
5485 GEN(ia32_l_LLtoFloat);
5486 GEN(ia32_l_FloattoLL);
5492 /* we should never see these nodes */
5507 /* handle builtins */
5510 /* handle generic backend nodes */
/**
 * Pre-transform all unknown and noreg nodes, and cache the per-graph
 * nomem/noreg_GP nodes used throughout the transformers above.
 */
5526 static void ia32_pretransform_node(void)
5528 ia32_code_gen_t *cg = env_cg;
5530 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5531 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5532 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5533 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5534 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5535 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
/* cache frequently used nodes in file-level variables */
5537 nomem = get_irg_no_mem(current_ir_graph);
5538 noreg_GP = ia32_new_NoReg_gp(cg);
/**
 * Walker: checks if all ia32 nodes producing more than one result have
 * their Projs, otherwise creates the missing Projs and keeps them alive
 * with a be_Keep node (so the register allocator sees every output).
 *
 * @param node  the currently visited node
 * @param data  unused walker environment
 */
5547 static void add_missing_keep_walker(ir_node *node, void *data)
5550 unsigned found_projs = 0;
5551 const ir_edge_t *edge;
5552 ir_mode *mode = get_irn_mode(node);
5557 if (!is_ia32_irn(node))
5560 n_outs = arch_irn_get_n_outs(node);
5563 if (is_ia32_SwitchJmp(node))
/* found_projs is a bitmask, so the out count must fit in it */
5566 assert(n_outs < (int) sizeof(unsigned) * 8);
/* record which output numbers already have a Proj */
5567 foreach_out_edge(node, edge) {
5568 ir_node *proj = get_edge_src_irn(edge);
5571 /* The node could be kept */
5575 if (get_irn_mode(proj) == mode_M)
5578 pn = get_Proj_proj(proj);
5579 assert(pn < n_outs);
5580 found_projs |= 1 << pn;
5584 /* are keeps missing? */
5586 for (i = 0; i < n_outs; ++i) {
5589 const arch_register_req_t *req;
5590 const arch_register_class_t *cls;
/* output i already has a Proj — nothing to do */
5592 if (found_projs & (1 << i)) {
5596 req = get_ia32_out_req(node, i);
/* flags outputs need no keep */
5601 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
5605 block = get_nodes_block(node);
5606 in[0] = new_r_Proj(current_ir_graph, block, node,
5607 arch_register_class_mode(cls), i);
/* reuse one Keep per node, adding further outputs to it */
5608 if (last_keep != NULL) {
5609 be_Keep_add_node(last_keep, cls, in[0]);
5611 last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
5612 if (sched_is_scheduled(node)) {
5613 sched_add_after(node, last_keep);
/**
 * Adds missing keeps to nodes: walks the whole graph with
 * add_missing_keep_walker, creating Proj + be_Keep for unused outputs.
 *
 * @param cg  the ia32 code generator environment
 */
5623 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5625 ir_graph *irg = be_get_birg_irg(cg->birg);
5626 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
/**
 * Post-process all calls if we are in SSE mode.
 * The ABI requires float results in st(0); this copies each one into
 * an xmm register by storing st(0) to the frame and reloading via
 * xLoad — or, where the only user is an xStore, by patching that
 * store into a direct x87 vfst.
 */
5634 static void postprocess_fp_call_results(void) {
5637 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5638 ir_node *call = call_list[i];
5639 ir_type *mtp = call_types[i];
5642 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5643 ir_type *res_tp = get_method_res_type(mtp, j);
5644 ir_node *res, *new_res;
5645 const ir_edge_t *edge, *next;
5648 if (! is_atomic_type(res_tp)) {
5649 /* no floating point return */
5652 mode = get_type_mode(res_tp);
5653 if (! mode_is_float(mode)) {
5654 /* no floating point return */
/* the x87 result proj of this call result */
5658 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5661 /* now patch the users */
5662 foreach_out_edge_safe(res, edge, next) {
5663 ir_node *succ = get_edge_src_irn(edge);
/* Keeps need no real value — leave them on the vfp proj */
5666 if (be_is_Keep(succ))
5669 if (is_ia32_xStore(succ)) {
5670 /* an xStore can be patched into an vfst */
5671 dbg_info *db = get_irn_dbg_info(succ);
5672 ir_node *block = get_nodes_block(succ);
5673 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5674 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5675 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5676 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5677 ir_mode *mode = get_ia32_ls_mode(succ);
5679 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5680 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5681 if (is_ia32_use_frame(succ))
5682 set_ia32_use_frame(st);
5683 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5684 set_irn_pinned(st, get_irn_pinned(succ));
5685 set_ia32_op_type(st, ia32_AddrModeD);
/* build the st(0) -> xmm copy lazily, once per result */
5689 if (new_res == NULL) {
5690 dbg_info *db = get_irn_dbg_info(call);
5691 ir_node *block = get_nodes_block(call);
5692 ir_node *frame = get_irg_frame(current_ir_graph);
5693 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5694 ir_node *call_mem = new_r_Proj(current_ir_graph, block, call, mode_M, pn_ia32_Call_M);
5695 ir_node *vfst, *xld, *new_mem;
5697 /* store st(0) on stack */
5698 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5699 set_ia32_op_type(vfst, ia32_AddrModeD);
5700 set_ia32_use_frame(vfst);
5702 /* load into SSE register */
5703 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5704 set_ia32_op_type(xld, ia32_AddrModeS);
5705 set_ia32_use_frame(xld);
5707 new_res = new_r_Proj(current_ir_graph, block, xld, mode, pn_ia32_xLoad_res);
5708 new_mem = new_r_Proj(current_ir_graph, block, xld, mode_M, pn_ia32_xLoad_M);
/* route all memory users through the new store/load sequence */
5710 if (old_mem != NULL) {
5711 edges_reroute(old_mem, new_mem, current_ir_graph);
/* redirect this user from the x87 proj to the xmm value */
5715 set_irn_n(succ, get_edge_src_pos(edge), new_res);
/* do the transformation */
/**
 * Transform the whole graph into ia32 nodes: registers the per-opcode
 * transformers, computes heights and non-address-mode info, disables
 * CSE for the duration, runs be_transform_graph, and (in SSE2 mode)
 * fixes up x87 float call results afterwards.
 *
 * @param cg  the ia32 code generator environment
 */
5723 void ia32_transform_graph(ia32_code_gen_t *cg)
5727 register_transformers();
5729 initial_fpcw = NULL;
5732 BE_TIMER_PUSH(t_heights);
5733 heights = heights_new(cg->irg);
5734 BE_TIMER_POP(t_heights);
5735 ia32_calculate_non_address_mode_nodes(cg->birg);
5737 /* the transform phase is not safe for CSE (yet) because several nodes get
5738 * attributes set after their creation */
5739 cse_last = get_opt_cse();
/* call_list/call_types collect calls for the SSE result fixup below */
5742 call_list = NEW_ARR_F(ir_node *, 0);
5743 call_types = NEW_ARR_F(ir_type *, 0);
5744 be_transform_graph(cg->birg, ia32_pretransform_node);
5746 if (ia32_cg_config.use_sse2)
5747 postprocess_fp_call_results();
5748 DEL_ARR_F(call_types);
5749 DEL_ARR_F(call_list);
/* restore the CSE setting we saved above */
5751 set_opt_cse(cse_last);
5753 ia32_free_non_address_mode_nodes();
5754 heights_free(heights);
5758 void ia32_init_transform(void)
5760 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");