2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode_t.h"
52 #include "../besched.h"
54 #include "../beutil.h"
55 #include "../beirg_t.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
/* Build-time switch: when defined, SSE float constants are synthesized with
 * arithmetic instruction sequences instead of being loaded from memory.
 * It is explicitly disabled here. */
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
/* String representations of special FP bit patterns / values:
 * sign-bit masks, absolute-value masks, and ULL_BIAS = 2^64 (used with the
 * ia32_ULLBIAS entity below). */
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
/* Assembler-local label names for the entities holding the constants above. */
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
/* Shorthands for the modes of the x87 (vfp) and SSE (xmm) register classes. */
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* Debug module handle (present in debug builds only). */
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Cached, transformed x87 control-word node (filled lazily by get_fpcw()). */
95 static ir_node *initial_fpcw = NULL;
/* Constructor-function signatures used by the generic gen_* helpers below to
 * build the various ia32 node flavours (binops, flag-consuming binops,
 * shifts, destination-AM ops, float binops, unops). */
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in this file. */
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
/* Shared singleton nodes of the transformed graph. */
128 /* it's enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
/**
 * Returns true if the constant can be materialized with a cheap SSE
 * instruction sequence (e.g. xorps for zero, movd for 32-bit patterns)
 * instead of a memory load. The extra cases are only enabled when
 * CONSTRUCT_SSE_CONST is defined.
 */
87 * returns true if constant can be created with a simple float command
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
/* zero is always cheap (xorps) */
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
/* assemble the low 32 bits of the double, little-endian byte order */
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
/**
 * Transforms a firm Const node into an equivalent ia32 node.
 *
 * Float constants: with SSE2 either a cheap synthesized value (xZero, and
 * with CONSTRUCT_SSE_CONST also shifted all-ones / movd patterns) or an
 * xLoad from a constant-pool entity; on x87 fldz/fld1 for 0/1, otherwise a
 * vfld from a constant-pool entity. Integer constants become ia32_Const
 * with the 32-bit value of the tarval.
 */
102 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
210 if (mode_is_float(mode)) {
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
217 if (tarval_is_null(tv)) {
/* 0.0: xorps the register with itself */
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
222 } else if (tarval_is_one(tv)) {
/* 1.0: build the exponent pattern by shifting an all-ones register;
 * shift amounts differ for float (26) vs double (55) */
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
/* little-endian assembly of the 32-bit float pattern */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
/* shift the upper 32-bit half into place */
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the constant from a constant-pool entity */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
/* the load can be redone at any point, so mark it rematerializable */
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(current_ir_graph, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: special-case 0.0 and 1.0, otherwise load from memory */
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
292 floatent = create_float_const_entity(node);
293 /* create_float_const_ent is smart and sometimes creates
295 ls_mode = get_type_mode(get_entity_type(floatent));
297 load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
299 set_ia32_op_type(load, ia32_AddrModeS);
300 set_ia32_am_sc(load, floatent);
301 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
302 res = new_r_Proj(current_ir_graph, block, load, mode_vfp, pn_ia32_vfld_res);
305 #ifdef CONSTRUCT_SSE_CONST
307 #endif /* CONSTRUCT_SSE_CONST */
308 SET_IA32_ORIG_NODE(load, node);
310 be_dep_on_frame(load);
312 } else { /* non-float mode */
314 tarval *tv = get_Const_tarval(node);
/* normalize to 32-bit unsigned so get_tarval_long() is safe below */
317 tv = tarval_convert_to(tv, mode_Iu);
319 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
321 panic("couldn't convert constant tarval (%+F)", node);
323 val = get_tarval_long(tv);
325 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
326 SET_IA32_ORIG_NODE(cnst, node);
328 be_dep_on_frame(cnst);
/**
 * Transforms a firm SymConst node: float modes become a load (SSE xLoad or
 * x87 vfld) of the entity's value, integer modes become an ia32_Const
 * carrying the entity address. Only symconst_addr_ent is supported.
 */
189 * Transforms a SymConst.
336 static ir_node *gen_SymConst(ir_node *node)
338 ir_node *old_block = get_nodes_block(node);
339 ir_node *block = be_transform_node(old_block);
340 dbg_info *dbgi = get_irn_dbg_info(node);
341 ir_mode *mode = get_irn_mode(node);
344 if (mode_is_float(mode)) {
345 if (ia32_cg_config.use_sse2)
/* NOTE(review): mode_E is used as load mode for both SSE and x87 here —
 * presumably the widest float mode; confirm against the node definitions */
346 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
348 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
/* only entity addresses can be expressed by the backend */
354 if (get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
361 SET_IA32_ORIG_NODE(cnst, node);
363 be_dep_on_frame(cnst);
/**
 * Create (and cache per alignment) a primitive type for the given mode.
 * Integer modes (mode_Iu/mode_Lu) are accepted too, for SSE2 constants.
 *
 * NOTE(review): each cache array has 16 slots but is indexed directly by
 * @p align — callers in this file pass align == 16 (see the names[] table in
 * ia32_gen_fp_known_const), which would index one past the end. Looks like
 * an out-of-bounds access; verify and either size the arrays [17] or index
 * by a compacted key.
 */
207 * Create a float type for the given mode and cache it.
370 * @param mode the mode for the float type (might be integer mode for SSE2 types)
371 * @param align alignment
373 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
379 if (mode == mode_Iu) {
380 static ir_type *int_Iu[16] = {NULL, };
382 if (int_Iu[align] == NULL) {
383 snprintf(buf, sizeof(buf), "int_Iu_%u", align);
384 int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
385 /* set the specified alignment */
386 set_type_alignment_bytes(tp, align);
388 return int_Iu[align];
389 } else if (mode == mode_Lu) {
390 static ir_type *int_Lu[16] = {NULL, };
392 if (int_Lu[align] == NULL) {
393 snprintf(buf, sizeof(buf), "int_Lu_%u", align);
394 int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
395 /* set the specified alignment */
396 set_type_alignment_bytes(tp, align);
398 return int_Lu[align];
399 } else if (mode == mode_F) {
400 static ir_type *float_F[16] = {NULL, };
402 if (float_F[align] == NULL) {
403 snprintf(buf, sizeof(buf), "float_F_%u", align);
404 float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
405 /* set the specified alignment */
406 set_type_alignment_bytes(tp, align);
408 return float_F[align];
409 } else if (mode == mode_D) {
410 static ir_type *float_D[16] = {NULL, };
412 if (float_D[align] == NULL) {
413 snprintf(buf, sizeof(buf), "float_D_%u", align);
414 float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return float_D[align];
/* fallback: any other (extended) float mode */
420 static ir_type *float_E[16] = {NULL, };
422 if (float_E[align] == NULL) {
423 snprintf(buf, sizeof(buf), "float_E_%u", align);
424 float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
425 /* set the specified alignment */
426 set_type_alignment_bytes(tp, align);
428 return float_E[align];
/**
 * Create (and cache per element alignment) a two-element array type of the
 * given atomic float type; used for the ULL_BIAS constant pair.
 *
 * NOTE(review): the caches are [16] arrays indexed directly by the element
 * alignment — same potential out-of-bounds as in ia32_create_float_type if
 * an alignment of 16 ever reaches this function; verify.
 */
250 * Create a float[2] array type for the given atomic type.
435 * @param tp the atomic type
437 static ir_type *ia32_create_float_array(ir_type *tp) {
439 ir_mode *mode = get_type_mode(tp);
440 unsigned align = get_type_alignment_bytes(tp);
445 if (mode == mode_F) {
446 static ir_type *float_F[16] = {NULL, };
448 if (float_F[align] != NULL)
449 return float_F[align];
450 snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
451 arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
452 } else if (mode == mode_D) {
453 static ir_type *float_D[16] = {NULL, };
455 if (float_D[align] != NULL)
456 return float_D[align];
457 snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
458 arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
460 static ir_type *float_E[16] = {NULL, };
462 if (float_E[align] != NULL)
463 return float_E[align];
464 snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
465 arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
/* the array holds exactly two elements with fixed layout */
467 set_type_alignment_bytes(arr, align);
468 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
469 set_type_state(arr, layout_fixed);
/**
 * Returns (creating and caching on first use) the global entity holding one
 * of the well-known FP constants (sign masks, abs masks, ULL bias) used to
 * implement float Neg/Abs and unsigned-long-long conversion.
 *
 * The names[] table maps each ia32_known_const_t to its label, value string,
 * an encoded mode selector (0 = mode_Iu, 1 = mode_Lu, other = mode_F) and
 * the required alignment.
 */
275 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
474 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
476 static const struct {
477 const char *ent_name;
478 const char *cnst_str;
481 } names [ia32_known_const_max] = {
482 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
483 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
484 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
485 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
486 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
488 static ir_entity *ent_cache[ia32_known_const_max];
490 const char *ent_name, *cnst_str;
496 ent_name = names[kct].ent_name;
497 if (! ent_cache[kct]) {
498 cnst_str = names[kct].cnst_str;
500 switch (names[kct].mode) {
501 case 0: mode = mode_Iu; break;
502 case 1: mode = mode_Lu; break;
503 default: mode = mode_F; break;
505 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
506 tp = ia32_create_float_type(mode, names[kct].align);
/* ULLBIAS is stored as a {0, bias} pair so it can be indexed */
508 if (kct == ia32_ULLBIAS)
509 tp = ia32_create_float_array(tp);
510 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
512 set_entity_ld_ident(ent, get_entity_ident(ent));
513 set_entity_visibility(ent, visibility_local);
514 set_entity_variability(ent, variability_constant);
515 set_entity_allocation(ent, allocation_static);
517 if (kct == ia32_ULLBIAS) {
518 ir_initializer_t *initializer = create_initializer_compound(2);
520 set_initializer_compound_value(initializer, 0,
521 create_initializer_tarval(get_tarval_null(mode)));
522 set_initializer_compound_value(initializer, 1,
523 create_initializer_tarval(tv));
525 set_entity_initializer(ent, initializer);
527 set_entity_initializer(ent, create_initializer_tarval(tv));
530 /* cache the entry */
531 ent_cache[kct] = ent;
534 return ent_cache[kct];
/**
 * Decides whether @p node (a Proj(Load) or a float Const) may be folded into
 * another instruction as a source address-mode operand. See the conditions
 * checked below: the Load must be in the same block, have the expected
 * number of users, not be transformed already, and the other operands must
 * not depend on its memory.
 */
315 * return true if the node is a Proj(Load) and could be used in source address
539 * mode for another node. Will return only true if the @p other node is not
540 * dependent on the memory of the Load (for binary operations use the other
541 * input here, for unary operations use NULL).
543 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
544 ir_node *other, ir_node *other2, match_flags_t flags)
549 /* float constants are always available */
550 if (is_Const(node)) {
551 ir_mode *mode = get_irn_mode(node);
552 if (mode_is_float(mode)) {
/* only fold constants that would otherwise need a load anyway */
553 if (ia32_cg_config.use_sse2) {
554 if (is_simple_sse_Const(node))
557 if (is_simple_x87_Const(node))
560 if (get_irn_n_edges(node) > 1)
568 load = get_Proj_pred(node);
569 pn = get_Proj_proj(node);
570 if (!is_Load(load) || pn != pn_Load_res)
572 if (get_nodes_block(load) != block)
574 /* we only use address mode if we're the only user of the load */
575 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
577 /* in some edge cases with address mode we might reach the load normally
578 * and through some AM sequence, if it is already materialized then we
579 * can't create an AM node from it */
580 if (be_is_transformed(node))
583 /* don't do AM if other node inputs depend on the load (via mem-proj) */
584 if (other != NULL && prevents_AM(block, load, other))
587 if (other2 != NULL && prevents_AM(block, load, other2))
/**
 * Bundled result of operand matching: the chosen address mode, operand
 * nodes and assorted flags (see match_arguments()).
 */
593 typedef struct ia32_address_mode_t ia32_address_mode_t;
594 struct ia32_address_mode_t {
599 ia32_op_type_t op_type;
/* whether the operation's operands may be swapped / were swapped */
603 unsigned commutative : 1;
604 unsigned ins_permuted : 1;
607 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
609 /* construct load address */
610 memset(addr, 0, sizeof(addr[0]));
611 ia32_create_address_mode(addr, ptr, 0);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
615 addr->mem = be_transform_node(mem);
/**
 * Fills am->addr (and the related am fields: ls_mode, pinned, mem_proj) for
 * @p node, which is either a float Const (turned into a constant-pool
 * reference) or a Proj(Load) whose address is decomposed.
 */
354 static void build_address(ia32_address_mode_t *am, ir_node *node,
619 ia32_create_am_flags_t flags)
621 ia32_address_t *addr = &am->addr;
627 if (is_Const(node)) {
/* float constant: address the constant-pool entity directly */
628 ir_entity *entity = create_float_const_entity(node);
629 addr->base = noreg_GP;
630 addr->index = noreg_GP;
632 addr->symconst_ent = entity;
634 am->ls_mode = get_type_mode(get_entity_type(entity));
635 am->pinned = op_pin_state_floats;
/* Proj(Load): take address, memory and mode from the Load */
639 load = get_Proj_pred(node);
640 ptr = get_Load_ptr(load);
641 mem = get_Load_mem(load);
642 new_mem = be_transform_node(mem);
643 am->pinned = get_irn_pinned(load);
644 am->ls_mode = get_Load_mode(load);
645 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
648 /* construct load address */
649 ia32_create_address_mode(addr, ptr, flags);
651 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
652 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/**
 * Copies the components of @p addr (scale, symconst, offset, frame info)
 * into the attributes of the ia32 node.
 */
375 static void set_address(ir_node *node, const ia32_address_t *addr)
658 set_ia32_am_scale(node, addr->scale);
659 set_ia32_am_sc(node, addr->symconst_ent);
660 set_ia32_am_offs_int(node, addr->offset);
661 if (addr->symconst_sign)
662 set_ia32_am_sc_sign(node);
664 set_ia32_use_frame(node);
665 set_ia32_frame_ent(node, addr->frame_entity);
383 * Apply attributes of a given address mode to a node.
671 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
673 set_address(node, &am->addr);
675 set_ia32_op_type(node, am->op_type);
676 set_ia32_ls_mode(node, am->ls_mode);
677 if (am->pinned == op_pin_state_pinned) {
678 /* beware: some nodes are already pinned and did not allow to change the state */
679 if (get_irn_pinned(node) != op_pin_state_pinned)
680 set_irn_pinned(node, op_pin_state_pinned);
683 set_ia32_commutative(node);
/**
 * Check, if a given node is a Down-Conv, i.e. an integer Conv from a mode
 * with more bits to a mode with fewer (or equal) bits.
 * Moreover, we return only true if the node has not more than 1 user
 * (skipping a multi-user Conv would duplicate work).
 *
 * @param node the node
 * @return non-zero if node is a Down-Conv
 */
393 * Check, if a given node is a Down-Conv, ie. a integer Conv
688 * from a mode with a mode with more bits to a mode with lesser bits.
689 * Moreover, we return only true if the node has not more than 1 user.
691 * @param node the node
692 * @return non-zero if node is a Down-Conv
694 static int is_downconv(const ir_node *node)
702 /* we only want to skip the conv when we're the only user
703 * (not optimal but for now...)
705 if (get_irn_n_edges(node) > 1)
708 src_mode = get_irn_mode(get_Conv_op(node));
709 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the target must not be wider */
711 ia32_mode_needs_gp_reg(src_mode) &&
712 ia32_mode_needs_gp_reg(dest_mode) &&
713 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
716 /* Skip all Down-Conv's on a given node and return the resulting node. */
717 ir_node *ia32_skip_downconv(ir_node *node)
719 while (is_downconv(node))
720 node = get_Conv_op(node);
/**
 * Widens @p node to a 32-bit GP value by creating an integer-to-integer
 * Conv; the target mode is chosen according to the signedness of the
 * source mode. @p orig_node is passed through for debug bookkeeping.
 */
411 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
727 ir_mode *mode = get_irn_mode(node);
732 if (mode_is_signed(mode)) {
737 block = get_nodes_block(node);
738 dbgi = get_irn_dbg_info(node);
740 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * Matches the operands of a node into ia32 addressing/operand modes: tries
 * immediates (op2 first), then source address mode for op2, then (if
 * commutative) for op1, falling back to plain register operands. The result
 * is written into @p am.
 */
417 * matches operands of a node into ia32 addressing/operand modes. This covers
745 * usage of source address mode, immediates, operations with non 32-bit modes,
747 * The resulting data is filled into the @p am struct. block is the block
748 * of the node whose arguments are matched. op1, op2 are the first and second
749 * input that are matched (op1 may be NULL). other_op is another unrelated
750 * input that is not matched! but which is needed sometimes to check if AM
751 * for op1/op2 is legal.
752 * @p flags describes the supported modes of the operation in detail.
754 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
755 ir_node *op1, ir_node *op2, ir_node *other_op,
758 ia32_address_t *addr = &am->addr;
759 ir_mode *mode = get_irn_mode(op2);
760 int mode_bits = get_mode_size_bits(mode);
761 ir_node *new_op1, *new_op2;
763 unsigned commutative;
764 int use_am_and_immediates;
767 memset(am, 0, sizeof(am[0]));
/* decode the capability flags of the operation */
769 commutative = (flags & match_commutative) != 0;
770 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
771 use_am = (flags & match_am) != 0;
772 use_immediate = (flags & match_immediate) != 0;
773 assert(!use_am_and_immediates || use_immediate);
776 assert(!commutative || op1 != NULL);
777 assert(use_am || !(flags & match_8bit_am));
778 assert(use_am || !(flags & match_16bit_am));
/* sub-32-bit AM is only usable when explicitly allowed */
780 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
781 (mode_bits == 16 && !(flags & match_16bit_am))) {
785 /* we can simply skip downconvs for mode neutral nodes: the upper bits
786 * can be random for these operations */
787 if (flags & match_mode_neutral) {
788 op2 = ia32_skip_downconv(op2);
790 op1 = ia32_skip_downconv(op1);
794 /* match immediates. firm nodes are normalized: constants are always on the
797 if (!(flags & match_try_am) && use_immediate) {
798 new_op2 = try_create_Immediate(op2, 0);
/* try source address mode for op2 first */
801 if (new_op2 == NULL &&
802 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
803 build_address(am, op2, 0);
804 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
805 if (mode_is_float(mode)) {
806 new_op2 = ia32_new_NoReg_vfp(env_cg);
810 am->op_type = ia32_AddrModeS;
/* commutative: op1 may be folded instead, swapping the operands */
811 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
813 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
815 build_address(am, op1, 0);
817 if (mode_is_float(mode)) {
818 noreg = ia32_new_NoReg_vfp(env_cg);
823 if (new_op2 != NULL) {
826 new_op1 = be_transform_node(op2);
828 am->ins_permuted = 1;
830 am->op_type = ia32_AddrModeS;
/* no AM possible: plain register (or immediate) operands */
832 am->op_type = ia32_Normal;
834 if (flags & match_try_am) {
840 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
842 new_op2 = be_transform_node(op2);
844 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
/* fill in the remaining address components with defaults */
846 if (addr->base == NULL)
847 addr->base = noreg_GP;
848 if (addr->index == NULL)
849 addr->index = noreg_GP;
850 if (addr->mem == NULL)
853 am->new_op1 = new_op1;
854 am->new_op2 = new_op2;
855 am->commutative = commutative;
/**
 * "Fixes" a node that uses address mode: if the folded Load had a memory
 * Proj, the new node is turned into mode_T (so the old MemProj can attach)
 * and a Proj(pn_ia32_res) is returned instead of the node itself.
 *
 * @param node the node
 * @param am its address mode
 *
 * @return a Proj(pn_ia32_res) if a memory address mode is used,
 */
481 * "Fixes" a node that uses address mode by turning it into mode_T
860 * and returning a pn_ia32_res Proj.
862 * @param node the node
863 * @param am its address mode
865 * @return a Proj(pn_ia32_res) if a memory address mode is used,
868 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
873 if (am->mem_proj == NULL)
876 /* we have to create a mode_T so the old MemProj can attach to us */
877 mode = get_irn_mode(node);
878 load = get_Proj_pred(am->mem_proj);
/* redirect future transformations of the Load to the new node */
880 be_set_transformed_node(load, node);
882 if (mode != mode_T) {
883 set_irn_mode(node, mode_T);
884 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
/**
 * Construct a standard binary operation, set AM and immediate if required.
 *
 * @param node The original node for which the binop is created
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
495 * Construct a standard binary operation, set AM and immediate if required.
893 * @param node The original node for which the binop is created
894 * @param op1 The first operand
895 * @param op2 The second operand
896 * @param func The node constructor function
897 * @return The constructed ia32 node.
899 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
900 construct_binop_func *func, match_flags_t flags)
903 ir_node *block, *new_block, *new_node;
904 ia32_address_mode_t am;
905 ia32_address_t *addr = &am.addr;
907 block = get_nodes_block(node);
908 match_arguments(&am, block, op1, op2, NULL, flags);
910 dbgi = get_irn_dbg_info(node);
911 new_block = be_transform_node(block);
912 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
913 am.new_op1, am.new_op2);
914 set_am_attributes(new_node, &am);
915 /* we can't use source address mode anymore when using immediates */
916 if (!(flags & match_am_and_immediates) &&
917 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
918 set_ia32_am_support(new_node, ia32_am_none);
919 SET_IA32_ORIG_NODE(new_node, node);
921 new_node = fix_mem_proj(new_node, &am);
/**
 * Generic input positions of lowered ia32 binary ops; the asserts below
 * check they line up with the generated Adc/Sbb input positions.
 */
519 * Generic names for the inputs of an ia32 binary op.
930 n_ia32_l_binop_left, /**< ia32 left input */
931 n_ia32_l_binop_right, /**< ia32 right input */
932 n_ia32_l_binop_eflags /**< ia32 eflags input */
934 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
935 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
936 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
937 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
938 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
939 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
 * Construct a binary operation which also consumes the eflags
 * (e.g. Adc/Sbb).
 *
 * @param node The node to transform
 * @param func The node constructor function
 * @param flags The match flags
 * @return The constructor ia32 node
 */
529 * Construct a binary operation which also consumes the eflags.
944 * @param node The node to transform
945 * @param func The node constructor function
946 * @param flags The match flags
947 * @return The constructor ia32 node
949 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
952 ir_node *src_block = get_nodes_block(node);
953 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
954 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
955 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
957 ir_node *block, *new_node, *new_eflags;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
/* the eflags input is the "other_op": it must not depend on a folded load */
961 match_arguments(&am, src_block, op1, op2, eflags, flags);
963 dbgi = get_irn_dbg_info(node);
964 block = be_transform_node(src_block);
965 new_eflags = be_transform_node(eflags);
966 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2, new_eflags);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
/**
 * Returns the (lazily transformed and cached) initial x87 control-word
 * node taken from the ABI's ignore registers.
 */
555 static ir_node *get_fpcw(void)
983 if (initial_fpcw != NULL)
986 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
987 &ia32_fp_cw_regs[REG_FPCW]);
988 initial_fpcw = be_transform_node(fpcw);
/**
 * Construct an x87 float binary operation, set AM if possible and wire in
 * the FP control word. Operands are treated as commutative because reversed
 * instruction variants exist; a possible swap is recorded in ins_permuted.
 *
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
560 * Construct a standard binary operation, set AM and immediate if required.
996 * @param op1 The first operand
997 * @param op2 The second operand
998 * @param func The node constructor function
999 * @return The constructed ia32 node.
1001 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1002 construct_binop_float_func *func)
1004 ir_mode *mode = get_irn_mode(node);
1006 ir_node *block, *new_block, *new_node;
1007 ia32_address_mode_t am;
1008 ia32_address_t *addr = &am.addr;
1009 ia32_x87_attr_t *attr;
1010 /* All operations are considered commutative, because there are reverse
1012 match_flags_t flags = match_commutative;
1014 /* happens for div nodes... */
1016 mode = get_divop_resmod(node);
1018 /* cannot use address mode with long double on x87 */
1019 if (get_mode_size_bits(mode) <= 64)
1022 block = get_nodes_block(node);
1023 match_arguments(&am, block, op1, op2, NULL, flags);
1025 dbgi = get_irn_dbg_info(node);
1026 new_block = be_transform_node(block);
/* the extra input is the x87 control word (rounding mode etc.) */
1027 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1028 am.new_op1, am.new_op2, get_fpcw());
1029 set_am_attributes(new_node, &am);
/* remember whether operands were swapped so emit can pick the
 * reversed instruction variant */
1031 attr = get_ia32_x87_attr(new_node);
1032 attr->attr.data.ins_permuted = am.ins_permuted;
1034 SET_IA32_ORIG_NODE(new_node, node);
1036 new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate binary operation, sets AM and immediate if
 * required. The first operand is widened to 32 bit unless the operation is
 * mode-neutral; Convs on the shift amount are skipped since only the low
 * 5 bits matter on ia32.
 *
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
589 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1044 * @param op1 The first operand
1045 * @param op2 The second operand
1046 * @param func The node constructor function
1047 * @return The constructed ia32 node.
1049 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1050 construct_shift_func *func,
1051 match_flags_t flags)
1054 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1056 assert(! mode_is_float(get_irn_mode(node)));
1057 assert(flags & match_immediate);
1058 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* value operand: skip downconvs when mode-neutral, widen otherwise */
1060 if (flags & match_mode_neutral) {
1061 op1 = ia32_skip_downconv(op1);
1062 new_op1 = be_transform_node(op1);
1063 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1064 new_op1 = create_upconv(op1, node);
1066 new_op1 = be_transform_node(op1);
1069 /* the shift amount can be any mode that is bigger than 5 bits, since all
1070 * other bits are ignored anyway */
1071 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1072 ir_node *const op = get_Conv_op(op2);
1073 if (mode_is_float(get_irn_mode(op)))
1076 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1078 new_op2 = create_immediate_or_transform(op2, 0);
1080 dbgi = get_irn_dbg_info(node);
1081 block = get_nodes_block(node);
1082 new_block = be_transform_node(block);
1083 new_node = func(dbgi, new_block, new_op1, new_op2);
1084 SET_IA32_ORIG_NODE(new_node, node);
1086 /* lowered shift instruction may have a dependency operand, handle it here */
1087 if (get_irn_arity(node) == 3) {
1088 /* we have a dependency */
1089 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1090 add_irn_dep(new_node, new_dep);
/**
 * Construct a standard unary operation, set AM and immediate if required.
 *
 * @param op The operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
624 * Construct a standard unary operation, set AM and immediate if required.
1100 * @param op The operand
1101 * @param func The node constructor function
1102 * @return The constructed ia32 node.
1104 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1105 match_flags_t flags)
1108 ir_node *block, *new_block, *new_op, *new_node;
/* only match_mode_neutral is meaningful for unops */
1110 assert(flags == 0 || flags == match_mode_neutral);
1111 if (flags & match_mode_neutral) {
1112 op = ia32_skip_downconv(op);
1115 new_op = be_transform_node(op);
1116 dbgi = get_irn_dbg_info(node);
1117 block = get_nodes_block(node);
1118 new_block = be_transform_node(block);
1119 new_node = func(dbgi, new_block, new_op);
1121 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Builds an ia32 Lea node from a decomposed address (base, index, scale,
 * symconst, offset); base/index default to noreg_GP when absent.
 */
640 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1127 ia32_address_t *addr)
1129 ir_node *base, *index, *res;
1135 base = be_transform_node(base);
1138 index = addr->index;
1139 if (index == NULL) {
1142 index = be_transform_node(index);
1145 res = new_bd_ia32_Lea(dbgi, block, base, index);
1146 set_address(res, addr);
1152 * Returns non-zero if a given address mode has a symbolic or
1153 * numerical offset != 0.
1155 static int am_has_immediates(const ia32_address_t *addr)
1157 return addr->offset != 0 || addr->symconst_ent != NULL
1158 || addr->frame_entity || addr->use_frame;
/**
 * Creates an ia32 Add. Float Adds become xAdd (SSE2) or vfadd (x87);
 * integer Adds are matched against the strategies listed below (constant
 * folding, Lea with immediate, Add with source AM, plain Lea).
 *
 * @return the created ia32 Add node
 */
654 * Creates an ia32 Add.
1164 * @return the created ia32 Add node
1166 static ir_node *gen_Add(ir_node *node)
1168 ir_mode *mode = get_irn_mode(node);
1169 ir_node *op1 = get_Add_left(node);
1170 ir_node *op2 = get_Add_right(node);
1172 ir_node *block, *new_block, *new_node, *add_immediate_op;
1173 ia32_address_t addr;
1174 ia32_address_mode_t am;
1176 if (mode_is_float(mode)) {
1177 if (ia32_cg_config.use_sse2)
1178 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1179 match_commutative | match_am);
1181 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
/* prevent this Add from being folded into another node's AM again */
1184 ia32_mark_non_am(node);
1186 op2 = ia32_skip_downconv(op2);
1187 op1 = ia32_skip_downconv(op1);
/* strategy (best match first): */
1191 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1192 * 1. Add with immediate -> Lea
1193 * 2. Add with possible source address mode -> Add
1194 * 3. Otherwise -> Lea
1196 memset(&addr, 0, sizeof(addr));
1197 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1198 add_immediate_op = NULL;
1200 dbgi = get_irn_dbg_info(node);
1201 block = get_nodes_block(node);
1202 new_block = be_transform_node(block);
/* strategy 0: everything folded into an immediate -> single Const */
1205 if (addr.base == NULL && addr.index == NULL) {
1206 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1207 addr.symconst_sign, 0, addr.offset);
1208 be_dep_on_frame(new_node);
1209 SET_IA32_ORIG_NODE(new_node, node);
1212 /* add with immediate? */
1213 if (addr.index == NULL) {
1214 add_immediate_op = addr.base;
1215 } else if (addr.base == NULL && addr.scale == 0) {
1216 add_immediate_op = addr.index;
/* strategy 1: one register + immediates -> Lea (or nothing at all) */
1219 if (add_immediate_op != NULL) {
1220 if (!am_has_immediates(&addr)) {
1221 #ifdef DEBUG_libfirm
1222 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1225 return be_transform_node(add_immediate_op);
1228 new_node = create_lea_from_address(dbgi, new_block, &addr);
1229 SET_IA32_ORIG_NODE(new_node, node);
1233 /* test if we can use source address mode */
1234 match_arguments(&am, block, op1, op2, NULL, match_commutative
1235 | match_mode_neutral | match_am | match_immediate | match_try_am);
1237 /* construct an Add with source address mode */
1238 if (am.op_type == ia32_AddrModeS) {
1239 ia32_address_t *am_addr = &am.addr;
1240 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1241 am_addr->index, am_addr->mem, am.new_op1,
1243 set_am_attributes(new_node, &am);
1244 SET_IA32_ORIG_NODE(new_node, node);
1246 new_node = fix_mem_proj(new_node, &am);
1251 /* otherwise construct a lea */
1252 new_node = create_lea_from_address(dbgi, new_block, &addr);
1253 SET_IA32_ORIG_NODE(new_node, node);
1258 * Creates an ia32 Mul.
1260 * @return the created ia32 Mul node
/* Float mul -> xMul (SSE2) or vfmul (x87); integer mul -> IMul with the
 * full set of matcher flags (commutative, AM, mode-neutral, immediates). */
1262 static ir_node *gen_Mul(ir_node *node)
1264 ir_node *op1 = get_Mul_left(node);
1265 ir_node *op2 = get_Mul_right(node);
1266 ir_mode *mode = get_irn_mode(node);
1268 if (mode_is_float(mode)) {
1269 if (ia32_cg_config.use_sse2)
1270 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1271 match_commutative | match_am);
1273 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* for the upper 32bits of unsigned/signed multiplication see gen_Mulh */
1275 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1276 match_commutative | match_am | match_mode_neutral |
1277 match_immediate | match_am_and_immediates);
1281 * Creates an ia32 Mulh.
1282 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1283 * this result while Mul returns the lower 32 bit.
1285 * @return the created ia32 Mulh node
/* Selects IMul1OP (signed) or Mul (unsigned) — both widening one-operand
 * forms — and returns a Proj on the high 32bit result half. */
1287 static ir_node *gen_Mulh(ir_node *node)
1289 ir_node *block = get_nodes_block(node);
1290 ir_node *new_block = be_transform_node(block);
1291 dbg_info *dbgi = get_irn_dbg_info(node);
1292 ir_node *op1 = get_Mulh_left(node);
1293 ir_node *op2 = get_Mulh_right(node);
1294 ir_mode *mode = get_irn_mode(node);
1296 ir_node *proj_res_high;
1298 if (mode_is_signed(mode)) {
/* signed: imul with one explicit operand, result in EDX:EAX */
1299 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1300 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1301 mode_Iu, pn_ia32_IMul1OP_res_high);
/* unsigned: plain mul, result likewise split into low/high projections */
1303 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1304 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1305 mode_Iu, pn_ia32_Mul_res_high);
1307 return proj_res_high;
1311 * Creates an ia32 And.
1313 * @return The created ia32 And node
/* Special-cases And with 0xFF / 0xFFFF: that is a zero extension and is
 * emitted as an I2I conversion instead of an and instruction. */
1315 static ir_node *gen_And(ir_node *node)
1317 ir_node *op1 = get_And_left(node);
1318 ir_node *op2 = get_And_right(node);
1319 assert(! mode_is_float(get_irn_mode(node)));
1321 /* is it a zero extension? */
1322 if (is_Const(op2)) {
1323 tarval *tv = get_Const_tarval(op2);
1324 long v = get_tarval_long(tv);
1326 if (v == 0xFF || v == 0xFFFF) {
1327 dbg_info *dbgi = get_irn_dbg_info(node);
1328 ir_node *block = get_nodes_block(node);
/* src_mode setup for the 0xFF case is elided from this view */
1335 assert(v == 0xFFFF);
1338 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And with full matcher flags */
1343 return gen_binop(node, op1, op2, new_bd_ia32_And,
1344 match_commutative | match_mode_neutral | match_am | match_immediate);
1350 * Creates an ia32 Or.
1352 * @return The created ia32 Or node
/* Integer-only (floats are asserted away); straight binop lowering. */
1354 static ir_node *gen_Or(ir_node *node)
1356 ir_node *op1 = get_Or_left(node);
1357 ir_node *op2 = get_Or_right(node);
1359 assert (! mode_is_float(get_irn_mode(node)));
1360 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1361 | match_mode_neutral | match_am | match_immediate);
1367 * Creates an ia32 Eor.
1369 * @return The created ia32 Eor node
/* firm's exclusive-or ("Eor") maps onto the ia32 Xor node. */
1371 static ir_node *gen_Eor(ir_node *node)
1373 ir_node *op1 = get_Eor_left(node);
1374 ir_node *op2 = get_Eor_right(node);
1376 assert(! mode_is_float(get_irn_mode(node)));
1377 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1378 | match_mode_neutral | match_am | match_immediate);
1383 * Creates an ia32 Sub.
1385 * @return The created ia32 Sub node
/* Float sub -> xSub/vfsub. Integer Sub with a Const right operand should
 * have been normalized to Add(x, -C) earlier; warn if it survived. Note
 * match_commutative is absent — subtraction is not commutative. */
1387 static ir_node *gen_Sub(ir_node *node)
1389 ir_node *op1 = get_Sub_left(node);
1390 ir_node *op2 = get_Sub_right(node);
1391 ir_mode *mode = get_irn_mode(node);
1393 if (mode_is_float(mode)) {
1394 if (ia32_cg_config.use_sse2)
1395 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1397 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1400 if (is_Const(op2)) {
1401 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1405 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1406 | match_am | match_immediate);
/* Compute the memory input for a node whose address mode consumed a load.
 * Combines the node's original memory predecessor (src_mem) with the memory
 * of the folded load (am_mem) while avoiding memory cycles: projections of
 * the very load that was folded must not feed back into the new node.
 * Returns either a transformed src_mem, am_mem alone, or a Sync of both. */
1409 static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
1410 ir_node *const src_val,
1411 ir_node *const src_mem,
1412 ir_node *const am_mem)
1414 if (is_NoMem(am_mem)) {
/* no load was folded — just use the original memory */
1415 return be_transform_node(src_mem);
1416 } else if (is_Proj(src_val) &&
1418 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1419 /* avoid memory loop */
1421 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* src_mem is a Sync: rebuild it without preds of the folded load */
1422 ir_node *const ptr_pred = get_Proj_pred(src_val);
1423 int const arity = get_Sync_n_preds(src_mem);
/* +1 slot so am_mem can be appended (append elided from this view) */
1428 NEW_ARR_A(ir_node*, ins, arity + 1);
1430 /* NOTE: This sometimes produces dead-code because the old sync in
1431 * src_mem might not be used anymore, we should detect this case
1432 * and kill the sync... */
1433 for (i = arity - 1; i >= 0; --i) {
1434 ir_node *const pred = get_Sync_pred(src_mem, i);
1436 /* avoid memory loop */
1437 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1440 ins[n++] = be_transform_node(pred);
1445 return new_r_Sync(irg, block, n, ins);
/* default: Sync the transformed src_mem with the load's memory */
1449 ins[0] = be_transform_node(src_mem);
1451 return new_r_Sync(irg, block, 2, ins);
1456 * Create a 32bit to 64bit signed extension.
1458 * @param dbgi debug info
1459 * @param block the block where node nodes should be placed
1460 * @param val the value to extend
1461 * @param orig the original node
/* Two strategies: the short "cltd/cdq" form (needs EAX/EDX, ProduceVal
 * gives the register allocator a value to tie to), or an arithmetic
 * shift right by 31 which broadcasts the sign bit. */
1463 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1464 ir_node *val, const ir_node *orig)
1469 if (ia32_cg_config.use_short_sex_eax) {
1470 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1471 be_dep_on_frame(pval);
1472 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1474 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
/* sar val, 31 == replicate sign bit into all 32 bits */
1475 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1477 SET_IA32_ORIG_NODE(res, orig);
1482 * Generates an ia32 DivMod with additional infrastructure for the
1483 * register allocator if needed.
/* Common lowering for Div, Mod and DivMod: extracts the operands per
 * opcode, matches address mode on op2, then emits IDiv (signed, with a
 * real 32->64 sign extension into EDX) or Div (unsigned, EDX zeroed via
 * a Const). Consumers pick the quotient/remainder via Projs. */
1485 static ir_node *create_Div(ir_node *node)
1487 dbg_info *dbgi = get_irn_dbg_info(node);
1488 ir_node *block = get_nodes_block(node);
1489 ir_node *new_block = be_transform_node(block);
1496 ir_node *sign_extension;
1497 ia32_address_mode_t am;
1498 ia32_address_t *addr = &am.addr;
1500 /* the upper bits have random contents for smaller modes */
1501 switch (get_irn_opcode(node)) {
1503 op1 = get_Div_left(node);
1504 op2 = get_Div_right(node);
1505 mem = get_Div_mem(node);
1506 mode = get_Div_resmode(node);
1509 op1 = get_Mod_left(node);
1510 op2 = get_Mod_right(node);
1511 mem = get_Mod_mem(node);
1512 mode = get_Mod_resmode(node);
1515 op1 = get_DivMod_left(node);
1516 op2 = get_DivMod_right(node);
1517 mem = get_DivMod_mem(node);
1518 mode = get_DivMod_resmode(node);
1521 panic("invalid divmod node %+F", node);
1524 match_arguments(&am, block, op1, op2, NULL, match_am);
1526 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1527 is the memory of the consumed address. We can have only the second op as address
1528 in Div nodes, so check only op2. */
1529 new_mem = transform_AM_mem(current_ir_graph, block, op2, mem, addr->mem);
1531 if (mode_is_signed(mode)) {
/* idiv divides EDX:EAX, so the dividend must be sign-extended */
1532 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1533 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1534 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned div divides EDX:EAX too — EDX must be zero */
1536 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1537 be_dep_on_frame(sign_extension);
1539 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1540 addr->index, new_mem, am.new_op2,
1541 am.new_op1, sign_extension);
/* keep the original pinned state (division may trap) */
1544 set_irn_pinned(new_node, get_irn_pinned(node));
1546 set_am_attributes(new_node, &am);
1547 SET_IA32_ORIG_NODE(new_node, node);
1549 new_node = fix_mem_proj(new_node, &am);
1555 * Generates an ia32 Mod.
/* Thin wrapper: create_Div() dispatches on the firm opcode itself. */
1557 static ir_node *gen_Mod(ir_node *node)
1559 return create_Div(node);
1563 * Generates an ia32 Div.
/* Thin wrapper: create_Div() dispatches on the firm opcode itself. */
1565 static ir_node *gen_Div(ir_node *node)
1567 return create_Div(node);
1571 * Generates an ia32 DivMod.
/* Thin wrapper: create_Div() dispatches on the firm opcode itself. */
1573 static ir_node *gen_DivMod(ir_node *node)
1575 return create_Div(node);
1581 * Creates an ia32 floating Div.
1583 * @return The created ia32 xDiv node
/* Quot is firm's floating-point division: xDiv on SSE2, vfdiv on x87.
 * Not commutative, hence no match_commutative flag. */
1585 static ir_node *gen_Quot(ir_node *node)
1587 ir_node *op1 = get_Quot_left(node);
1588 ir_node *op2 = get_Quot_right(node);
1590 if (ia32_cg_config.use_sse2) {
1591 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1593 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1599 * Creates an ia32 Shl.
1601 * @return The created ia32 Shl node
/* mode_neutral is fine for left shifts: upper garbage bits shift out. */
1603 static ir_node *gen_Shl(ir_node *node)
1605 ir_node *left = get_Shl_left(node);
1606 ir_node *right = get_Shl_right(node);
1608 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1609 match_mode_neutral | match_immediate);
1613 * Creates an ia32 Shr.
1615 * @return The created ia32 Shr node
/* No match_mode_neutral here: a logical right shift pulls the upper bits
 * down, so they must be correct (zeroed) for smaller modes. */
1617 static ir_node *gen_Shr(ir_node *node)
1619 ir_node *left = get_Shr_left(node);
1620 ir_node *right = get_Shr_right(node);
1622 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1628 * Creates an ia32 Sar.
1630 * @return The created ia32 Shrs node
/* Recognizes two idioms before falling back to a plain sar:
 * 1) Shrs(x, 31) is a full sign extension -> create_sex_32_64,
 * 2) Shrs(Shl(x, C), C) with C in {16,24} is an 8/16bit sign extension
 *    -> I2I conversion (movsx). */
1632 static ir_node *gen_Shrs(ir_node *node)
1634 ir_node *left = get_Shrs_left(node);
1635 ir_node *right = get_Shrs_right(node);
1637 if (is_Const(right)) {
1638 tarval *tv = get_Const_tarval(right);
1639 long val = get_tarval_long(tv);
/* the "val == 31" guard is elided from this view — TODO confirm */
1641 /* this is a sign extension */
1642 dbg_info *dbgi = get_irn_dbg_info(node);
1643 ir_node *block = be_transform_node(get_nodes_block(node));
1644 ir_node *new_op = be_transform_node(left);
1646 return create_sex_32_64(dbgi, block, new_op, node);
1650 /* 8 or 16 bit sign extension? */
1651 if (is_Const(right) && is_Shl(left)) {
1652 ir_node *shl_left = get_Shl_left(left);
1653 ir_node *shl_right = get_Shl_right(left);
1654 if (is_Const(shl_right)) {
1655 tarval *tv1 = get_Const_tarval(right);
1656 tarval *tv2 = get_Const_tarval(shl_right);
/* identical tarvals => identical shift amounts */
1657 if (tv1 == tv2 && tarval_is_long(tv1)) {
1658 long val = get_tarval_long(tv1);
1659 if (val == 16 || val == 24) {
1660 dbg_info *dbgi = get_irn_dbg_info(node);
1661 ir_node *block = get_nodes_block(node);
/* src_mode selection (Hs for 16, Bs for 24?) elided — TODO confirm */
1671 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* fallback: plain arithmetic shift right */
1680 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1686 * Creates an ia32 Rol.
1688 * @param op1 The first operator
1689 * @param op2 The second operator
1690 * @return The created ia32 RotL node
/* Helper for gen_Rotl; op1/op2 are passed in explicitly because the
 * caller may have rewritten the rotate amount. */
1692 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1694 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1700 * Creates an ia32 Ror.
1701 * NOTE: There is no RotR with immediate because this would always be a RotL
1702 * "imm-mode_size_bits" which can be pre-calculated.
1704 * @param op1 The first operator
1705 * @param op2 The second operator
1706 * @return The created ia32 RotR node
/* Only reached from gen_Rotl's pattern match; see NOTE above for why the
 * immediate flag is still harmless to pass. */
1708 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1710 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1716 * Creates an ia32 RotR or RotL (depending on the found pattern).
1718 * @return The created ia32 RotL or RotR node
/* firm only has RotL. RotL(x, bits - e) is equivalent to RotR(x, e); the
 * middleend normalizes "bits - e" to "Add(Minus(e), bits)", which is the
 * shape matched below. */
1720 static ir_node *gen_Rotl(ir_node *node)
1722 ir_node *rotate = NULL;
1723 ir_node *op1 = get_Rotl_left(node);
1724 ir_node *op2 = get_Rotl_right(node);
1726 /* Firm has only RotL, so we are looking for a right (op2)
1727 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1728 that means we can create a RotR instead of an Add and a RotL */
/* is_Add(op2) guard is elided from this view — TODO confirm */
1732 ir_node *left = get_Add_left(add);
1733 ir_node *right = get_Add_right(add);
1734 if (is_Const(right)) {
1735 tarval *tv = get_Const_tarval(right);
1736 ir_mode *mode = get_irn_mode(node);
1737 long bits = get_mode_size_bits(mode);
1739 if (is_Minus(left) &&
1740 tarval_is_long(tv) &&
1741 get_tarval_long(tv) == bits &&
/* one more condition elided (likely bits == 32) — TODO confirm */
1744 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1745 rotate = gen_Ror(node, op1, get_Minus_op(left));
1750 if (rotate == NULL) {
/* pattern did not match: emit a plain RotL */
1751 rotate = gen_Rol(node, op1, op2);
1760 * Transforms a Minus node.
1762 * @return The created ia32 Minus node
/* Float negation: SSE2 flips the sign bit by xor-ing with a constant
 * sign mask loaded from memory (address mode on the xXor); x87 uses
 * fchs. Integer negation is a plain Neg. */
1764 static ir_node *gen_Minus(ir_node *node)
1766 ir_node *op = get_Minus_op(node);
1767 ir_node *block = be_transform_node(get_nodes_block(node));
1768 dbg_info *dbgi = get_irn_dbg_info(node);
1769 ir_mode *mode = get_irn_mode(node);
1774 if (mode_is_float(mode)) {
1775 ir_node *new_op = be_transform_node(op);
1776 if (ia32_cg_config.use_sse2) {
1777 /* TODO: non-optimal... if we have many xXors, then we should
1778 * rather create a load for the const and use that instead of
1779 * several AM nodes... */
1780 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1782 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1783 nomem, new_op, noreg_xmm);
/* pick the 32bit or 64bit sign-mask constant entity */
1785 size = get_mode_size_bits(mode);
1786 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
/* fold the mask constant as a source address-mode operand */
1788 set_ia32_am_sc(new_node, ent);
1789 set_ia32_op_type(new_node, ia32_AddrModeS);
1790 set_ia32_ls_mode(new_node, mode);
1792 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1795 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1798 SET_IA32_ORIG_NODE(new_node, node);
1804 * Transforms a Not node.
1806 * @return The created ia32 Not node
/* Integer bitwise not only; mode_b Nots must have been lowered before. */
1808 static ir_node *gen_Not(ir_node *node)
1810 ir_node *op = get_Not_op(node);
1812 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1813 assert (! mode_is_float(get_irn_mode(node)));
1815 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1821 * Transforms an Abs node.
1823 * @return The created ia32 Abs node
/* Float abs: SSE2 masks off the sign bit (xAnd with an abs-mask constant
 * via source AM); x87 uses fabs (vfabs). Integer abs uses the classic
 * branch-free sequence: s = x >> 31; result = (x ^ s) - s. */
1825 static ir_node *gen_Abs(ir_node *node)
1827 ir_node *block = get_nodes_block(node);
1828 ir_node *new_block = be_transform_node(block);
1829 ir_node *op = get_Abs_op(node);
1830 dbg_info *dbgi = get_irn_dbg_info(node);
1831 ir_mode *mode = get_irn_mode(node);
1837 if (mode_is_float(mode)) {
1838 new_op = be_transform_node(op);
1840 if (ia32_cg_config.use_sse2) {
1841 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1842 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1843 nomem, new_op, noreg_fp);
/* 32bit vs 64bit abs-mask constant */
1845 size = get_mode_size_bits(mode);
1846 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1848 set_ia32_am_sc(new_node, ent);
1850 SET_IA32_ORIG_NODE(new_node, node);
1852 set_ia32_op_type(new_node, ia32_AddrModeS);
1853 set_ia32_ls_mode(new_node, mode);
1855 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1856 SET_IA32_ORIG_NODE(new_node, node);
1859 ir_node *xor, *sign_extension;
/* widen sub-32bit operands to a signed 32bit value first */
1861 if (get_mode_size_bits(mode) == 32) {
1862 new_op = be_transform_node(op);
1864 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
/* s = x >> 31: all-ones for negative x, zero otherwise */
1867 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1869 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1870 nomem, new_op, sign_extension);
1871 SET_IA32_ORIG_NODE(xor, node);
/* (x ^ s) - s == |x| */
1873 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1874 nomem, xor, sign_extension);
1875 SET_IA32_ORIG_NODE(new_node, node);
1882 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* The bt result lands in the carry flag; the caller adjusts the compare
 * relation accordingly (see get_flags_node). */
1884 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1886 dbg_info *dbgi = get_irn_dbg_info(cmp);
1887 ir_node *block = get_nodes_block(cmp);
1888 ir_node *new_block = be_transform_node(block);
1889 ir_node *op1 = be_transform_node(x);
1890 ir_node *op2 = be_transform_node(n);
1892 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1896 * Transform a node returning a "flag" result.
1898 * @param node the node to transform
1899 * @param pnc_out the compare mode to use
/* Produces the eflags-producing node for a boolean input and reports via
 * *pnc_out which condition to test. Three paths:
 *  - Proj(Cmp) whose operand is "x & (1 << n)" against 0 -> bt + Jc/Jnc
 *    (carry flag; encoded below as unsigned Lt/Ge),
 *  - other Proj(Cmp) -> transform the Cmp itself,
 *  - any plain mode_b value -> Test against itself, condition Lg. */
1901 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1908 /* we have a Cmp as input */
1909 if (is_Proj(node)) {
1910 ir_node *pred = get_Proj_pred(node);
1912 pn_Cmp pnc = get_Proj_proj(node);
1913 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1914 ir_node *l = get_Cmp_left(pred);
1915 ir_node *r = get_Cmp_right(pred);
/* pattern: left operand is an And (guard elided from this view) */
1917 ir_node *la = get_And_left(l);
1918 ir_node *ra = get_And_right(l);
1920 ir_node *c = get_Shl_left(la);
1921 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1922 /* (1 << n) & ra) */
1923 ir_node *n = get_Shl_right(la);
1924 flags = gen_bt(pred, ra, n);
1925 /* we must generate a Jc/Jnc jump */
1926 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1929 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* symmetric case: the Shl sits in the right And operand */
1934 ir_node *c = get_Shl_left(ra);
1935 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1936 /* la & (1 << n)) */
1937 ir_node *n = get_Shl_right(ra);
1938 flags = gen_bt(pred, la, n);
1939 /* we must generate a Jc/Jnc jump */
1940 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1943 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* ordinary Cmp: its transformed form already produces the flags */
1949 flags = be_transform_node(pred);
1955 /* a mode_b value, we have to compare it against 0 */
1956 dbgi = get_irn_dbg_info(node);
1957 new_block = be_transform_node(get_nodes_block(node));
1958 new_op = be_transform_node(node);
1959 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1960 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1961 *pnc_out = pn_Cmp_Lg;
1966 * Transforms a Load.
1968 * @return the created ia32 Load node
/* Builds an addressing mode from the pointer, then selects the load
 * flavor: xLoad (SSE2 float), vfld (x87 float), Conv_I2I with AM for
 * sub-32bit integers (load + extend in one instruction), or plain Load.
 * Unpinned loads are marked rematerializable for the spiller. */
1970 static ir_node *gen_Load(ir_node *node)
1972 ir_node *old_block = get_nodes_block(node);
1973 ir_node *block = be_transform_node(old_block);
1974 ir_node *ptr = get_Load_ptr(node);
1975 ir_node *mem = get_Load_mem(node);
1976 ir_node *new_mem = be_transform_node(mem);
1979 dbg_info *dbgi = get_irn_dbg_info(node);
1980 ir_mode *mode = get_Load_mode(node);
1983 ia32_address_t addr;
1985 /* construct load address */
1986 memset(&addr, 0, sizeof(addr));
1987 ia32_create_address_mode(&addr, ptr, 0);
/* NULL base/index handling elided; non-NULL ones are transformed */
1994 base = be_transform_node(base);
1997 if (index == NULL) {
2000 index = be_transform_node(index);
2003 if (mode_is_float(mode)) {
2004 if (ia32_cg_config.use_sse2) {
2005 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2007 res_mode = mode_xmm;
2009 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2011 res_mode = mode_vfp;
2014 assert(mode != mode_b);
2016 /* create a conv node with address mode for smaller modes */
2017 if (get_mode_size_bits(mode) < 32) {
2018 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2019 new_mem, noreg_GP, mode);
2021 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2026 set_irn_pinned(new_node, get_irn_pinned(node));
2027 set_ia32_op_type(new_node, ia32_AddrModeS);
2028 set_ia32_ls_mode(new_node, mode);
2029 set_address(new_node, &addr);
2031 if (get_irn_pinned(node) == op_pin_state_floats) {
/* all load variants must agree on the result proj number for this */
2032 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2033 && pn_ia32_vfld_res == pn_ia32_Load_res
2034 && pn_ia32_Load_res == pn_ia32_res);
2035 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2038 SET_IA32_ORIG_NODE(new_node, node);
2040 be_dep_on_frame(new_node);
/* Decide whether a load result `node` (a Proj of a Load) may be folded
 * into a destination-address-mode operation storing back to `ptr`.
 * Requires: single user of the load result, same block, same pointer as
 * the store, and no dependency of `other` on the load (that would create
 * a cycle). Returns non-zero if destination AM is safe. */
2044 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2045 ir_node *ptr, ir_node *other)
2052 /* we only use address mode if we're the only user of the load */
2053 if (get_irn_n_edges(node) > 1)
2056 load = get_Proj_pred(node);
2059 if (get_nodes_block(load) != block)
2062 /* store should have the same pointer as the load */
2063 if (get_Load_ptr(load) != ptr)
2066 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2067 if (other != NULL &&
2068 get_nodes_block(other) == block &&
2069 heights_reachable_in_block(heights, other, load)) {
2073 if (prevents_AM(block, load, mem))
2075 /* Store should be attached to the load via mem */
2076 assert(heights_reachable_in_block(heights, mem, load));
/* Try to build a destination-address-mode binop: a read-modify-write
 * instruction "op [mem], reg/imm" replacing Load+Op+Store. Returns NULL
 * (via the elided else branch) when neither operand qualifies. func8bit
 * is used for 8bit modes since those are distinct node constructors. */
2081 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2082 ir_node *mem, ir_node *ptr, ir_mode *mode,
2083 construct_binop_dest_func *func,
2084 construct_binop_dest_func *func8bit,
2085 match_flags_t flags)
2087 ir_node *src_block = get_nodes_block(node);
2095 ia32_address_mode_t am;
2096 ia32_address_t *addr = &am.addr;
2097 memset(&am, 0, sizeof(am));
2099 assert(flags & match_immediate); /* there is no destam node without... */
2100 commutative = (flags & match_commutative) != 0;
2102 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
/* op1 is the loaded value; op2 becomes the register/immediate operand */
2103 build_address(&am, op1, ia32_create_am_double_use);
2104 new_op = create_immediate_or_transform(op2, 0);
2105 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
/* commutative ops may swap so the loaded value sits in memory */
2106 build_address(&am, op2, ia32_create_am_double_use);
2107 new_op = create_immediate_or_transform(op1, 0);
/* fill unset address parts with the no-register placeholder */
2112 if (addr->base == NULL)
2113 addr->base = noreg_GP;
2114 if (addr->index == NULL)
2115 addr->index = noreg_GP;
2116 if (addr->mem == NULL)
2119 dbgi = get_irn_dbg_info(node);
2120 block = be_transform_node(src_block);
/* merge store memory with the folded load's memory, avoiding cycles */
2121 new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
2123 if (get_mode_size_bits(mode) == 8) {
2124 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2126 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2128 set_address(new_node, addr);
2129 set_ia32_op_type(new_node, ia32_AddrModeD);
2130 set_ia32_ls_mode(new_node, mode);
2131 SET_IA32_ORIG_NODE(new_node, node);
/* the consumed load and its mem proj are now represented by new_node */
2133 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2134 mem_proj = be_transform_node(am.mem_proj);
2135 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Unary counterpart of dest_am_binop: build "op [mem]" (neg/not/inc/dec
 * on a memory operand) replacing Load+Op+Store, or return NULL (elided
 * early-exit) when the load cannot be consumed. */
2140 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2141 ir_node *ptr, ir_mode *mode,
2142 construct_unop_dest_func *func)
2144 ir_node *src_block = get_nodes_block(node);
2150 ia32_address_mode_t am;
2151 ia32_address_t *addr = &am.addr;
2153 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2156 memset(&am, 0, sizeof(am));
2157 build_address(&am, op, ia32_create_am_double_use);
2159 dbgi = get_irn_dbg_info(node);
2160 block = be_transform_node(src_block);
/* merge store memory with the folded load's memory, avoiding cycles */
2161 new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
2162 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2163 set_address(new_node, addr);
2164 set_ia32_op_type(new_node, ia32_AddrModeD);
2165 set_ia32_ls_mode(new_node, mode);
2166 SET_IA32_ORIG_NODE(new_node, node);
/* the consumed load and its mem proj are now represented by new_node */
2168 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2169 mem_proj = be_transform_node(am.mem_proj);
2170 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to turn Store(Mux(cond, 1, 0)) of an 8bit value into a single
 * SetMem (setcc with memory destination). `negated` marks the inverted
 * Mux(cond, 0, 1) form; any other constant pair bails out (elided). */
2175 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2177 ir_mode *mode = get_irn_mode(node);
2178 ir_node *mux_true = get_Mux_true(node);
2179 ir_node *mux_false = get_Mux_false(node);
2189 ia32_address_t addr;
/* setcc only writes a byte */
2191 if (get_mode_size_bits(mode) != 8)
2194 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2196 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2202 build_address_ptr(&addr, ptr, mem);
2204 dbgi = get_irn_dbg_info(node);
2205 block = get_nodes_block(node);
2206 new_block = be_transform_node(block);
2207 cond = get_Mux_sel(node);
/* materialize the selector as eflags plus a condition code */
2208 flags = get_flags_node(cond, &pnc);
2209 new_mem = be_transform_node(mem);
2210 new_node = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2211 addr.index, addr.mem, flags, pnc, negated);
2212 set_address(new_node, &addr);
2213 set_ia32_op_type(new_node, ia32_AddrModeD);
2214 set_ia32_ls_mode(new_node, mode);
2215 SET_IA32_ORIG_NODE(new_node, node);
/* Given a Store, try to express value computation + store as one
 * destination-address-mode instruction (add/sub/and/or/xor/shift/rotate/
 * neg/not on memory, inc/dec, SetMem). Dispatches on the opcode of the
 * stored value. Returns the new node or NULL when no pattern applies. */
2220 static ir_node *try_create_dest_am(ir_node *node)
2222 ir_node *val = get_Store_value(node);
2223 ir_node *mem = get_Store_mem(node);
2224 ir_node *ptr = get_Store_ptr(node);
2225 ir_mode *mode = get_irn_mode(val);
2226 unsigned bits = get_mode_size_bits(mode);
2231 /* handle only GP modes for now... */
2232 if (!ia32_mode_needs_gp_reg(mode))
2236 /* store must be the only user of the val node */
2237 if (get_irn_n_edges(val) > 1)
2239 /* skip pointless convs */
/* is_Conv(val) loop header elided from this view */
2241 ir_node *conv_op = get_Conv_op(val);
2242 ir_mode *pred_mode = get_irn_mode(conv_op);
2243 if (!ia32_mode_needs_gp_reg(pred_mode))
2245 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2253 /* value must be in the same block */
2254 if (get_nodes_block(node) != get_nodes_block(val))
2257 switch (get_irn_opcode(val)) {
2259 op1 = get_Add_left(val);
2260 op2 = get_Add_right(val);
/* add/sub of +-1 become inc/dec when the target prefers them */
2261 if (ia32_cg_config.use_incdec) {
2262 if (is_Const_1(op2)) {
2263 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2265 } else if (is_Const_Minus_1(op2)) {
2266 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2270 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2271 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2272 match_commutative | match_immediate);
2275 op1 = get_Sub_left(val);
2276 op2 = get_Sub_right(val);
/* sub with const should have been normalized to add of -const */
2277 if (is_Const(op2)) {
2278 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2280 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2281 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2285 op1 = get_And_left(val);
2286 op2 = get_And_right(val);
2287 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2288 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2289 match_commutative | match_immediate);
2292 op1 = get_Or_left(val);
2293 op2 = get_Or_right(val);
2294 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2295 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2296 match_commutative | match_immediate);
2299 op1 = get_Eor_left(val);
2300 op2 = get_Eor_right(val);
2301 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2302 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2303 match_commutative | match_immediate);
/* shifts have no separate 8bit constructor: same func passed twice */
2306 op1 = get_Shl_left(val);
2307 op2 = get_Shl_right(val);
2308 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2309 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2313 op1 = get_Shr_left(val);
2314 op2 = get_Shr_right(val);
2315 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2316 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2320 op1 = get_Shrs_left(val);
2321 op2 = get_Shrs_right(val);
2322 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2323 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2327 op1 = get_Rotl_left(val);
2328 op2 = get_Rotl_right(val);
2329 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2330 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2333 /* TODO: match ROR patterns... */
2335 new_node = try_create_SetMem(val, ptr, mem);
2338 op1 = get_Minus_op(val);
2339 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2342 /* should be lowered already */
2343 assert(mode != mode_b);
2344 op1 = get_Not_op(val);
2345 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
2351 if (new_node != NULL) {
/* a pinned Store must stay pinned after being merged into the op */
2352 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2353 get_irn_pinned(node) == op_pin_state_pinned) {
2354 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns whether an integer mode can be the target of an x87 fist store:
 * signed and 16 or 32 bits wide (fist has no unsigned/8bit forms). */
2361 static bool possible_int_mode_for_fp(ir_mode *mode)
2365 if (!mode_is_signed(mode))
2367 size = get_mode_size_bits(mode);
2368 if (size != 16 && size != 32)
/* Returns non-zero if node is a Conv from a float mode to an integer
 * mode that a fist instruction can store directly (see
 * possible_int_mode_for_fp). Used to fold the Conv into a Store. */
2373 static int is_float_to_int_conv(const ir_node *node)
2375 ir_mode *mode = get_irn_mode(node);
2379 if (!possible_int_mode_for_fp(mode))
/* is_Conv(node) check elided from this view — TODO confirm */
2384 conv_op = get_Conv_op(node);
2385 conv_mode = get_irn_mode(conv_op);
2387 if (!mode_is_float(conv_mode))
2394 * Transform a Store(floatConst) into a sequence of
2397 * @return the created ia32 Store node
/* Emits one 32bit integer Store per 4-byte chunk of the constant's bit
 * pattern (little-endian assembly via get_tarval_sub_bits) and Syncs the
 * resulting memory values. Avoids materializing the float constant. */
2399 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2401 ir_mode *mode = get_irn_mode(cns);
2402 unsigned size = get_mode_size_bytes(mode);
2403 tarval *tv = get_Const_tarval(cns);
2404 ir_node *block = get_nodes_block(node);
2405 ir_node *new_block = be_transform_node(block);
2406 ir_node *ptr = get_Store_ptr(node);
2407 ir_node *mem = get_Store_mem(node);
2408 dbg_info *dbgi = get_irn_dbg_info(node);
2412 ia32_address_t addr;
/* only 4- and 8-byte floats are expected here */
2414 assert(size % 4 == 0);
2417 build_address_ptr(&addr, ptr, mem);
/* assemble the next 32bit little-endian chunk of the constant */
2421 get_tarval_sub_bits(tv, ofs) |
2422 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2423 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2424 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2425 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2427 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2428 addr.index, addr.mem, imm);
2430 set_irn_pinned(new_node, get_irn_pinned(node));
2431 set_ia32_op_type(new_node, ia32_AddrModeD);
2432 set_ia32_ls_mode(new_node, mode_Iu);
2433 set_address(new_node, &addr);
2434 SET_IA32_ORIG_NODE(new_node, node);
/* collect each partial store; offset/size advance is elided here */
2437 ins[i++] = new_node;
2442 } while (size != 0);
/* combine the partial memories into one */
2445 return new_rd_Sync(dbgi, current_ir_graph, new_block, i, ins);
2452 * Generate a vfist or vfisttp instruction.
/* fisttp (SSE3) truncates and pops the x87 stack in one go — but since
 * it always pops, a Keep on the result proj preserves the value for any
 * other users. The classic fist path needs the FPU control word set to
 * truncation mode instead (trunc_mode operand). *fist receives the node
 * the caller should attach store attributes to. */
2454 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2455 ir_node *mem, ir_node *val, ir_node **fist)
2459 if (ia32_cg_config.use_fisttp) {
2460 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2461 if other users exists */
2462 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2463 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2464 ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2465 be_new_Keep(reg_class, irg, block, 1, &value);
2467 new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
2470 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2473 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2479 * Transforms a general (no special case) Store.
2481 * @return the created ia32 Store node
/* Fallback Store lowering: first try destination address mode; else build
 * the address and pick xStore (SSE2 float), vfst (x87 float), vfist
 * (float->int conv folded into the store), or Store/Store8Bit for GP
 * values. Skippable Convs before the value are stripped per-case. */
2483 static ir_node *gen_general_Store(ir_node *node)
2485 ir_node *val = get_Store_value(node);
2486 ir_mode *mode = get_irn_mode(val);
2487 ir_node *block = get_nodes_block(node);
2488 ir_node *new_block = be_transform_node(block);
2489 ir_node *ptr = get_Store_ptr(node);
2490 ir_node *mem = get_Store_mem(node);
2491 dbg_info *dbgi = get_irn_dbg_info(node);
2492 ir_node *new_val, *new_node, *store;
2493 ia32_address_t addr;
2495 /* check for destination address mode */
2496 new_node = try_create_dest_am(node);
2497 if (new_node != NULL)
2500 /* construct store address */
2501 memset(&addr, 0, sizeof(addr));
2502 ia32_create_address_mode(&addr, ptr, 0);
2504 if (addr.base == NULL) {
2505 addr.base = noreg_GP;
2507 addr.base = be_transform_node(addr.base);
2510 if (addr.index == NULL) {
2511 addr.index = noreg_GP;
2513 addr.index = be_transform_node(addr.index);
2515 addr.mem = be_transform_node(mem);
2517 if (mode_is_float(mode)) {
2518 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2520 while (is_Conv(val) && mode == get_irn_mode(val)) {
2521 ir_node *op = get_Conv_op(val);
2522 if (!mode_is_float(get_irn_mode(op)))
2526 new_val = be_transform_node(val);
2527 if (ia32_cg_config.use_sse2) {
2528 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2529 addr.index, addr.mem, new_val);
2531 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2532 addr.index, addr.mem, new_val, mode);
2535 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
/* fold float->int Conv + Store into a single fist */
2536 val = get_Conv_op(val);
2538 /* TODO: is this optimisation still necessary at all (middleend)? */
2539 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2540 while (is_Conv(val)) {
2541 ir_node *op = get_Conv_op(val);
2542 if (!mode_is_float(get_irn_mode(op)))
2544 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2548 new_val = be_transform_node(val);
2549 new_node = gen_vfist(dbgi, current_ir_graph, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* GP store: value may become an immediate operand */
2551 new_val = create_immediate_or_transform(val, 0);
2552 assert(mode != mode_b);
2554 if (get_mode_size_bits(mode) == 8) {
2555 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2556 addr.index, addr.mem, new_val);
2558 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2559 addr.index, addr.mem, new_val);
/* attributes go on `store` (== the fist node in the vfist case) */
2564 set_irn_pinned(store, get_irn_pinned(node));
2565 set_ia32_op_type(store, ia32_AddrModeD);
2566 set_ia32_ls_mode(store, mode);
2568 set_address(store, &addr);
2569 SET_IA32_ORIG_NODE(store, node);
2575 * Transforms a Store.
2577 * @return the created ia32 Store node
/* Dispatch for Store transformation: float constant stores get a special
 * lowering (sequence of integer stores), everything else goes through
 * gen_general_Store(). */
2579 static ir_node *gen_Store(ir_node *node)
2581 ir_node *val = get_Store_value(node);
2582 ir_mode *mode = get_irn_mode(val);
2584 if (mode_is_float(mode) && is_Const(val)) {
2585 /* We can transform every floating const store
2586 into a sequence of integer stores.
2587 If the constant is already in a register,
2588 it would be better to use it, but we don't
2589 have this information here. */
2590 return gen_float_const_Store(node, val);
2592 return gen_general_Store(node);
2596 * Transforms a Switch.
2598 * @return the created ia32 SwitchJmp node
/* Build an ia32 SwitchJmp from a Cond with a non-mode_b (table switch)
 * selector. Scans all Proj users to find the smallest/largest case value,
 * rejects over-large jump tables, and biases the selector by -switch_min
 * via a Lea when the smallest case is not 0. */
2600 static ir_node *create_Switch(ir_node *node)
2602 dbg_info *dbgi = get_irn_dbg_info(node);
2603 ir_node *block = be_transform_node(get_nodes_block(node));
2604 ir_node *sel = get_Cond_selector(node);
2605 ir_node *new_sel = be_transform_node(sel);
2606 long switch_min = LONG_MAX;
2607 long switch_max = LONG_MIN;
2608 long default_pn = get_Cond_default_proj(node);
2610 const ir_edge_t *edge;
2612 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2614 /* determine the smallest switch case value */
2615 foreach_out_edge(node, edge) {
2616 ir_node *proj = get_edge_src_irn(edge);
2617 long pn = get_Proj_proj(proj);
/* the default proj does not participate in the case range */
2618 if (pn == default_pn)
2621 if (pn < switch_min)
2623 if (pn > switch_max)
/* refuse to emit a jump table with more than 256000 slots */
2627 if ((unsigned long) (switch_max - switch_min) > 256000) {
2628 panic("Size of switch %+F bigger than 256000", node);
2631 if (switch_min != 0) {
2632 /* if smallest switch case is not 0 we need an additional sub */
2633 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2634 add_ia32_am_offs_int(new_sel, -switch_min);
2635 set_ia32_op_type(new_sel, ia32_AddrModeS);
2637 SET_IA32_ORIG_NODE(new_sel, node);
2640 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2641 SET_IA32_ORIG_NODE(new_node, node);
2647 * Transform a Cond node.
/* Transform a Cond node: table switches (selector mode != mode_b) become
 * a SwitchJmp, boolean conditions become a Jcc consuming the flags of the
 * producing compare. */
2649 static ir_node *gen_Cond(ir_node *node)
2651 ir_node *block = get_nodes_block(node);
2652 ir_node *new_block = be_transform_node(block);
2653 dbg_info *dbgi = get_irn_dbg_info(node);
2654 ir_node *sel = get_Cond_selector(node);
2655 ir_mode *sel_mode = get_irn_mode(sel);
2656 ir_node *flags = NULL;
2660 if (sel_mode != mode_b) {
2661 return create_Switch(node);
2664 /* we get flags from a Cmp */
2665 flags = get_flags_node(sel, &pnc);
2667 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2668 SET_IA32_ORIG_NODE(new_node, node);
2674 * Transform a be_Copy.
/* Transform a be_Copy: duplicate it and normalize the mode of GP-register
 * values to mode_Iu. */
2676 static ir_node *gen_be_Copy(ir_node *node)
2678 ir_node *new_node = be_duplicate_node(node);
2679 ir_mode *mode = get_irn_mode(new_node);
2681 if (ia32_mode_needs_gp_reg(mode)) {
2682 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare. Prefers fucomi when available; otherwise
 * uses ftst (compare against 0 constant) or fucom, each followed by
 * fnstsw and a Sahf to move the FPU status word into EFLAGS. */
2688 static ir_node *create_Fucom(ir_node *node)
2690 dbg_info *dbgi = get_irn_dbg_info(node);
2691 ir_node *block = get_nodes_block(node);
2692 ir_node *new_block = be_transform_node(block);
2693 ir_node *left = get_Cmp_left(node);
2694 ir_node *new_left = be_transform_node(left);
2695 ir_node *right = get_Cmp_right(node);
2699 if (ia32_cg_config.use_fucomi) {
2700 new_right = be_transform_node(right);
2701 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2703 set_ia32_commutative(new_node);
2704 SET_IA32_ORIG_NODE(new_node, node);
/* ftst only works when comparing against constant zero */
2706 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2707 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2709 new_right = be_transform_node(right);
2710 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2713 set_ia32_commutative(new_node);
2715 SET_IA32_ORIG_NODE(new_node, node);
/* transfer FPU status word (in AH) into EFLAGS */
2717 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2718 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE ucomisd/ucomiss style compare, using address-mode
 * matching so one operand may come straight from memory. */
2724 static ir_node *create_Ucomi(ir_node *node)
2726 dbg_info *dbgi = get_irn_dbg_info(node);
2727 ir_node *src_block = get_nodes_block(node);
2728 ir_node *new_block = be_transform_node(src_block);
2729 ir_node *left = get_Cmp_left(node);
2730 ir_node *right = get_Cmp_right(node);
2732 ia32_address_mode_t am;
2733 ia32_address_t *addr = &am.addr;
2735 match_arguments(&am, src_block, left, right, NULL,
2736 match_commutative | match_am);
2738 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2739 addr->mem, am.new_op1, am.new_op2,
2741 set_am_attributes(new_node, &am);
2743 SET_IA32_ORIG_NODE(new_node, node);
/* re-route a possible memory Proj when source AM was folded in */
2745 new_node = fix_mem_proj(new_node, &am);
2751 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2752 * to fold an and into a test node
/* Returns true iff every Proj on the Cmp is Eq or Lg; only then may an
 * And operand be folded into a Test instruction (Test only yields
 * equality information, not ordering). */
2754 static bool can_fold_test_and(ir_node *node)
2756 const ir_edge_t *edge;
2758 /** we can only have eq and lg projs */
2759 foreach_out_edge(node, edge) {
2760 ir_node *proj = get_edge_src_irn(edge);
2761 pn_Cmp pnc = get_Proj_proj(proj);
2762 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2770 * returns true if it is assured, that the upper bits of a node are "clean"
2771 * which means for a 16 or 8 bit value, that the upper bits in the register
2772 * are 0 for unsigned and a copy of the last significant bit for signed
/* Returns true if the upper bits of an 8/16-bit value held in a 32-bit
 * register are known "clean": zero for unsigned modes, sign-copies for
 * signed modes. Walks through Projs and inspects the producing ia32 op.
 * NOTE(review): several case labels/returns are missing from this
 * excerpt; the visible logic covers Conv, Shr, Sar, And, other binops,
 * and Const/Immediate. */
2775 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2777 assert(ia32_mode_needs_gp_reg(mode));
/* a full 32-bit value has no "upper bits" to worry about */
2778 if (get_mode_size_bits(mode) >= 32)
2781 if (is_Proj(transformed_node))
2782 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2784 switch (get_ia32_irn_opcode(transformed_node)) {
2785 case iro_ia32_Conv_I2I:
2786 case iro_ia32_Conv_I2I8Bit: {
2787 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* sign-extension vs zero-extension must agree with the queried mode */
2788 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2790 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2797 if (mode_is_signed(mode)) {
2798 return false; /* TODO handle signed modes */
/* Shr by a large-enough constant guarantees zeroed upper bits */
2800 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2801 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2802 const ia32_immediate_attr_t *attr
2803 = get_ia32_immediate_attr_const(right);
2804 if (attr->symconst == 0 &&
2805 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2809 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2813 /* TODO too conservative if shift amount is constant */
2814 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand suffices to clear the rest */
2817 if (!mode_is_signed(mode)) {
2819 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2820 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2822 /* TODO if one is known to be zero extended, then || is sufficient */
2827 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2828 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2830 case iro_ia32_Const:
2831 case iro_ia32_Immediate: {
2832 const ia32_immediate_attr_t *attr =
2833 get_ia32_immediate_attr_const(transformed_node);
2834 if (mode_is_signed(mode)) {
/* signed: all bits above the sign bit must be all-0 or all-1 */
2835 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2836 return shifted == 0 || shifted == -1;
2838 unsigned long shifted = (unsigned long)attr->offset;
2839 shifted >>= get_mode_size_bits(mode);
2840 return shifted == 0;
2850 * Generate code for a Cmp.
/* Transform a Cmp. Floats are dispatched to Ucomi (SSE) or Fucom (x87).
 * Integer compares against 0 where the left operand is an And with a
 * single user become a Test; otherwise a regular Cmp is built. In both
 * cases the 32-bit form is preferred when the operands' upper bits are
 * known clean, since its encoding is smaller. */
2852 static ir_node *gen_Cmp(ir_node *node)
2854 dbg_info *dbgi = get_irn_dbg_info(node);
2855 ir_node *block = get_nodes_block(node);
2856 ir_node *new_block = be_transform_node(block);
2857 ir_node *left = get_Cmp_left(node);
2858 ir_node *right = get_Cmp_right(node);
2859 ir_mode *cmp_mode = get_irn_mode(left);
2861 ia32_address_mode_t am;
2862 ia32_address_t *addr = &am.addr;
2865 if (mode_is_float(cmp_mode)) {
2866 if (ia32_cg_config.use_sse2) {
2867 return create_Ucomi(node);
2869 return create_Fucom(node);
2873 assert(ia32_mode_needs_gp_reg(cmp_mode));
2875 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2876 cmp_unsigned = !mode_is_signed(cmp_mode);
2877 if (is_Const_0(right) &&
2879 get_irn_n_edges(left) == 1 &&
2880 can_fold_test_and(node)) {
2881 /* Test(and_left, and_right) */
2882 ir_node *and_left = get_And_left(left);
2883 ir_node *and_right = get_And_right(left);
2885 /* matze: code here used mode instead of cmd_mode, I think it is always
2886 * the same as cmp_mode, but I leave this here to see if this is really
2889 assert(get_irn_mode(and_left) == cmp_mode);
2891 match_arguments(&am, block, and_left, and_right, NULL,
2893 match_am | match_8bit_am | match_16bit_am |
2894 match_am_and_immediates | match_immediate);
2896 /* use 32bit compare mode if possible since the opcode is smaller */
2897 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2898 upper_bits_clean(am.new_op2, cmp_mode)) {
2899 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2902 if (get_mode_size_bits(cmp_mode) == 8) {
2903 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2904 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2907 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2908 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2911 /* Cmp(left, right) */
2912 match_arguments(&am, block, left, right, NULL,
2913 match_commutative | match_am | match_8bit_am |
2914 match_16bit_am | match_am_and_immediates |
2916 /* use 32bit compare mode if possible since the opcode is smaller */
2917 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2918 upper_bits_clean(am.new_op2, cmp_mode)) {
2919 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2922 if (get_mode_size_bits(cmp_mode) == 8) {
2923 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2924 addr->index, addr->mem, am.new_op1,
2925 am.new_op2, am.ins_permuted,
2928 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2929 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2932 set_am_attributes(new_node, &am);
2933 set_ia32_ls_mode(new_node, cmp_mode);
2935 SET_IA32_ORIG_NODE(new_node, node);
2937 new_node = fix_mem_proj(new_node, &am);
/* Build an ia32 CMov for a Mux: selects between the transformed false/true
 * values based on the given flags/condition code. Only valid for GP-register
 * values and when the target supports cmov. */
2942 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2945 dbg_info *dbgi = get_irn_dbg_info(node);
2946 ir_node *block = get_nodes_block(node);
2947 ir_node *new_block = be_transform_node(block);
2948 ir_node *val_true = get_Mux_true(node);
2949 ir_node *val_false = get_Mux_false(node);
2951 ia32_address_mode_t am;
2952 ia32_address_t *addr;
2954 assert(ia32_cg_config.use_cmov);
2955 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
/* flags passed as "other" dependency so matching keeps it alive */
2959 match_arguments(&am, block, val_false, val_true, flags,
2960 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2962 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2963 addr->mem, am.new_op1, am.new_op2, new_flags,
2964 am.ins_permuted, pnc);
2965 set_am_attributes(new_node, &am);
2967 SET_IA32_ORIG_NODE(new_node, node);
2969 new_node = fix_mem_proj(new_node, &am);
2975 * Creates a ia32 Setcc instruction.
/* Create an ia32 Set (setcc) producing 0/1 from flags, then zero-extend
 * the 8-bit result up to the requested mode when it is wider than 8 bits. */
2977 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2978 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2981 ir_mode *mode = get_irn_mode(orig_node);
2984 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2985 SET_IA32_ORIG_NODE(new_node, orig_node);
2987 /* we might need to conv the result up */
2988 if (get_mode_size_bits(mode) > 8) {
/* mode_Bu: unsigned byte, so the Conv zero-extends */
2989 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
2990 nomem, new_node, mode_Bu);
2991 SET_IA32_ORIG_NODE(new_node, orig_node);
2998 * Create instruction for an unsigned Difference or Zero.
/* Create code for an unsigned "difference or zero" (Doz):
 * result = a - b when a >= b (unsigned), else 0. Implemented branch-free
 * as sub; sbb 0 (all-ones mask on borrow, inverted by sbb semantics);
 * and. The Sub is built via gen_binop so AM/immediates can be used. */
3000 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
3002 ir_graph *irg = current_ir_graph;
3003 ir_mode *mode = get_irn_mode(psi);
3004 ir_node *new_node, *sub, *sbb, *eflags, *block;
3008 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3009 match_mode_neutral | match_am | match_immediate | match_two_users);
3011 block = get_nodes_block(new_node);
3013 if (is_Proj(new_node)) {
3014 sub = get_Proj_pred(new_node);
3015 assert(is_ia32_Sub(sub));
/* we need both the result and the flags Proj of the Sub */
3018 set_irn_mode(sub, mode_T);
3019 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
3021 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3023 dbgi = get_irn_dbg_info(psi);
3024 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3026 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3027 set_ia32_commutative(new_node);
3032 * Create an const array of two float consts.
3034 * @param c0 the first constant
3035 * @param c1 the second constant
3036 * @param new_mode IN/OUT for the mode of the constants, if NULL
3037 * smallest possible mode will be used
/* Create a static, local, constant global entity holding an array of the
 * two given float constants. Tries to narrow both tarvals losslessly to
 * mode_F (then mode_D) to minimize the table size; *new_mode reports the
 * mode actually used. Returns the new entity. */
3039 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3041 ir_mode *mode = *new_mode;
3043 ir_initializer_t *initializer;
3044 tarval *tv0 = get_Const_tarval(c0);
3045 tarval *tv1 = get_Const_tarval(c1);
3048 /* detect the best mode for the constants */
3049 mode = get_tarval_mode(tv0);
3051 if (mode != mode_F) {
3052 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3053 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3055 tv0 = tarval_convert_to(tv0, mode);
3056 tv1 = tarval_convert_to(tv1, mode);
3057 } else if (mode != mode_D) {
3058 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3059 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3061 tv0 = tarval_convert_to(tv0, mode);
3062 tv1 = tarval_convert_to(tv1, mode);
/* build array-of-float type and a uniquely named entity in the global type */
3069 tp = ia32_create_float_type(mode, 4);
3070 tp = ia32_create_float_array(tp);
3072 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3074 set_entity_ld_ident(ent, get_entity_ident(ent));
3075 set_entity_visibility(ent, visibility_local);
3076 set_entity_variability(ent, variability_constant);
3077 set_entity_allocation(ent, allocation_static);
3079 initializer = create_initializer_compound(2);
3081 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3082 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3084 set_entity_initializer(ent, initializer);
3091 * Transforms a Mux node into some code sequence.
3093 * @return The transformed node.
/* Transform a Mux node. Special cases handled here:
 *  - float Mux of compared operands -> SSE min/max
 *  - float Mux of two constants -> setcc-indexed load from a 2-element
 *    constant array in memory
 *  - integer Mux matching the unsigned difference-or-zero pattern -> Doz
 *  - integer Mux of 0/1 constants -> setcc, everything else -> CMov
 * NOTE(review): several lines (else-branches, returns, scale cases) are
 * missing from this excerpt. */
3095 static ir_node *gen_Mux(ir_node *node)
3097 dbg_info *dbgi = get_irn_dbg_info(node);
3098 ir_node *block = get_nodes_block(node);
3099 ir_node *new_block = be_transform_node(block);
3100 ir_node *mux_true = get_Mux_true(node);
3101 ir_node *mux_false = get_Mux_false(node);
3102 ir_node *cond = get_Mux_sel(node);
3103 ir_mode *mode = get_irn_mode(node);
3108 assert(get_irn_mode(cond) == mode_b);
3110 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3111 if (mode_is_float(mode)) {
3112 ir_node *cmp = get_Proj_pred(cond);
3113 ir_node *cmp_left = get_Cmp_left(cmp);
3114 ir_node *cmp_right = get_Cmp_right(cmp);
3115 pn_Cmp pnc = get_Proj_proj(cond);
3117 if (ia32_cg_config.use_sse2) {
3118 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3119 if (cmp_left == mux_true && cmp_right == mux_false) {
3120 /* Mux(a <= b, a, b) => MIN */
3121 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3122 match_commutative | match_am | match_two_users);
3123 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3124 /* Mux(a <= b, b, a) => MAX */
3125 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3126 match_commutative | match_am | match_two_users);
3128 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3129 if (cmp_left == mux_true && cmp_right == mux_false) {
3130 /* Mux(a >= b, a, b) => MAX */
3131 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3132 match_commutative | match_am | match_two_users);
3133 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3134 /* Mux(a >= b, b, a) => MIN */
3135 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3136 match_commutative | match_am | match_two_users);
/* float Mux between two constants: build a 2-entry constant table and
 * load entry [setcc-result] from it */
3140 if (is_Const(mux_true) && is_Const(mux_false)) {
3141 ia32_address_mode_t am;
3146 flags = get_flags_node(cond, &pnc);
3147 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3149 if (ia32_cg_config.use_sse2) {
3150 /* cannot load from different mode on SSE */
3153 /* x87 can load any mode */
3157 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 setcc result by the element size (AM scale covers up to
 * 8; larger elements need Lea/Add tricks below) */
3159 switch (get_mode_size_bytes(new_mode)) {
3169 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3170 set_ia32_am_scale(new_node, 2);
3175 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3176 set_ia32_am_scale(new_node, 1);
3179 /* arg, shift 16 NOT supported */
3181 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3184 panic("Unsupported constant size");
3187 am.ls_mode = new_mode;
3188 am.addr.base = noreg_GP;
3189 am.addr.index = new_node;
3190 am.addr.mem = nomem;
3192 am.addr.scale = scale;
3193 am.addr.use_frame = 0;
3194 am.addr.frame_entity = NULL;
3195 am.addr.symconst_sign = 0;
3196 am.mem_proj = am.addr.mem;
3197 am.op_type = ia32_AddrModeS;
3200 am.pinned = op_pin_state_floats;
3202 am.ins_permuted = 0;
3204 if (ia32_cg_config.use_sse2)
3205 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3207 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3208 set_am_attributes(load, &am);
3210 return new_rd_Proj(NULL, current_ir_graph, block, load, mode_vfp, pn_ia32_res);
3212 panic("cannot transform floating point Mux");
3215 assert(ia32_mode_needs_gp_reg(mode));
3217 if (is_Proj(cond)) {
3218 ir_node *cmp = get_Proj_pred(cond);
3220 ir_node *cmp_left = get_Cmp_left(cmp);
3221 ir_node *cmp_right = get_Cmp_right(cmp);
3222 pn_Cmp pnc = get_Proj_proj(cond);
3224 /* check for unsigned Doz first */
3225 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3226 is_Const_0(mux_false) && is_Sub(mux_true) &&
3227 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3228 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3229 return create_Doz(node, cmp_left, cmp_right);
3230 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3231 is_Const_0(mux_true) && is_Sub(mux_false) &&
3232 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3233 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3234 return create_Doz(node, cmp_left, cmp_right);
3239 flags = get_flags_node(cond, &pnc);
3241 if (is_Const(mux_true) && is_Const(mux_false)) {
3242 /* both are const, good */
3243 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3244 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3245 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
/* inverted 0/1 Mux: reuse setcc with permuted inputs */
3246 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
3248 /* Not that simple. */
3253 new_node = create_CMov(node, cond, flags, pnc);
3261 * Create a conversion from x87 state register to general purpose.
/* Convert an x87 float to a GP integer by storing it with fist(tp) onto
 * the frame and loading the integer back. Unsigned 32-bit targets are
 * stored as signed 64-bit (mode_Ls) and only the low 32 bits are loaded,
 * since fist can only store signed integers. */
3263 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3265 ir_node *block = be_transform_node(get_nodes_block(node));
3266 ir_node *op = get_Conv_op(node);
3267 ir_node *new_op = be_transform_node(op);
3268 ir_graph *irg = current_ir_graph;
3269 dbg_info *dbgi = get_irn_dbg_info(node);
3270 ir_mode *mode = get_irn_mode(node);
3271 ir_node *fist, *load, *mem;
3273 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3274 set_irn_pinned(fist, op_pin_state_floats);
3275 set_ia32_use_frame(fist);
3276 set_ia32_op_type(fist, ia32_AddrModeD);
3278 assert(get_mode_size_bits(mode) <= 32);
3279 /* exception we can only store signed 32 bit integers, so for unsigned
3280 we store a 64bit (signed) integer and load the lower bits */
3281 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3282 set_ia32_ls_mode(fist, mode_Ls);
3284 set_ia32_ls_mode(fist, mode_Is);
3286 SET_IA32_ORIG_NODE(fist, node);
3289 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3291 set_irn_pinned(load, op_pin_state_floats);
3292 set_ia32_use_frame(load);
3293 set_ia32_op_type(load, ia32_AddrModeS);
3294 set_ia32_ls_mode(load, mode_Is);
/* tell spill-slot assignment how large the stack entity must be */
3295 if (get_ia32_ls_mode(fist) == mode_Ls) {
3296 ia32_attr_t *attr = get_ia32_attr(load);
3297 attr->data.need_64bit_stackent = 1;
3299 ia32_attr_t *attr = get_ia32_attr(load);
3300 attr->data.need_32bit_stackent = 1;
3302 SET_IA32_ORIG_NODE(load, node);
3304 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3308 * Creates a x87 strict Conv by placing a Store and a Load
/* Realize a strict x87 float Conv by a round trip through memory:
 * store in the target mode onto the frame, then load it back. This
 * forces rounding to tgt_mode precision, which the 80-bit x87 registers
 * would otherwise not perform. */
3310 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3312 ir_node *block = get_nodes_block(node);
3313 ir_graph *irg = current_ir_graph;
3314 dbg_info *dbgi = get_irn_dbg_info(node);
3315 ir_node *frame = get_irg_frame(irg);
3316 ir_node *store, *load;
3319 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3320 set_ia32_use_frame(store);
3321 set_ia32_op_type(store, ia32_AddrModeD);
3322 SET_IA32_ORIG_NODE(store, node);
3324 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3325 set_ia32_use_frame(load);
3326 set_ia32_op_type(load, ia32_AddrModeS);
3327 SET_IA32_ORIG_NODE(load, node);
/* result is delivered in mode_E (x87 extended precision register) */
3329 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
/* Build an integer-to-integer Conv node, selecting the 8-bit variant
 * when the (smaller) mode is 8 bits wide. */
3333 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3334 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3336 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3338 func = get_mode_size_bits(mode) == 8 ?
3339 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3340 return func(dbgi, block, base, index, mem, val, mode);
3344 * Create a conversion from general purpose to x87 register
/* Convert a GP integer to an x87 float via fild. When possible, fild
 * reads its operand directly from memory (source AM). Otherwise the
 * value is stored to the frame and loaded with fild; 32-bit unsigned
 * values get a zero high word appended and are loaded as 64-bit
 * (mode_Ls), because fild only reads signed integers. */
3346 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3348 ir_node *src_block = get_nodes_block(node);
3349 ir_node *block = be_transform_node(src_block);
3350 ir_graph *irg = current_ir_graph;
3351 dbg_info *dbgi = get_irn_dbg_info(node);
3352 ir_node *op = get_Conv_op(node);
3353 ir_node *new_op = NULL;
3355 ir_mode *store_mode;
3360 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3361 if (possible_int_mode_for_fp(src_mode)) {
3362 ia32_address_mode_t am;
3364 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3365 if (am.op_type == ia32_AddrModeS) {
3366 ia32_address_t *addr = &am.addr;
3368 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3369 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3371 set_am_attributes(fild, &am);
3372 SET_IA32_ORIG_NODE(fild, node);
3374 fix_mem_proj(fild, &am);
/* fallback: go through a frame slot */
3379 if (new_op == NULL) {
3380 new_op = be_transform_node(op);
3383 mode = get_irn_mode(op);
3385 /* first convert to 32 bit signed if necessary */
3386 if (get_mode_size_bits(src_mode) < 32) {
3387 if (!upper_bits_clean(new_op, src_mode)) {
3388 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3389 SET_IA32_ORIG_NODE(new_op, node);
3394 assert(get_mode_size_bits(mode) == 32);
3397 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3399 set_ia32_use_frame(store);
3400 set_ia32_op_type(store, ia32_AddrModeD);
3401 set_ia32_ls_mode(store, mode_Iu);
3403 /* exception for 32bit unsigned, do a 64bit spill+load */
3404 if (!mode_is_signed(mode)) {
3407 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* write a zero high word at offset 4 so the 64-bit value is positive */
3409 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3410 noreg_GP, nomem, zero_const);
3412 set_ia32_use_frame(zero_store);
3413 set_ia32_op_type(zero_store, ia32_AddrModeD);
3414 add_ia32_am_offs_int(zero_store, 4);
3415 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both stores; Sync merges their memory effects */
3420 store = new_rd_Sync(dbgi, irg, block, 2, in);
3421 store_mode = mode_Ls;
3423 store_mode = mode_Is;
3427 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3429 set_ia32_use_frame(fild);
3430 set_ia32_op_type(fild, ia32_AddrModeS);
3431 set_ia32_ls_mode(fild, store_mode);
3433 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3439 * Create a conversion from one integer mode into another one
/* Create a conversion between two integer modes. The conversion is done
 * in the smaller of the two modes; if the operand's upper bits are
 * already clean the Conv is dropped entirely (missing return in this
 * excerpt). */
3441 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3442 dbg_info *dbgi, ir_node *block, ir_node *op,
3445 ir_node *new_block = be_transform_node(block);
3447 ir_mode *smaller_mode;
3448 ia32_address_mode_t am;
3449 ia32_address_t *addr = &am.addr;
3452 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3453 smaller_mode = src_mode;
3455 smaller_mode = tgt_mode;
3458 #ifdef DEBUG_libfirm
/* a Conv directly after a constant should have been folded earlier */
3460 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3465 match_arguments(&am, block, NULL, op, NULL,
3466 match_am | match_8bit_am | match_16bit_am);
3468 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3469 /* unnecessary conv. in theory it shouldn't have been AM */
3470 assert(is_ia32_NoReg_GP(addr->base));
3471 assert(is_ia32_NoReg_GP(addr->index));
3472 assert(is_NoMem(addr->mem));
3473 assert(am.addr.offset == 0);
3474 assert(am.addr.symconst_ent == NULL);
3478 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3479 addr->mem, am.new_op2, smaller_mode);
3480 set_am_attributes(new_node, &am);
3481 /* match_arguments assume that out-mode = in-mode, this isn't true here
3483 set_ia32_ls_mode(new_node, smaller_mode);
3484 SET_IA32_ORIG_NODE(new_node, node);
3485 new_node = fix_mem_proj(new_node, &am);
3490 * Transforms a Conv node.
3492 * @return The created ia32 Conv node
/* Transform a Conv node, covering all source/target mode combinations:
 * mode_b sources are no-ops (bools are already 0/1 ints), float<->float
 * uses SSE Conv_FP2FP or x87 strict round trips, float->int uses
 * Conv_FP2I (SSE) or fist (x87), int->float uses Conv_I2FP (SSE) or
 * fild (x87), and int->int goes through create_I2I_Conv. */
3494 static ir_node *gen_Conv(ir_node *node)
3496 ir_node *block = get_nodes_block(node);
3497 ir_node *new_block = be_transform_node(block);
3498 ir_node *op = get_Conv_op(node);
3499 ir_node *new_op = NULL;
3500 dbg_info *dbgi = get_irn_dbg_info(node);
3501 ir_mode *src_mode = get_irn_mode(op);
3502 ir_mode *tgt_mode = get_irn_mode(node);
3503 int src_bits = get_mode_size_bits(src_mode);
3504 int tgt_bits = get_mode_size_bits(tgt_mode);
3505 ir_node *res = NULL;
3507 assert(!mode_is_int(src_mode) || src_bits <= 32);
3508 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3510 if (src_mode == mode_b) {
3511 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3512 /* nothing to do, we already model bools as 0/1 ints */
3513 return be_transform_node(op);
3516 if (src_mode == tgt_mode) {
3517 if (get_Conv_strict(node)) {
3518 if (ia32_cg_config.use_sse2) {
3519 /* when we are in SSE mode, we can kill all strict no-op conversion */
3520 return be_transform_node(op);
3523 /* this should be optimized already, but who knows... */
3524 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3525 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3526 return be_transform_node(op);
3530 if (mode_is_float(src_mode)) {
3531 new_op = be_transform_node(op);
3532 /* we convert from float ... */
3533 if (mode_is_float(tgt_mode)) {
3535 /* Matze: I'm a bit unsure what the following is for? seems wrong
3537 if (src_mode == mode_E && tgt_mode == mode_D
3538 && !get_Conv_strict(node)) {
3539 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3545 if (ia32_cg_config.use_sse2) {
3546 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3547 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3549 set_ia32_ls_mode(res, tgt_mode);
3551 if (get_Conv_strict(node)) {
3552 /* if fp_no_float_fold is not set then we assume that we
3553 * don't have any float operations in a non
3554 * mode_float_arithmetic mode and can skip strict upconvs */
3555 if (src_bits < tgt_bits
3556 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3557 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3560 res = gen_x87_strict_conv(tgt_mode, new_op);
3561 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3565 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3570 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3571 if (ia32_cg_config.use_sse2) {
3572 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3574 set_ia32_ls_mode(res, src_mode);
3576 return gen_x87_fp_to_gp(node);
3580 /* we convert from int ... */
3581 if (mode_is_float(tgt_mode)) {
3583 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3584 if (ia32_cg_config.use_sse2) {
3585 new_op = be_transform_node(op);
3586 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3588 set_ia32_ls_mode(res, tgt_mode);
3590 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3591 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3592 res = gen_x87_gp_to_fp(node, src_mode);
3594 /* we need a strict-Conv, if the int mode has more bits than the
3596 if (float_mantissa < int_mantissa) {
3597 res = gen_x87_strict_conv(tgt_mode, res);
3598 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3602 } else if (tgt_mode == mode_b) {
3603 /* mode_b lowering already took care that we only have 0/1 values */
3604 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3605 src_mode, tgt_mode));
3606 return be_transform_node(op);
3609 if (src_bits == tgt_bits) {
/* same width int->int Conv carries no information; drop it */
3610 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3611 src_mode, tgt_mode));
3612 return be_transform_node(op);
3615 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode the node as an ia32 Immediate (honoring the given asm
 * constraint type); fall back to a normal transformation otherwise. */
3623 static ir_node *create_immediate_or_transform(ir_node *node,
3624 char immediate_constraint_type)
3626 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3627 if (new_node == NULL) {
3628 new_node = be_transform_node(node);
3634 * Transforms a FrameAddr into an ia32 Add.
/* Transform a be_FrameAddr into a frame-relative Lea (despite the older
 * doc comment above mentioning an Add). */
3636 static ir_node *gen_be_FrameAddr(ir_node *node)
3638 ir_node *block = be_transform_node(get_nodes_block(node));
3639 ir_node *op = be_get_FrameAddr_frame(node);
3640 ir_node *new_op = be_transform_node(op);
3641 dbg_info *dbgi = get_irn_dbg_info(node);
3644 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3645 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3646 set_ia32_use_frame(new_node);
3648 SET_IA32_ORIG_NODE(new_node, node);
3654 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Transform a be_Return. With SSE2 and a float return value, the result
 * sits in an XMM register but the ABI returns floats on the x87 stack:
 * spill XMM0 to the frame, reload it with vfld, and rebuild the Barrier
 * feeding the Return with the new value/memory inputs. All other Returns
 * are simply duplicated. */
3656 static ir_node *gen_be_Return(ir_node *node)
3658 ir_graph *irg = current_ir_graph;
3659 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3660 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3661 ir_entity *ent = get_irg_entity(irg);
3662 ir_type *tp = get_entity_type(ent);
3667 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3668 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3670 int pn_ret_val, pn_ret_mem, arity, i;
3672 assert(ret_val != NULL);
3673 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3674 return be_duplicate_node(node);
3677 res_type = get_method_res_type(tp, 0);
3679 if (! is_Primitive_type(res_type)) {
3680 return be_duplicate_node(node);
3683 mode = get_type_mode(res_type);
3684 if (! mode_is_float(mode)) {
3685 return be_duplicate_node(node);
3688 assert(get_method_n_ress(tp) == 1);
3690 pn_ret_val = get_Proj_proj(ret_val);
3691 pn_ret_mem = get_Proj_proj(ret_mem);
3693 /* get the Barrier */
3694 barrier = get_Proj_pred(ret_val);
3696 /* get result input of the Barrier */
3697 ret_val = get_irn_n(barrier, pn_ret_val);
3698 new_ret_val = be_transform_node(ret_val);
3700 /* get memory input of the Barrier */
3701 ret_mem = get_irn_n(barrier, pn_ret_mem);
3702 new_ret_mem = be_transform_node(ret_mem);
3704 frame = get_irg_frame(irg);
3706 dbgi = get_irn_dbg_info(barrier);
3707 block = be_transform_node(get_nodes_block(barrier));
3709 /* store xmm0 onto stack */
3710 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3711 new_ret_mem, new_ret_val);
3712 set_ia32_ls_mode(sse_store, mode);
3713 set_ia32_op_type(sse_store, ia32_AddrModeD);
3714 set_ia32_use_frame(sse_store);
3716 /* load into x87 register */
3717 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3718 set_ia32_op_type(fld, ia32_AddrModeS);
3719 set_ia32_use_frame(fld);
3721 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3722 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3724 /* create a new barrier */
3725 arity = get_irn_arity(barrier);
3726 in = ALLOCAN(ir_node*, arity);
3727 for (i = 0; i < arity; ++i) {
/* substitute the reloaded value and its memory Proj at the right slots */
3730 if (i == pn_ret_val) {
3732 } else if (i == pn_ret_mem) {
3735 ir_node *in = get_irn_n(barrier, i);
3736 new_in = be_transform_node(in);
3741 new_barrier = new_ir_node(dbgi, irg, block,
3742 get_irn_op(barrier), get_irn_mode(barrier),
3744 copy_node_attr(barrier, new_barrier);
3745 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the Return's Projs find the new Barrier */
3746 be_set_transformed_node(barrier, new_barrier);
3748 /* transform normally */
3749 return be_duplicate_node(node);
3753 * Transform a be_AddSP into an ia32_SubSP.
/* The ia32 stack grows downwards, so adding to the logical stack becomes a
 * SubSP on the real stack pointer. */
3755 static ir_node *gen_be_AddSP(ir_node *node)
3757 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3758 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3760 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3761 match_am | match_immediate);
3765 * Transform a be_SubSP into an ia32_AddSP
/* Mirror of gen_be_AddSP: shrinking the logical stack is an AddSP on the
 * downward-growing hardware stack pointer. */
3767 static ir_node *gen_be_SubSP(ir_node *node)
3769 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3770 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3772 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3773 match_am | match_immediate);
3777 * Change some phi modes
3779 static ir_node *gen_Phi(ir_node *node)
3781 ir_node *block = be_transform_node(get_nodes_block(node));
3782 ir_graph *irg = current_ir_graph;
3783 dbg_info *dbgi = get_irn_dbg_info(node);
3784 ir_mode *mode = get_irn_mode(node);
/* Normalize the Phi's mode to what the backend register classes use
 * (32-bit GP / xmm / vfp); the operands themselves stay untransformed. */
3787 if (ia32_mode_needs_gp_reg(mode)) {
3788 /* we shouldn't have any 64bit stuff around anymore */
3789 assert(get_mode_size_bits(mode) <= 32);
3790 /* all integer operations are on 32bit registers now */
3792 } else if (mode_is_float(mode)) {
3793 if (ia32_cg_config.use_sse2) {
3800 /* phi nodes allow loops, so we use the old arguments for now
3801 * and fix this later */
3802 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3803 get_irn_in(node) + 1)
3804 copy_node_attr(node, phi);
3805 be_duplicate_deps(node, phi);
/* Predecessors are enqueued so the transformer visits them and the Phi
 * inputs can be fixed up afterwards. */
3807 be_enqueue_preds(node);
/* Transform an indirect jump (computed goto); the target address may be
 * folded into an address mode or an immediate. */
3815 static ir_node *gen_IJmp(ir_node *node)
3817 ir_node *block = get_nodes_block(node);
3818 ir_node *new_block = be_transform_node(block);
3819 dbg_info *dbgi = get_irn_dbg_info(node);
3820 ir_node *op = get_IJmp_target(node);
3822 ia32_address_mode_t am;
3823 ia32_address_t *addr = &am.addr;
3825 assert(get_irn_mode(op) == mode_P);
3827 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3829 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3830 addr->mem, am.new_op2);
3831 set_am_attributes(new_node, &am);
3832 SET_IA32_ORIG_NODE(new_node, node);
/* If address-mode matching consumed a load, reroute its memory Proj. */
3834 new_node = fix_mem_proj(new_node, &am);
3840 * Transform a Bound node.
3842 static ir_node *gen_Bound(ir_node *node)
3845 ir_node *lower = get_Bound_lower(node);
3846 dbg_info *dbgi = get_irn_dbg_info(node);
/* Only the lower-bound-zero form is supported: compare index against the
 * upper bound and branch. The unsigned-less-than condition also catches
 * negative indices in one comparison. */
3848 if (is_Const_0(lower)) {
3849 /* typical case for Java */
3850 ir_node *sub, *res, *flags, *block;
3851 ir_graph *irg = current_ir_graph;
3853 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3854 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3856 block = get_nodes_block(res);
3857 if (! is_Proj(res)) {
3859 set_irn_mode(sub, mode_T);
3860 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3862 sub = get_Proj_pred(res);
3864 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3865 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3866 SET_IA32_ORIG_NODE(new_node, node);
3868 panic("generic Bound not supported in ia32 Backend");
/* Lower an l_ShlDep (shift-left with an explicit dependency on the count)
 * to an ia32 Shl. */
3874 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3876 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3877 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3879 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3880 match_immediate | match_mode_neutral);
/* Lower an l_ShrDep (logical shift right with dependency) to an ia32 Shr. */
3883 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3885 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3886 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3887 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Lower an l_SarDep (arithmetic shift right with dependency) to an ia32 Sar. */
3891 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3893 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3894 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3895 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Lower an l_Add (low word of a 64-bit add) to an ia32 Add and force it
 * into mode_T so its carry flag can be projected for the matching l_Adc. */
3899 static ir_node *gen_ia32_l_Add(ir_node *node)
3901 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3902 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3903 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3904 match_commutative | match_am | match_immediate |
3905 match_mode_neutral);
3907 if (is_Proj(lowered)) {
3908 lowered = get_Proj_pred(lowered);
3910 assert(is_ia32_Add(lowered));
3911 set_irn_mode(lowered, mode_T);
/* Lower an l_Adc (high word add-with-carry) to an ia32 Adc consuming the
 * flags produced by the low-word Add. */
3917 static ir_node *gen_ia32_l_Adc(ir_node *node)
3919 return gen_binop_flags(node, new_bd_ia32_Adc,
3920 match_commutative | match_am | match_immediate |
3921 match_mode_neutral);
3925 * Transforms a l_MulS into a "real" MulS node.
3927 * @return the created ia32 Mul node
/* Unsigned widening multiply (EDX:EAX result). */
3929 static ir_node *gen_ia32_l_Mul(ir_node *node)
3931 ir_node *left = get_binop_left(node);
3932 ir_node *right = get_binop_right(node);
3934 return gen_binop(node, left, right, new_bd_ia32_Mul,
3935 match_commutative | match_am | match_mode_neutral);
3939 * Transforms a l_IMulS into a "real" IMul1OPS node.
3941 * @return the created ia32 IMul1OP node
/* Signed widening multiply, one-operand imul form (EDX:EAX result). */
3943 static ir_node *gen_ia32_l_IMul(ir_node *node)
3945 ir_node *left = get_binop_left(node);
3946 ir_node *right = get_binop_right(node);
3948 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3949 match_commutative | match_am | match_mode_neutral);
/* Lower an l_Sub (low word of a 64-bit subtract) to an ia32 Sub and force
 * it into mode_T so the borrow flag is available for the matching l_Sbb. */
3952 static ir_node *gen_ia32_l_Sub(ir_node *node)
3954 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3955 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3956 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3957 match_am | match_immediate | match_mode_neutral);
3959 if (is_Proj(lowered)) {
3960 lowered = get_Proj_pred(lowered);
3962 assert(is_ia32_Sub(lowered));
3963 set_irn_mode(lowered, mode_T);
/* Lower an l_Sbb (high word subtract-with-borrow) to an ia32 Sbb consuming
 * the flags of the low-word Sub. */
3969 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3971 return gen_binop_flags(node, new_bd_ia32_Sbb,
3972 match_am | match_immediate | match_mode_neutral);
3976 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3977 * op1 - target to be shifted
3978 * op2 - contains bits to be shifted into target
3980 * Only op3 can be an immediate.
3982 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3983 ir_node *low, ir_node *count)
3985 ir_node *block = get_nodes_block(node);
3986 ir_node *new_block = be_transform_node(block);
3987 dbg_info *dbgi = get_irn_dbg_info(node);
3988 ir_node *new_high = be_transform_node(high);
3989 ir_node *new_low = be_transform_node(low);
3993 /* the shift amount can be any mode that is bigger than 5 bits, since all
3994 * other bits are ignored anyway */
/* Strip single-user integer Convs on the count: the hardware masks the
 * shift amount, so narrowing/widening conversions are irrelevant. */
3995 while (is_Conv(count) &&
3996 get_irn_n_edges(count) == 1 &&
3997 mode_is_int(get_irn_mode(count))) {
3998 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3999 count = get_Conv_op(count);
4001 new_count = create_immediate_or_transform(count, 0);
4003 if (is_ia32_l_ShlD(node)) {
4004 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4007 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4010 SET_IA32_ORIG_NODE(new_node, node);
/* Lower an l_ShlD (64-bit shift-left helper) via the shared ShlD/ShrD path. */
4015 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4017 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4018 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4019 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4020 return gen_lowered_64bit_shifts(node, high, low, count);
/* Lower an l_ShrD (64-bit shift-right helper) via the shared ShlD/ShrD path. */
4023 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4025 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4026 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4027 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4028 return gen_lowered_64bit_shifts(node, high, low, count);
/* Convert a 64-bit integer (given as two 32-bit halves) to a float via the
 * x87 fild instruction: store both halves to a frame slot, fild the 64-bit
 * value, and correct for unsigned inputs afterwards. */
4031 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4033 ir_node *src_block = get_nodes_block(node);
4034 ir_node *block = be_transform_node(src_block);
4035 ir_graph *irg = current_ir_graph;
4036 dbg_info *dbgi = get_irn_dbg_info(node);
4037 ir_node *frame = get_irg_frame(irg);
4038 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4039 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4040 ir_node *new_val_low = be_transform_node(val_low);
4041 ir_node *new_val_high = be_transform_node(val_high);
4043 ir_node *sync, *fild, *res;
4044 ir_node *store_low, *store_high;
4046 if (ia32_cg_config.use_sse2) {
4047 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* Spill both 32-bit halves to adjacent frame slots (high half at +4),
 * forming the 64-bit source operand for fild. */
4051 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4053 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4055 SET_IA32_ORIG_NODE(store_low, node);
4056 SET_IA32_ORIG_NODE(store_high, node);
4058 set_ia32_use_frame(store_low);
4059 set_ia32_use_frame(store_high);
4060 set_ia32_op_type(store_low, ia32_AddrModeD);
4061 set_ia32_op_type(store_high, ia32_AddrModeD);
4062 set_ia32_ls_mode(store_low, mode_Iu);
4063 set_ia32_ls_mode(store_high, mode_Is);
4064 add_ia32_am_offs_int(store_high, 4);
4068 sync = new_rd_Sync(dbgi, irg, block, 2, in);
4071 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4073 set_ia32_use_frame(fild);
4074 set_ia32_op_type(fild, ia32_AddrModeS);
4075 set_ia32_ls_mode(fild, mode_Ls);
4077 SET_IA32_ORIG_NODE(fild, node);
4079 res = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/* fild treats its operand as signed. For unsigned sources, add a bias
 * loaded from a constant table that appears to be indexed by the sign bit
 * of the high word (Shr by 31 as address index) — TODO confirm the table
 * layout against ia32_gen_fp_known_const. */
4081 if (! mode_is_signed(get_irn_mode(val_high))) {
4082 ia32_address_mode_t am;
4084 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4087 am.addr.base = noreg_GP;
4088 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4089 am.addr.mem = nomem;
4092 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4093 am.addr.use_frame = 0;
4094 am.addr.frame_entity = NULL;
4095 am.addr.symconst_sign = 0;
4096 am.ls_mode = mode_F;
4097 am.mem_proj = nomem;
4098 am.op_type = ia32_AddrModeS;
4100 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4101 am.pinned = op_pin_state_floats;
4103 am.ins_permuted = 0;
4105 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4106 am.new_op1, am.new_op2, get_fpcw());
4107 set_am_attributes(fadd, &am);
4109 set_irn_mode(fadd, mode_T);
4110 res = new_rd_Proj(NULL, irg, block, fadd, mode_vfp, pn_ia32_res);
/* Convert a float to a 64-bit integer by storing it with fist(t)p into a
 * 64-bit frame slot; the two 32-bit halves are read back later by
 * gen_Proj_l_FloattoLL. */
4115 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4117 ir_node *src_block = get_nodes_block(node);
4118 ir_node *block = be_transform_node(src_block);
4119 ir_graph *irg = current_ir_graph;
4120 dbg_info *dbgi = get_irn_dbg_info(node);
4121 ir_node *frame = get_irg_frame(irg);
4122 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4123 ir_node *new_val = be_transform_node(val);
4124 ir_node *fist, *mem;
4126 mem = gen_vfist(dbgi, irg, block, frame, noreg_GP, nomem, new_val, &fist);
4127 SET_IA32_ORIG_NODE(fist, node);
4128 set_ia32_use_frame(fist);
4129 set_ia32_op_type(fist, ia32_AddrModeD);
4130 set_ia32_ls_mode(fist, mode_Ls);
4136 * the BAD transformer.
/* Placeholder registered for nodes that must never reach the transformer. */
4138 static ir_node *bad_transform(ir_node *node)
4140 panic("No transform function for %+F available.", node);
/* Materialize one 32-bit half of an l_FloattoLL result by loading it from
 * the 64-bit frame slot the fist wrote (high half at offset +4). */
4144 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4146 ir_graph *irg = current_ir_graph;
4147 ir_node *block = be_transform_node(get_nodes_block(node));
4148 ir_node *pred = get_Proj_pred(node);
4149 ir_node *new_pred = be_transform_node(pred);
4150 ir_node *frame = get_irg_frame(irg);
4151 dbg_info *dbgi = get_irn_dbg_info(node);
4152 long pn = get_Proj_proj(node);
4157 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4158 SET_IA32_ORIG_NODE(load, node);
4159 set_ia32_use_frame(load);
4160 set_ia32_op_type(load, ia32_AddrModeS);
4161 set_ia32_ls_mode(load, mode_Iu);
4162 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4163 * 32 bit from it with this particular load */
4164 attr = get_ia32_attr(load);
4165 attr->data.need_64bit_stackent = 1;
4167 if (pn == pn_ia32_l_FloattoLL_res_high) {
4168 add_ia32_am_offs_int(load, 4);
4170 assert(pn == pn_ia32_l_FloattoLL_res_low);
4173 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4179 * Transform the Projs of an AddSP.
/* Note the pn mapping crosses node kinds: be_AddSP was lowered to an
 * ia32_SubSP (stack grows downwards), so its projs are renumbered here. */
4181 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4183 ir_node *block = be_transform_node(get_nodes_block(node));
4184 ir_node *pred = get_Proj_pred(node);
4185 ir_node *new_pred = be_transform_node(pred);
4186 ir_graph *irg = current_ir_graph;
4187 dbg_info *dbgi = get_irn_dbg_info(node);
4188 long proj = get_Proj_proj(node);
4190 if (proj == pn_be_AddSP_sp) {
4191 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4192 pn_ia32_SubSP_stack);
4193 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4195 } else if (proj == pn_be_AddSP_res) {
4196 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4197 pn_ia32_SubSP_addr);
4198 } else if (proj == pn_be_AddSP_M) {
4199 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4202 panic("No idea how to transform proj->AddSP");
4206 * Transform the Projs of a SubSP.
/* Counterpart of gen_Proj_be_AddSP: be_SubSP became an ia32_AddSP. */
4208 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4210 ir_node *block = be_transform_node(get_nodes_block(node));
4211 ir_node *pred = get_Proj_pred(node);
4212 ir_node *new_pred = be_transform_node(pred);
4213 ir_graph *irg = current_ir_graph;
4214 dbg_info *dbgi = get_irn_dbg_info(node);
4215 long proj = get_Proj_proj(node);
4217 if (proj == pn_be_SubSP_sp) {
4218 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4219 pn_ia32_AddSP_stack);
4220 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4222 } else if (proj == pn_be_SubSP_M) {
4223 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4226 panic("No idea how to transform proj->SubSP");
4230 * Transform and renumber the Projs from a Load.
/* The transformed predecessor may be a plain ia32 Load, a Conv with folded
 * load, an SSE xLoad, or an x87 vfld; each branch renumbers the Proj to the
 * matching ia32 pn constant. */
4232 static ir_node *gen_Proj_Load(ir_node *node)
4235 ir_node *block = be_transform_node(get_nodes_block(node));
4236 ir_node *pred = get_Proj_pred(node);
4237 ir_graph *irg = current_ir_graph;
4238 dbg_info *dbgi = get_irn_dbg_info(node);
4239 long proj = get_Proj_proj(node);
4241 /* loads might be part of source address mode matches, so we don't
4242 * transform the ProjMs yet (with the exception of loads whose result is
4245 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4247 ir_node *old_block = get_nodes_block(node);
4249 /* this is needed, because sometimes we have loops that are only
4250 reachable through the ProjM */
4251 be_enqueue_preds(node);
4252 /* do it in 2 steps, to silence firm verifier */
4253 res = new_rd_Proj(dbgi, irg, old_block, pred, mode_M, pn_Load_M);
4254 set_Proj_proj(res, pn_ia32_mem);
4258 /* renumber the proj */
4259 new_pred = be_transform_node(pred);
4260 if (is_ia32_Load(new_pred)) {
4263 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4265 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4266 case pn_Load_X_regular:
4267 return new_rd_Jmp(dbgi, irg, block);
4268 case pn_Load_X_except:
4269 /* This Load might raise an exception. Mark it. */
4270 set_ia32_exc_label(new_pred, 1);
4271 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4275 } else if (is_ia32_Conv_I2I(new_pred) ||
4276 is_ia32_Conv_I2I8Bit(new_pred)) {
4277 set_irn_mode(new_pred, mode_T);
4278 if (proj == pn_Load_res) {
4279 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4280 } else if (proj == pn_Load_M) {
4281 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4283 } else if (is_ia32_xLoad(new_pred)) {
4286 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4288 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4289 case pn_Load_X_regular:
4290 return new_rd_Jmp(dbgi, irg, block);
4291 case pn_Load_X_except:
4292 /* This Load might raise an exception. Mark it. */
4293 set_ia32_exc_label(new_pred, 1);
4294 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4298 } else if (is_ia32_vfld(new_pred)) {
4301 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4303 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4304 case pn_Load_X_regular:
4305 return new_rd_Jmp(dbgi, irg, block);
4306 case pn_Load_X_except:
4307 /* This Load might raise an exception. Mark it. */
4308 set_ia32_exc_label(new_pred, 1);
/* FIX: was pn_ia32_xLoad_X_exc (copy-paste from the xLoad branch above);
 * this branch handles a vfld, so the vfld exception pn must be used. */
4309 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4314 /* can happen for ProJMs when source address mode happened for the
4317 /* however it should not be the result proj, as that would mean the
4318 load had multiple users and should not have been used for
4320 if (proj != pn_Load_M) {
4321 panic("internal error: transformed node not a Load");
4323 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4326 panic("No idea how to transform proj");
4330 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod are all lowered to the same ia32 Div/IDiv node; only
 * the proj numbers differ (div result vs. mod result vs. memory/X). */
4332 static ir_node *gen_Proj_DivMod(ir_node *node)
4334 ir_node *block = be_transform_node(get_nodes_block(node));
4335 ir_node *pred = get_Proj_pred(node);
4336 ir_node *new_pred = be_transform_node(pred);
4337 ir_graph *irg = current_ir_graph;
4338 dbg_info *dbgi = get_irn_dbg_info(node);
4339 long proj = get_Proj_proj(node);
4341 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4343 switch (get_irn_opcode(pred)) {
4347 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4349 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4350 case pn_Div_X_regular:
4351 return new_rd_Jmp(dbgi, irg, block);
4352 case pn_Div_X_except:
4353 set_ia32_exc_label(new_pred, 1);
4354 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4362 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4364 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4365 case pn_Mod_X_except:
4366 set_ia32_exc_label(new_pred, 1);
4367 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4375 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4376 case pn_DivMod_res_div:
4377 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4378 case pn_DivMod_res_mod:
4379 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4380 case pn_DivMod_X_regular:
4381 return new_rd_Jmp(dbgi, irg, block);
4382 case pn_DivMod_X_except:
4383 set_ia32_exc_label(new_pred, 1);
4384 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4393 panic("No idea how to transform proj->DivMod");
4397 * Transform and renumber the Projs from a CopyB.
/* CopyB was lowered to either the rep-movs form (CopyB_i, known size) or
 * the generic CopyB; pick the matching memory pn. */
4399 static ir_node *gen_Proj_CopyB(ir_node *node)
4401 ir_node *block = be_transform_node(get_nodes_block(node));
4402 ir_node *pred = get_Proj_pred(node);
4403 ir_node *new_pred = be_transform_node(pred);
4404 ir_graph *irg = current_ir_graph;
4405 dbg_info *dbgi = get_irn_dbg_info(node);
4406 long proj = get_Proj_proj(node);
4409 case pn_CopyB_M_regular:
4410 if (is_ia32_CopyB_i(new_pred)) {
4411 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4412 } else if (is_ia32_CopyB(new_pred)) {
4413 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4420 panic("No idea how to transform proj->CopyB");
4424 * Transform and renumber the Projs from a Quot.
/* Quot (float division) became either an SSE xDiv or an x87 vfdiv; renumber
 * memory and result projs accordingly. */
4426 static ir_node *gen_Proj_Quot(ir_node *node)
4428 ir_node *block = be_transform_node(get_nodes_block(node));
4429 ir_node *pred = get_Proj_pred(node);
4430 ir_node *new_pred = be_transform_node(pred);
4431 ir_graph *irg = current_ir_graph;
4432 dbg_info *dbgi = get_irn_dbg_info(node);
4433 long proj = get_Proj_proj(node);
4437 if (is_ia32_xDiv(new_pred)) {
4438 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4439 } else if (is_ia32_vfdiv(new_pred)) {
4440 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4444 if (is_ia32_xDiv(new_pred)) {
4445 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4446 } else if (is_ia32_vfdiv(new_pred)) {
4447 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4450 case pn_Quot_X_regular:
4451 case pn_Quot_X_except:
4456 panic("No idea how to transform proj->Quot");
/* Transform a be_Call into an ia32 Call: fold the call target into an
 * address mode where possible and wire the EAX/ECX/EDX register parameters
 * explicitly. */
4459 static ir_node *gen_be_Call(ir_node *node)
4461 dbg_info *const dbgi = get_irn_dbg_info(node);
4462 ir_graph *const irg = current_ir_graph;
4463 ir_node *const src_block = get_nodes_block(node);
4464 ir_node *const block = be_transform_node(src_block);
4465 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4466 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4467 ir_node *const sp = be_transform_node(src_sp);
4468 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4469 ia32_address_mode_t am;
4470 ia32_address_t *const addr = &am.addr;
4475 ir_node * eax = noreg_GP;
4476 ir_node * ecx = noreg_GP;
4477 ir_node * edx = noreg_GP;
4478 unsigned const pop = be_Call_get_pop(node);
4479 ir_type *const call_tp = be_Call_get_type(node);
4480 int old_no_pic_adjust;
4482 /* Run the x87 simulator if the call returns a float value */
4483 if (get_method_n_ress(call_tp) > 0) {
4484 ir_type *const res_type = get_method_res_type(call_tp, 0);
4485 ir_mode *const res_mode = get_type_mode(res_type);
4487 if (res_mode != NULL && mode_is_float(res_mode)) {
4488 env_cg->do_x87_sim = 1;
4492 /* We do not want be_Call direct calls */
4493 assert(be_Call_get_entity(node) == NULL);
4495 /* special case for PIC trampoline calls */
/* Temporarily suppress PIC address adjustment while matching the call
 * target, then restore the previous setting. */
4496 old_no_pic_adjust = no_pic_adjust;
4497 no_pic_adjust = env_cg->birg->main_env->options->pic;
4499 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4500 match_am | match_immediate);
4502 no_pic_adjust = old_no_pic_adjust;
/* Walk the register parameters backwards (last input is the fpcw) and
 * bind each to the single GP register its requirement allows. */
4504 i = get_irn_arity(node) - 1;
4505 fpcw = be_transform_node(get_irn_n(node, i--));
4506 for (; i >= be_pos_Call_first_arg; --i) {
4507 arch_register_req_t const *const req = arch_get_register_req(node, i);
4508 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4510 assert(req->type == arch_register_req_type_limited);
4511 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4513 switch (*req->limited) {
4514 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4515 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4516 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4517 default: panic("Invalid GP register for register parameter");
4521 mem = transform_AM_mem(irg, block, src_ptr, src_mem, addr->mem);
4522 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4523 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4524 set_am_attributes(call, &am);
4525 call = fix_mem_proj(call, &am);
4527 if (get_irn_pinned(node) == op_pin_state_pinned)
4528 set_irn_pinned(call, op_pin_state_pinned);
4530 SET_IA32_ORIG_NODE(call, node);
4532 if (ia32_cg_config.use_sse2) {
4533 /* remember this call for post-processing */
4534 ARR_APP1(ir_node *, call_list, call);
4535 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4542 * Transform Builtin trap
/* Lowered to the ud2 instruction (guaranteed invalid opcode). */
4544 static ir_node *gen_trap(ir_node *node) {
4545 dbg_info *dbgi = get_irn_dbg_info(node);
4546 ir_node *block = be_transform_node(get_nodes_block(node));
4547 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4549 return new_bd_ia32_UD2(dbgi, block, mem);
4553 * Transform Builtin debugbreak
/* Lowered to the int3 breakpoint instruction. */
4555 static ir_node *gen_debugbreak(ir_node *node) {
4556 dbg_info *dbgi = get_irn_dbg_info(node);
4557 ir_node *block = be_transform_node(get_nodes_block(node));
4558 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4560 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4564 * Transform Builtin return_address
/* __builtin_return_address(level): for level > 0 the frame chain is walked
 * with a ClimbFrame pseudo-op first, then the return address is loaded from
 * a well-known slot in the resulting frame. */
4566 static ir_node *gen_return_address(ir_node *node) {
4567 ir_node *param = get_Builtin_param(node, 0);
4568 ir_node *frame = get_Builtin_param(node, 1);
4569 dbg_info *dbgi = get_irn_dbg_info(node);
4570 tarval *tv = get_Const_tarval(param);
4571 unsigned long value = get_tarval_long(tv);
4573 ir_node *block = be_transform_node(get_nodes_block(node));
4574 ir_node *ptr = be_transform_node(frame);
4578 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4579 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4580 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4583 /* load the return address from this frame */
4584 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4586 set_irn_pinned(load, get_irn_pinned(node));
4587 set_ia32_op_type(load, ia32_AddrModeS);
4588 set_ia32_ls_mode(load, mode_Iu);
4590 set_ia32_am_offs_int(load, 0);
4591 set_ia32_use_frame(load);
4592 set_ia32_frame_ent(load, ia32_get_return_address_entity());
/* An unpinned load may be rematerialized; the pn_* equality asserts that
 * the result proj number is the same for all load flavours. */
4594 if (get_irn_pinned(node) == op_pin_state_floats) {
4595 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4596 && pn_ia32_vfld_res == pn_ia32_Load_res
4597 && pn_ia32_Load_res == pn_ia32_res);
4598 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4601 SET_IA32_ORIG_NODE(load, node);
4602 return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
4606 * Transform Builtin frame_address
/* __builtin_frame_address(level): same frame-walking scheme as
 * gen_return_address, but loads the saved frame pointer slot instead. */
4608 static ir_node *gen_frame_address(ir_node *node) {
4609 ir_node *param = get_Builtin_param(node, 0);
4610 ir_node *frame = get_Builtin_param(node, 1);
4611 dbg_info *dbgi = get_irn_dbg_info(node);
4612 tarval *tv = get_Const_tarval(param);
4613 unsigned long value = get_tarval_long(tv);
4615 ir_node *block = be_transform_node(get_nodes_block(node));
4616 ir_node *ptr = be_transform_node(frame);
4621 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4622 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4623 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4626 /* load the frame address from this frame */
4627 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4629 set_irn_pinned(load, get_irn_pinned(node));
4630 set_ia32_op_type(load, ia32_AddrModeS);
4631 set_ia32_ls_mode(load, mode_Iu);
4633 ent = ia32_get_frame_address_entity();
4635 set_ia32_am_offs_int(load, 0);
4636 set_ia32_use_frame(load);
4637 set_ia32_frame_ent(load, ent);
4639 /* will fail anyway, but gcc does this: */
4640 set_ia32_am_offs_int(load, 0);
4643 if (get_irn_pinned(node) == op_pin_state_floats) {
4644 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4645 && pn_ia32_vfld_res == pn_ia32_Load_res
4646 && pn_ia32_Load_res == pn_ia32_res);
4647 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4650 SET_IA32_ORIG_NODE(load, node);
4651 return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
4655 * Transform Builtin frame_address
/* NOTE(review): the header comment above is stale — this transforms the
 * prefetch builtin (__builtin_prefetch), not frame_address. */
4657 static ir_node *gen_prefetch(ir_node *node) {
4659 ir_node *ptr, *block, *mem, *base, *index;
4660 ir_node *param, *new_node;
4663 ia32_address_t addr;
4665 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4666 /* no prefetch at all, route memory */
4667 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write hint (rw), param 2 the locality hint. */
4670 param = get_Builtin_param(node, 1);
4671 tv = get_Const_tarval(param);
4672 rw = get_tarval_long(tv);
4674 /* construct load address */
4675 memset(&addr, 0, sizeof(addr));
4676 ptr = get_Builtin_param(node, 0);
4677 ia32_create_address_mode(&addr, ptr, 0);
4684 base = be_transform_node(base);
4687 if (index == NULL) {
4690 index = be_transform_node(index);
4693 dbgi = get_irn_dbg_info(node);
4694 block = be_transform_node(get_nodes_block(node));
4695 mem = be_transform_node(get_Builtin_mem(node));
4697 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4698 /* we have 3DNow!, this was already checked above */
4699 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4700 } else if (ia32_cg_config.use_sse_prefetch) {
4701 /* note: rw == 1 is IGNORED in that case */
4702 param = get_Builtin_param(node, 2);
4703 tv = get_Const_tarval(param);
4704 locality = get_tarval_long(tv);
4706 /* SSE style prefetch */
/* Map the locality hint to prefetchnta/t2/t1/t0. */
4709 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4712 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4715 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4718 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4722 assert(ia32_cg_config.use_3dnow_prefetch);
4723 /* 3DNow! style prefetch */
4724 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4727 set_irn_pinned(new_node, get_irn_pinned(node));
4728 set_ia32_op_type(new_node, ia32_AddrModeS);
4729 set_ia32_ls_mode(new_node, mode_Bu);
4730 set_address(new_node, &addr);
4732 SET_IA32_ORIG_NODE(new_node, node);
4734 be_dep_on_frame(new_node);
4735 return new_r_Proj(current_ir_graph, block, new_node, mode_M, pn_ia32_Prefetch_M);
4739 * Transform bsf like node
/* Shared helper for single-operand builtins (bsf/bsr/popcnt) that can take
 * their operand from memory via address-mode matching. */
4741 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4743 ir_node *param = get_Builtin_param(node, 0);
4744 dbg_info *dbgi = get_irn_dbg_info(node);
4746 ir_node *block = get_nodes_block(node);
4747 ir_node *new_block = be_transform_node(block);
4749 ia32_address_mode_t am;
4750 ia32_address_t *addr = &am.addr;
4753 match_arguments(&am, block, NULL, param, NULL, match_am);
4755 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4756 set_am_attributes(cnt, &am);
4757 set_ia32_ls_mode(cnt, get_irn_mode(param));
4759 SET_IA32_ORIG_NODE(cnt, node);
4760 return fix_mem_proj(cnt, &am);
4764 * Transform builtin ffs.
/* ffs(x): bsf gives the index of the lowest set bit and sets ZF when x==0.
 * Set/Conv/Neg turn ZF into an all-ones mask for the zero case; or-ing it
 * over the bsf result yields -1, and the final +1 maps that to 0 while
 * turning a valid index into the 1-based ffs result. */
4766 static ir_node *gen_ffs(ir_node *node)
4768 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4769 ir_node *real = skip_Proj(bsf);
4770 dbg_info *dbgi = get_irn_dbg_info(real);
4771 ir_node *block = get_nodes_block(real);
4772 ir_node *flag, *set, *conv, *neg, *or;
4775 if (get_irn_mode(real) != mode_T) {
4776 set_irn_mode(real, mode_T);
4777 bsf = new_r_Proj(current_ir_graph, block, real, mode_Iu, pn_ia32_res);
4780 flag = new_r_Proj(current_ir_graph, block, real, mode_b, pn_ia32_flags);
4783 set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
4784 SET_IA32_ORIG_NODE(set, node);
4787 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4788 SET_IA32_ORIG_NODE(conv, node);
4791 neg = new_bd_ia32_Neg(dbgi, block, conv);
4794 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4795 set_ia32_commutative(or);
4798 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4802 * Transform builtin clz.
/* clz(x): bsr yields the index of the highest set bit; xor with 31 turns
 * that index into the leading-zero count for a 32-bit value. */
4804 static ir_node *gen_clz(ir_node *node)
4806 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4807 ir_node *real = skip_Proj(bsr);
4808 dbg_info *dbgi = get_irn_dbg_info(real);
4809 ir_node *block = get_nodes_block(real);
4810 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4812 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
4816 * Transform builtin ctz.
/* ctz(x) is exactly the bsf result (undefined for x==0, as is the builtin). */
4818 static ir_node *gen_ctz(ir_node *node)
4820 return gen_unop_AM(node, new_bd_ia32_Bsf);
4824 * Transform builtin parity.
/* parity(x): compare against 0 and read the parity flag via setp, then
 * zero-extend the byte result.
 * NOTE(review): the x86 parity flag reflects only the least-significant
 * byte of the result — verify this is intended/corrected for full 32-bit
 * parity (code folding the upper bytes is not visible in this chunk). */
4826 static ir_node *gen_parity(ir_node *node)
4828 ir_node *param = get_Builtin_param(node, 0);
4829 dbg_info *dbgi = get_irn_dbg_info(node);
4831 ir_node *block = get_nodes_block(node);
4833 ir_node *new_block = be_transform_node(block);
4834 ir_node *imm, *cmp, *new_node;
4836 ia32_address_mode_t am;
4837 ia32_address_t *addr = &am.addr;
4841 match_arguments(&am, block, NULL, param, NULL, match_am);
4842 imm = ia32_create_Immediate(NULL, 0, 0);
4843 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4844 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4845 set_am_attributes(cmp, &am);
4846 set_ia32_ls_mode(cmp, mode_Iu);
4848 SET_IA32_ORIG_NODE(cmp, node);
4850 cmp = fix_mem_proj(cmp, &am);
4853 new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
4854 SET_IA32_ORIG_NODE(new_node, node);
4857 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4858 nomem, new_node, mode_Bu);
4859 SET_IA32_ORIG_NODE(new_node, node);
4864 * Transform builtin popcount
4866 static ir_node *gen_popcount(ir_node *node) {
4867 ir_node *param = get_Builtin_param(node, 0);
4868 dbg_info *dbgi = get_irn_dbg_info(node);
4870 ir_node *block = get_nodes_block(node);
4871 ir_node *new_block = be_transform_node(block);
4874 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4876 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4877 if (ia32_cg_config.use_popcnt) {
4878 ia32_address_mode_t am;
4879 ia32_address_t *addr = &am.addr;
4882 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4884 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4885 set_am_attributes(cnt, &am);
4886 set_ia32_ls_mode(cnt, get_irn_mode(param));
4888 SET_IA32_ORIG_NODE(cnt, node);
4889 return fix_mem_proj(cnt, &am);
4892 new_param = be_transform_node(param);
4894 /* do the standard popcount algo */
4896 /* m1 = x & 0x55555555 */
4897 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4898 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4901 simm = ia32_create_Immediate(NULL, 0, 1);
4902 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4904 /* m2 = s1 & 0x55555555 */
4905 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4908 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4910 /* m4 = m3 & 0x33333333 */
4911 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4912 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4915 simm = ia32_create_Immediate(NULL, 0, 2);
4916 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4918 /* m5 = s2 & 0x33333333 */
4919 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4922 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4924 /* m7 = m6 & 0x0F0F0F0F */
4925 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4926 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4929 simm = ia32_create_Immediate(NULL, 0, 4);
4930 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4932 /* m8 = s3 & 0x0F0F0F0F */
4933 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4936 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4938 /* m10 = m9 & 0x00FF00FF */
4939 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4940 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4943 simm = ia32_create_Immediate(NULL, 0, 8);
4944 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4946 /* m11 = s4 & 0x00FF00FF */
4947 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4949 /* m12 = m10 + m11 */
4950 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4952 /* m13 = m12 & 0x0000FFFF */
4953 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4954 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4956 /* s5 = m12 >> 16 */
4957 simm = ia32_create_Immediate(NULL, 0, 16);
4958 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4960 /* res = m13 + s5 */
4961 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4965 * Transform builtin byte swap.
4967 static ir_node *gen_bswap(ir_node *node) {
4968 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4969 dbg_info *dbgi = get_irn_dbg_info(node);
4971 ir_node *block = get_nodes_block(node);
4972 ir_node *new_block = be_transform_node(block);
4973 ir_mode *mode = get_irn_mode(param);
4974 unsigned size = get_mode_size_bits(mode);
4975 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
4979 if (ia32_cg_config.use_i486) {
4980 /* swap available */
4981 return new_bd_ia32_Bswap(dbgi, new_block, param);
4983 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4984 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4986 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
4987 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
4989 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4991 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
4992 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
4994 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4995 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
4998 /* swap16 always available */
4999 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5002 panic("Invalid bswap size (%d)", size);
5007 * Transform builtin outport.
5009 static ir_node *gen_outport(ir_node *node) {
/* the port number may be encoded as an immediate */
5010 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5011 ir_node *oldv = get_Builtin_param(node, 1);
5012 ir_mode *mode = get_irn_mode(oldv);
5013 ir_node *value = be_transform_node(oldv);
5014 ir_node *block = be_transform_node(get_nodes_block(node));
5015 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5016 dbg_info *dbgi = get_irn_dbg_info(node);
5018 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* the ls mode carries the access width of the out instruction */
5019 set_ia32_ls_mode(res, mode);
5024 * Transform builtin inport.
5026 static ir_node *gen_inport(ir_node *node) {
/* the access width is taken from the builtin's result type */
5027 ir_type *tp = get_Builtin_type(node);
5028 ir_type *rstp = get_method_res_type(tp, 0);
5029 ir_mode *mode = get_type_mode(rstp);
5030 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5031 ir_node *block = be_transform_node(get_nodes_block(node));
5032 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5033 dbg_info *dbgi = get_irn_dbg_info(node);
5035 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
/* the ls mode carries the access width of the in instruction */
5036 set_ia32_ls_mode(res, mode);
5038 /* check for missing Result Proj */
5043 * Transform a builtin inner trampoline
/* Writes a 10 byte i386 trampoline at *ptr:
 *   B9 <env>     mov ecx, <env>     (5 bytes)
 *   E9 <rel32>   jmp rel <callee>   (5 bytes)
 * and returns a (memory, trampoline address) tuple. */
5045 static ir_node *gen_inner_trampoline(ir_node *node) {
5046 ir_node *ptr = get_Builtin_param(node, 0);
5047 ir_node *callee = get_Builtin_param(node, 1);
5048 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5049 ir_node *mem = get_Builtin_mem(node);
5050 ir_node *block = get_nodes_block(node);
5051 ir_node *new_block = be_transform_node(block);
5055 ir_node *trampoline;
5057 dbg_info *dbgi = get_irn_dbg_info(node);
5058 ia32_address_t addr;
5060 /* construct store address */
5061 memset(&addr, 0, sizeof(addr));
5062 ia32_create_address_mode(&addr, ptr, 0);
/* normalize missing base/index to noreg so the Stores get valid inputs */
5064 if (addr.base == NULL) {
5065 addr.base = noreg_GP;
5067 addr.base = be_transform_node(addr.base);
5070 if (addr.index == NULL) {
5071 addr.index = noreg_GP;
5073 addr.index = be_transform_node(addr.index);
5075 addr.mem = be_transform_node(mem);
5077 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5078 val = ia32_create_Immediate(NULL, 0, 0xB9);
5079 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5080 addr.index, addr.mem, val);
5081 set_irn_pinned(store, get_irn_pinned(node));
5082 set_ia32_op_type(store, ia32_AddrModeD);
5083 set_ia32_ls_mode(store, mode_Bu);
5084 set_address(store, &addr);
/* store the 32 bit environment pointer (the mov's immediate operand) */
5088 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5089 addr.index, addr.mem, env);
5090 set_irn_pinned(store, get_irn_pinned(node));
5091 set_ia32_op_type(store, ia32_AddrModeD);
5092 set_ia32_ls_mode(store, mode_Iu);
5093 set_address(store, &addr);
5097 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5098 val = ia32_create_Immediate(NULL, 0, 0xE9);
5099 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5100 addr.index, addr.mem, val);
5101 set_irn_pinned(store, get_irn_pinned(node));
5102 set_ia32_op_type(store, ia32_AddrModeD);
5103 set_ia32_ls_mode(store, mode_Bu);
5104 set_address(store, &addr);
5108 trampoline = be_transform_node(ptr);
5110 /* the callee is typically an immediate */
5111 if (is_SymConst(callee)) {
/* -10: the jmp displacement is relative to the end of the 10 byte
 * trampoline */
5112 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5114 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
/* rel = (callee - 10) - trampoline */
5116 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5118 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5119 addr.index, addr.mem, rel);
5120 set_irn_pinned(store, get_irn_pinned(node));
5121 set_ia32_op_type(store, ia32_AddrModeD);
5122 set_ia32_ls_mode(store, mode_Iu);
5123 set_address(store, &addr);
/* result tuple: in[0] = memory, in[1] = trampoline address */
5128 return new_r_Tuple(current_ir_graph, new_block, 2, in);
5132 * Transform Builtin node.
/* Dispatch a Builtin node to its kind-specific transformer; kinds the
 * ia32 backend does not handle fall through to the panic below. */
5134 static ir_node *gen_Builtin(ir_node *node) {
5135 ir_builtin_kind kind = get_Builtin_kind(node);
5139 return gen_trap(node);
5140 case ir_bk_debugbreak:
5141 return gen_debugbreak(node);
5142 case ir_bk_return_address:
5143 return gen_return_address(node);
5144 case ir_bk_frame_address:
5145 return gen_frame_address(node);
5146 case ir_bk_prefetch:
5147 return gen_prefetch(node);
5149 return gen_ffs(node);
5151 return gen_clz(node);
5153 return gen_ctz(node);
5155 return gen_parity(node);
5156 case ir_bk_popcount:
5157 return gen_popcount(node);
5159 return gen_bswap(node);
5161 return gen_outport(node);
5163 return gen_inport(node);
5164 case ir_bk_inner_trampoline:
5165 return gen_inner_trampoline(node);
5167 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5171 * Transform Proj(Builtin) node.
5173 static ir_node *gen_Proj_Builtin(ir_node *proj) {
5174 ir_node *node = get_Proj_pred(proj);
5175 ir_node *new_node = be_transform_node(node);
5176 ir_builtin_kind kind = get_Builtin_kind(node);
/* value-only builtins: the transformed node is the result itself */
5179 case ir_bk_return_address:
5180 case ir_bk_frame_address:
5185 case ir_bk_popcount:
5187 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* memory-only builtins: the transformed node is the memory value */
5190 case ir_bk_debugbreak:
5191 case ir_bk_prefetch:
5193 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport became a mode_T ia32_Inport; pick the matching output Proj */
5196 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5197 return new_r_Proj(current_ir_graph, get_nodes_block(new_node),
5198 new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5200 assert(get_Proj_proj(proj) == pn_Builtin_M);
5201 return new_r_Proj(current_ir_graph, get_nodes_block(new_node),
5202 new_node, mode_M, pn_ia32_Inport_M);
/* inner_trampoline was transformed into a Tuple (memory, address) */
5204 case ir_bk_inner_trampoline:
5205 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5206 return get_Tuple_pred(new_node, 1);
5208 assert(get_Proj_proj(proj) == pn_Builtin_M);
5209 return get_Tuple_pred(new_node, 0);
5212 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Transform be_IncSP: duplicate it, marking that the stack adjustment
 * may be implemented with instructions that modify the flags. */
5215 static ir_node *gen_be_IncSP(ir_node *node)
5217 ir_node *res = be_duplicate_node(node);
5218 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5224 * Transform the Projs from a be_Call.
5226 static ir_node *gen_Proj_be_Call(ir_node *node)
5228 ir_node *block = be_transform_node(get_nodes_block(node));
5229 ir_node *call = get_Proj_pred(node);
5230 ir_node *new_call = be_transform_node(call);
5231 ir_graph *irg = current_ir_graph;
5232 dbg_info *dbgi = get_irn_dbg_info(node);
5233 long proj = get_Proj_proj(node);
5234 ir_mode *mode = get_irn_mode(node);
/* the memory Proj maps directly onto the ia32_Call's mem output */
5237 if (proj == pn_be_Call_M_regular) {
5238 return new_rd_Proj(dbgi, irg, block, new_call, mode_M, n_ia32_Call_mem);
5240 /* transform call modes */
5241 if (mode_is_data(mode)) {
5242 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5246 /* Map from be_Call to ia32_Call proj number */
5247 if (proj == pn_be_Call_sp) {
5248 proj = pn_ia32_Call_stack;
/* NOTE(review): this branch looks unreachable — pn_be_Call_M_regular
 * already returned above */
5249 } else if (proj == pn_be_Call_M_regular) {
5250 proj = pn_ia32_Call_M;
/* result Proj: find the ia32_Call output with the same limited
 * register requirement */
5252 arch_register_req_t const *const req = arch_get_register_req_out(node);
5253 int const n_outs = arch_irn_get_n_outs(new_call);
5256 assert(proj >= pn_be_Call_first_res);
5257 assert(req->type & arch_register_req_type_limited);
5259 for (i = 0; i < n_outs; ++i) {
5260 arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);
/* skip outputs whose class or register restriction does not match */
5262 if (!(new_req->type & arch_register_req_type_limited) ||
5263 new_req->cls != req->cls ||
5264 *new_req->limited != *req->limited)
5273 res = new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
5275 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5277 case pn_ia32_Call_stack:
5278 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5281 case pn_ia32_Call_fpcw:
5282 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5290 * Transform the Projs from a Cmp.
/* Cmp results must have been lowered (to control flow / Set) before the
 * backend runs; reaching this transformer indicates a lowering bug. */
5292 static ir_node *gen_Proj_Cmp(ir_node *node)
5294 /* this probably means not all mode_b nodes were lowered... */
5295 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5300 * Transform the Projs from a Bound.
5302 static ir_node *gen_Proj_Bound(ir_node *node)
5304 ir_node *new_node, *block;
5305 ir_node *pred = get_Proj_pred(node);
5307 switch (get_Proj_proj(node)) {
/* the bounds check does not touch memory: route the mem through */
5309 return be_transform_node(get_Bound_mem(pred));
/* the Bound became a Jcc: map the X Projs onto its true/false outputs */
5310 case pn_Bound_X_regular:
5311 new_node = be_transform_node(pred);
5312 block = get_nodes_block(new_node);
5313 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
5314 case pn_Bound_X_except:
5315 new_node = be_transform_node(pred);
5316 block = get_nodes_block(new_node);
5317 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
/* the data result of Bound is the (unchanged) index */
5319 return be_transform_node(get_Bound_index(pred));
5321 panic("unsupported Proj from Bound");
/* Transform a Proj of an ASM node: renumber it onto the output layout of
 * the transformed ia32 ASM node. */
5325 static ir_node *gen_Proj_ASM(ir_node *node)
5327 ir_mode *mode = get_irn_mode(node);
5328 ir_node *pred = get_Proj_pred(node);
5329 ir_node *new_pred = be_transform_node(pred);
5330 ir_node *block = get_nodes_block(new_pred);
5331 long pos = get_Proj_proj(node);
/* the memory output is placed after all register outputs */
5333 if (mode == mode_M) {
5334 pos = arch_irn_get_n_outs(new_pred) + 1;
5335 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5337 } else if (mode_is_float(mode)) {
5340 panic("unexpected proj mode at ASM");
5343 return new_r_Proj(current_ir_graph, block, new_pred, mode, pos);
5347 * Transform and potentially renumber Proj nodes.
5349 static ir_node *gen_Proj(ir_node *node)
5351 ir_node *pred = get_Proj_pred(node);
/* dispatch on the opcode of the Proj's predecessor */
5354 switch (get_irn_opcode(pred)) {
/* Store: its only non-exception result is the memory value, which is
 * the transformed Store node itself */
5356 proj = get_Proj_proj(node);
5357 if (proj == pn_Store_M) {
5358 return be_transform_node(pred);
5360 panic("No idea how to transform proj->Store");
5363 return gen_Proj_Load(node);
5365 return gen_Proj_ASM(node);
5367 return gen_Proj_Builtin(node);
5371 return gen_Proj_DivMod(node);
5373 return gen_Proj_CopyB(node);
5375 return gen_Proj_Quot(node);
5377 return gen_Proj_be_SubSP(node);
5379 return gen_Proj_be_AddSP(node);
5381 return gen_Proj_be_Call(node);
5383 return gen_Proj_Cmp(node);
5385 return gen_Proj_Bound(node);
5387 proj = get_Proj_proj(node);
5389 case pn_Start_X_initial_exec: {
5390 ir_node *block = get_nodes_block(pred);
5391 ir_node *new_block = be_transform_node(block);
5392 dbg_info *dbgi = get_irn_dbg_info(node);
5393 /* we exchange the ProjX with a jump */
5394 ir_node *jump = new_rd_Jmp(dbgi, current_ir_graph, new_block);
5399 case pn_Start_P_tls:
5400 return gen_Proj_tls(node);
/* default case: Projs of already-lowered or generic nodes */
5405 if (is_ia32_l_FloattoLL(pred)) {
5406 return gen_Proj_l_FloattoLL(node);
5408 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5412 ir_mode *mode = get_irn_mode(node);
/* normalize integer-like Projs to the gp register mode */
5413 if (ia32_mode_needs_gp_reg(mode)) {
5414 ir_node *new_pred = be_transform_node(pred);
5415 ir_node *block = be_transform_node(get_nodes_block(node));
5416 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
5417 mode_Iu, get_Proj_proj(node));
/* keep the visible node number stable (debugging aid) */
5418 new_proj->node_nr = node->node_nr;
5423 return be_duplicate_node(node);
5427 * Enters all transform functions into the generic pointer
/* Installs the gen_* transformers into the generic function pointer of
 * each opcode; opcodes that must not reach the backend get a panicking
 * "bad" transformer. */
5429 static void register_transformers(void)
5431 /* first clear the generic function pointer for all ops */
5432 clear_irp_opcodes_generic_func();
/* GEN(op) registers gen_op as transformer for op,
 * BAD(op) marks op as not allowed to occur at this point */
5434 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5435 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5474 /* transform ops from intrinsic lowering */
5486 GEN(ia32_l_LLtoFloat);
5487 GEN(ia32_l_FloattoLL);
5493 /* we should never see these nodes */
5508 /* handle builtins */
5511 /* handle generic backend nodes */
5525 * Pre-transform all unknown and noreg nodes.
5527 static void ia32_pretransform_node(void)
5529 ia32_code_gen_t *cg = env_cg;
/* transform the per-graph "unknown"/"no register" placeholder nodes up
 * front so later transformers can reference their transformed versions */
5531 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5532 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5533 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5534 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5535 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5536 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
/* cache frequently used nodes in the file-local helpers */
5538 nomem = get_irg_no_mem(current_ir_graph);
5539 noreg_GP = ia32_new_NoReg_gp(cg);
5545 * Walker, checks if all ia32 nodes producing more than one result have their
5546 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
5548 static void add_missing_keep_walker(ir_node *node, void *data)
5551 unsigned found_projs = 0;
5552 const ir_edge_t *edge;
5553 ir_mode *mode = get_irn_mode(node);
/* only ia32 machine nodes are of interest here */
5558 if (!is_ia32_irn(node))
5561 n_outs = arch_irn_get_n_outs(node);
/* SwitchJmp outputs are control flow, nothing to keep */
5564 if (is_ia32_SwitchJmp(node))
/* found_projs serves as a bitset over the out indices */
5567 assert(n_outs < (int) sizeof(unsigned) * 8);
/* record which outputs already have a Proj */
5568 foreach_out_edge(node, edge) {
5569 ir_node *proj = get_edge_src_irn(edge);
5572 /* The node could be kept */
5576 if (get_irn_mode(proj) == mode_M)
5579 pn = get_Proj_proj(proj);
5580 assert(pn < n_outs);
5581 found_projs |= 1 << pn;
5585 /* are keeps missing? */
5587 for (i = 0; i < n_outs; ++i) {
5590 const arch_register_req_t *req;
5591 const arch_register_class_t *cls;
/* output i already has a user, nothing to do */
5593 if (found_projs & (1 << i)) {
5597 req = get_ia32_out_req(node, i);
/* flags outputs need not be kept alive artificially */
5602 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
/* create a Proj for the unused output and keep it alive */
5606 block = get_nodes_block(node);
5607 in[0] = new_r_Proj(current_ir_graph, block, node,
5608 arch_register_class_mode(cls), i);
/* reuse a single Keep per node where possible */
5609 if (last_keep != NULL) {
5610 be_Keep_add_node(last_keep, cls, in[0]);
5612 last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
5613 if (sched_is_scheduled(node)) {
5614 sched_add_after(node, last_keep);
5621 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/* Entry point: run the keep-adding walker over the whole graph. */
5624 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5626 ir_graph *irg = be_get_birg_irg(cg->birg);
5627 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5631 * Post-process all calls if we are in SSE mode.
5632 * The ABI requires that the results are in st0, copy them
5633 * to a xmm register.
5635 static void postprocess_fp_call_results(void) {
/* iterate over all calls recorded during transformation */
5638 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5639 ir_node *call = call_list[i];
5640 ir_type *mtp = call_types[i];
5643 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5644 ir_type *res_tp = get_method_res_type(mtp, j);
5645 ir_node *res, *new_res;
5646 const ir_edge_t *edge, *next;
5649 if (! is_atomic_type(res_tp)) {
5650 /* no floating point return */
5653 mode = get_type_mode(res_tp);
5654 if (! mode_is_float(mode)) {
5655 /* no floating point return */
/* vf0 + j: the j'th x87 result Proj of the ia32_Call */
5659 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5662 /* now patch the users */
5663 foreach_out_edge_safe(res, edge, next) {
5664 ir_node *succ = get_edge_src_irn(edge);
/* a Keep does not care in which register class the value lives */
5667 if (be_is_Keep(succ))
5670 if (is_ia32_xStore(succ)) {
5671 /* an xStore can be patched into an vfst */
5672 dbg_info *db = get_irn_dbg_info(succ);
5673 ir_node *block = get_nodes_block(succ);
5674 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5675 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5676 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5677 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5678 ir_mode *mode = get_ia32_ls_mode(succ);
5680 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
/* carry over the xStore's address-mode attributes */
5681 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5682 if (is_ia32_use_frame(succ))
5683 set_ia32_use_frame(st);
5684 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5685 set_irn_pinned(st, get_irn_pinned(succ));
5686 set_ia32_op_type(st, ia32_AddrModeD);
/* generic user: bounce st0 through the stack frame into an xmm
 * register; created lazily once per result */
5690 if (new_res == NULL) {
5691 dbg_info *db = get_irn_dbg_info(call);
5692 ir_node *block = get_nodes_block(call);
5693 ir_node *frame = get_irg_frame(current_ir_graph);
5694 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5695 ir_node *call_mem = new_r_Proj(current_ir_graph, block, call, mode_M, pn_ia32_Call_M);
5696 ir_node *vfst, *xld, *new_mem;
5698 /* store st(0) on stack */
5699 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5700 set_ia32_op_type(vfst, ia32_AddrModeD);
5701 set_ia32_use_frame(vfst);
5703 /* load into SSE register */
5704 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5705 set_ia32_op_type(xld, ia32_AddrModeS);
5706 set_ia32_use_frame(xld);
5708 new_res = new_r_Proj(current_ir_graph, block, xld, mode, pn_ia32_xLoad_res);
5709 new_mem = new_r_Proj(current_ir_graph, block, xld, mode_M, pn_ia32_xLoad_M);
/* make all users of the call memory observe the new store/load */
5711 if (old_mem != NULL) {
5712 edges_reroute(old_mem, new_mem, current_ir_graph);
/* redirect this user to the xmm copy of the result */
5716 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5723 /* do the transformation */
/* Main driver of the ia32 transform phase: sets up helpers, runs the
 * generic transform walker, then applies SSE-specific fixups. */
5724 void ia32_transform_graph(ia32_code_gen_t *cg)
5728 register_transformers();
5730 initial_fpcw = NULL;
/* heights are needed by the address-mode matcher */
5733 BE_TIMER_PUSH(t_heights);
5734 heights = heights_new(cg->irg);
5735 BE_TIMER_POP(t_heights);
5736 ia32_calculate_non_address_mode_nodes(cg->birg);
5738 /* the transform phase is not safe for CSE (yet) because several nodes get
5739 * attributes set after their creation */
5740 cse_last = get_opt_cse();
/* collect calls with fp results for the SSE fixup below */
5743 call_list = NEW_ARR_F(ir_node *, 0);
5744 call_types = NEW_ARR_F(ir_type *, 0);
5745 be_transform_graph(cg->birg, ia32_pretransform_node);
/* the x87 ABI returns fp values in st0; in SSE mode copy them to xmm */
5747 if (ia32_cg_config.use_sse2)
5748 postprocess_fp_call_results();
5749 DEL_ARR_F(call_types);
5750 DEL_ARR_F(call_list);
/* restore the previous CSE setting */
5752 set_opt_cse(cse_last);
5754 ia32_free_non_address_mode_nodes();
5755 heights_free(heights);
5759 void ia32_init_transform(void)
5761 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");