2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
/* Bit patterns (hex strings) for the sign/abs masks of 32- and 64-bit
 * IEEE floats, plus decimal strings for the signed-64 max and the 2^64
 * bias used when converting unsigned long long to floating point. */
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
/* Local-label names of the entities that hold the constants above
 * (see ia32_gen_fp_known_const below). */
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
/* Shorthands for the register-class modes of the x87 (vfp) and SSE (xmm)
 * register classes. */
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* Debug module handle (only present in debug builds). */
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Cached transformed node for the initial FP control word (see get_fpcw). */
95 static ir_node *initial_fpcw = NULL;
/* Signatures of the generated ia32 node constructors, so generic helpers
 * (gen_binop, gen_unop, ...) can take the constructor as a parameter.
 * NOTE(review): some trailing parameters are not visible in this excerpt. */
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
/* Binary op that additionally consumes an eflags input (Adc/Sbb style). */
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
/* Shift/rotate: value and shift amount only, no address-mode operands. */
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
/* Binary op with destination address mode (result written to memory). */
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
/* Unary op with destination address mode. */
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
/* Float binary op; presumably takes the FP control word as a final input
 * (see gen_binop_x87_float, which passes get_fpcw()) -- TODO confirm. */
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
/* Plain unary op. */
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in the file. */
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* it's enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero if the node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero if the node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero if the node represents the -1 constant (all bits set). */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
/* Only 0.0 and 1.0 are recognized here; gen_Const materializes these with
 * vfldz/vfld1 instead of a memory load.
 * NOTE(review): the return statements are not visible in this excerpt. */
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
167 * returns true if constant can be created with a simple float command
/* SSE variant: 0.0 always qualifies (xZero); with CONSTRUCT_SSE_CONST more
 * constants qualify, e.g. a mode_D value whose low 32 bits are zero can be
 * built from a 32-bit immediate (see the matching path in gen_Const). */
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
/* Assemble the low 32 bits of the double from its byte representation. */
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
199 * Transforms a Const.
/* Float constants: SSE2 path builds 0.0 (and, with CONSTRUCT_SSE_CONST,
 * 1.0 and selected doubles) inline, otherwise loads from a float-constant
 * entity; x87 path uses vfldz/vfld1 for 0.0/1.0, otherwise a vfld from an
 * entity. Integer constants become an ia32_Const immediate. */
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
210 if (mode_is_float(mode)) {
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
217 if (tarval_is_null(tv)) {
/* +0.0: xorps-style zero, no memory access needed. */
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
222 } else if (tarval_is_one(tv)) {
/* Construct 1.0 by shifting an all-ones register: left then right,
 * leaving exactly the exponent bits of 1.0 (shift counts 26/2 for
 * mode_F, 55/2 for mode_D). */
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
/* Shift the 32-bit payload into the upper half of the xmm register. */
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
/* Fallback: load the constant from a global read-only entity. */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
/* The load reads an immutable constant, so it may be rematerialized. */
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: 0.0 and 1.0 have dedicated load instructions. */
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
293 floatent = create_float_const_entity(node);
294 /* create_float_const_ent is smart and sometimes creates
296 ls_mode = get_type_mode(get_entity_type(floatent));
/* Under PIC the entity must be addressed relative to the PIC base. */
298 if (env_cg->birg->main_env->options->pic) {
299 base = arch_code_generator_get_pic_base(env_cg);
304 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
306 set_ia32_op_type(load, ia32_AddrModeS);
307 set_ia32_am_sc(load, floatent);
308 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
309 res = new_r_Proj(block, load, mode_vfp, pn_ia32_vfld_res);
312 #ifdef CONSTRUCT_SSE_CONST
314 #endif /* CONSTRUCT_SSE_CONST */
315 SET_IA32_ORIG_NODE(load, node);
317 be_dep_on_frame(load);
319 } else { /* non-float mode */
321 tarval *tv = get_Const_tarval(node);
/* Normalize to 32-bit unsigned so the value fits an ia32 immediate. */
324 tv = tarval_convert_to(tv, mode_Iu);
326 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
328 panic("couldn't convert constant tarval (%+F)", node);
330 val = get_tarval_long(tv);
332 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
333 SET_IA32_ORIG_NODE(cnst, node);
335 be_dep_on_frame(cnst);
341 * Transforms a SymConst.
/* Float SymConsts become an extended-precision (mode_E) load from the
 * entity; integer/address SymConsts become an ia32_Const carrying the
 * entity. Only symconst_addr_ent is supported. */
343 static ir_node *gen_SymConst(ir_node *node)
345 ir_node *old_block = get_nodes_block(node);
346 ir_node *block = be_transform_node(old_block);
347 dbg_info *dbgi = get_irn_dbg_info(node);
348 ir_mode *mode = get_irn_mode(node);
351 if (mode_is_float(mode)) {
352 if (ia32_cg_config.use_sse2)
353 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
355 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
356 set_ia32_am_sc(cnst, get_SymConst_entity(node));
357 set_ia32_use_frame(cnst);
361 if (get_SymConst_kind(node) != symconst_addr_ent) {
362 panic("backend only support symconst_addr_ent (at %+F)", node);
364 entity = get_SymConst_entity(node);
365 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
368 SET_IA32_ORIG_NODE(cnst, node);
370 be_dep_on_frame(cnst);
375 * Create a float type for the given mode and cache it.
377 * @param mode the mode for the float type (might be integer mode for SSE2 types)
378 * @param align alignment
/* One static cache array per supported mode, indexed by alignment
 * (alignment must therefore be < 16, the size of each cache array).
 * Modes other than Iu/Lu/F/D fall through to the mode_E cache. */
380 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
386 if (mode == mode_Iu) {
387 static ir_type *int_Iu[16] = {NULL, };
389 if (int_Iu[align] == NULL) {
390 int_Iu[align] = tp = new_type_primitive(mode);
391 /* set the specified alignment */
392 set_type_alignment_bytes(tp, align);
394 return int_Iu[align];
395 } else if (mode == mode_Lu) {
396 static ir_type *int_Lu[16] = {NULL, };
398 if (int_Lu[align] == NULL) {
399 int_Lu[align] = tp = new_type_primitive(mode);
400 /* set the specified alignment */
401 set_type_alignment_bytes(tp, align);
403 return int_Lu[align];
404 } else if (mode == mode_F) {
405 static ir_type *float_F[16] = {NULL, };
407 if (float_F[align] == NULL) {
408 float_F[align] = tp = new_type_primitive(mode);
409 /* set the specified alignment */
410 set_type_alignment_bytes(tp, align);
412 return float_F[align];
413 } else if (mode == mode_D) {
414 static ir_type *float_D[16] = {NULL, };
416 if (float_D[align] == NULL) {
417 float_D[align] = tp = new_type_primitive(mode);
418 /* set the specified alignment */
419 set_type_alignment_bytes(tp, align);
421 return float_D[align];
/* default case: extended precision (mode_E) */
423 static ir_type *float_E[16] = {NULL, };
425 if (float_E[align] == NULL) {
426 float_E[align] = tp = new_type_primitive(mode);
427 /* set the specified alignment */
428 set_type_alignment_bytes(tp, align);
430 return float_E[align];
435 * Create a float[2] array type for the given atomic type.
437 * @param tp the atomic type
/* Cached per element mode and alignment, like ia32_create_float_type.
 * The array size is fixed at 2 elements (used for the ULL bias pair). */
439 static ir_type *ia32_create_float_array(ir_type *tp)
441 ir_mode *mode = get_type_mode(tp);
442 unsigned align = get_type_alignment_bytes(tp);
447 if (mode == mode_F) {
448 static ir_type *float_F[16] = {NULL, };
450 if (float_F[align] != NULL)
451 return float_F[align];
452 arr = float_F[align] = new_type_array(1, tp);
453 } else if (mode == mode_D) {
454 static ir_type *float_D[16] = {NULL, };
456 if (float_D[align] != NULL)
457 return float_D[align];
458 arr = float_D[align] = new_type_array(1, tp);
/* default case: extended precision (mode_E) */
460 static ir_type *float_E[16] = {NULL, };
462 if (float_E[align] != NULL)
463 return float_E[align];
464 arr = float_E[align] = new_type_array(1, tp);
/* Fix the layout: alignment of the element, size of two elements. */
466 set_type_alignment_bytes(arr, align);
467 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
468 set_type_state(arr, layout_fixed);
472 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Lazily creates (and caches) a read-only global entity for one of the
 * well-known FP constants: sign masks, abs masks, and the 2^64 ULL bias.
 * The ULL bias is emitted as a {0, bias} pair (see the array initializer
 * below). The mode field selects Iu/Lu/F; align is the entity alignment. */
473 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
475 static const struct {
476 const char *ent_name;
477 const char *cnst_str;
480 } names [ia32_known_const_max] = {
481 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
482 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
483 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
484 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
485 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
487 static ir_entity *ent_cache[ia32_known_const_max];
489 const char *ent_name, *cnst_str;
495 ent_name = names[kct].ent_name;
496 if (! ent_cache[kct]) {
497 cnst_str = names[kct].cnst_str;
499 switch (names[kct].mode) {
500 case 0: mode = mode_Iu; break;
501 case 1: mode = mode_Lu; break;
502 default: mode = mode_F; break;
504 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
505 tp = ia32_create_float_type(mode, names[kct].align);
507 if (kct == ia32_ULLBIAS)
508 tp = ia32_create_float_array(tp);
509 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
511 set_entity_ld_ident(ent, get_entity_ident(ent));
512 set_entity_visibility(ent, visibility_local);
513 set_entity_variability(ent, variability_constant);
514 set_entity_allocation(ent, allocation_static);
516 if (kct == ia32_ULLBIAS) {
/* Two-element initializer: element 0 is zero, element 1 the bias. */
517 ir_initializer_t *initializer = create_initializer_compound(2);
519 set_initializer_compound_value(initializer, 0,
520 create_initializer_tarval(get_tarval_null(mode)));
521 set_initializer_compound_value(initializer, 1,
522 create_initializer_tarval(tv));
524 set_entity_initializer(ent, initializer);
526 set_entity_initializer(ent, create_initializer_tarval(tv));
529 /* cache the entry */
530 ent_cache[kct] = ent;
533 return ent_cache[kct];
537 * return true if the node is a Proj(Load) and could be used in source address
538 * mode for another node. Will return only true if the @p other node is not
539 * dependent on the memory of the Load (for binary operations use the other
540 * input here, for unary operations use NULL).
542 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
543 ir_node *other, ir_node *other2, match_flags_t flags)
/* Constants: simple float constants can always be folded into AM;
 * integer/other constants with multiple users are rejected below. */
548 /* float constants are always available */
549 if (is_Const(node)) {
550 ir_mode *mode = get_irn_mode(node);
551 if (mode_is_float(mode)) {
552 if (ia32_cg_config.use_sse2) {
553 if (is_simple_sse_Const(node))
556 if (is_simple_x87_Const(node))
559 if (get_irn_n_edges(node) > 1)
/* From here on, node must be a Proj(Load, pn_Load_res) in this block. */
567 load = get_Proj_pred(node);
568 pn = get_Proj_proj(node);
569 if (!is_Load(load) || pn != pn_Load_res)
571 if (get_nodes_block(load) != block)
573 /* we only use address mode if we're the only user of the load */
574 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
576 /* in some edge cases with address mode we might reach the load normally
577 * and through some AM sequence, if it is already materialized then we
578 * can't create an AM node from it */
579 if (be_is_transformed(node))
582 /* don't do AM if other node inputs depend on the load (via mem-proj) */
583 if (other != NULL && prevents_AM(block, load, other))
586 if (other2 != NULL && prevents_AM(block, load, other2))
/* Result of match_arguments: the matched address (am.addr), operand kind
 * (Normal vs AddrModeS), and flags describing how the operands were matched.
 * NOTE(review): further members (addr, new_op1/new_op2, ls_mode, pinned,
 * mem_proj) are referenced elsewhere but not visible in this excerpt. */
592 typedef struct ia32_address_mode_t ia32_address_mode_t;
593 struct ia32_address_mode_t {
598 ia32_op_type_t op_type;
/* set if the operation is commutative (operands may be swapped) */
602 unsigned commutative : 1;
/* set if the operands were swapped during matching */
603 unsigned ins_permuted : 1;
/* Fill @p addr from a pointer value: decompose @p ptr into base/index/
 * offset via ia32_create_address_mode, transform base/index (defaulting
 * missing ones to noreg_GP) and transform the memory input. */
606 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
608 /* construct load address */
609 memset(addr, 0, sizeof(addr[0]));
610 ia32_create_address_mode(addr, ptr, 0);
612 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
613 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
614 addr->mem = be_transform_node(mem);
/* Fill am->addr for source address mode from @p node, which is either a
 * float Const (addressed through its constant entity) or a Proj(Load)
 * (addressed through the Load's pointer). Also records ls_mode, pinned
 * state and the Load's memory Proj in @p am. */
617 static void build_address(ia32_address_mode_t *am, ir_node *node,
618 ia32_create_am_flags_t flags)
620 ia32_address_t *addr = &am->addr;
626 if (is_Const(node)) {
/* Float constant: address the read-only entity holding its value. */
627 ir_entity *entity = create_float_const_entity(node);
628 addr->base = noreg_GP;
629 addr->index = noreg_GP;
631 addr->symconst_ent = entity;
633 am->ls_mode = get_type_mode(get_entity_type(entity));
/* Constant loads have no ordering constraints. */
634 am->pinned = op_pin_state_floats;
/* Otherwise node is a Proj(Load): fold the Load into the address mode. */
638 load = get_Proj_pred(node);
639 ptr = get_Load_ptr(load);
640 mem = get_Load_mem(load);
641 new_mem = be_transform_node(mem);
642 am->pinned = get_irn_pinned(load);
643 am->ls_mode = get_Load_mode(load);
644 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
647 /* construct load address */
648 ia32_create_address_mode(addr, ptr, flags);
650 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
651 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy the components of @p addr (scale, symconst, offset, sign, frame
 * entity) onto the ia32 attributes of @p node. */
655 static void set_address(ir_node *node, const ia32_address_t *addr)
657 set_ia32_am_scale(node, addr->scale);
658 set_ia32_am_sc(node, addr->symconst_ent);
659 set_ia32_am_offs_int(node, addr->offset);
660 if (addr->symconst_sign)
661 set_ia32_am_sc_sign(node);
663 set_ia32_use_frame(node);
664 set_ia32_frame_ent(node, addr->frame_entity);
668 * Apply attributes of a given address mode to a node.
670 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
672 set_address(node, &am->addr);
674 set_ia32_op_type(node, am->op_type);
675 set_ia32_ls_mode(node, am->ls_mode);
676 if (am->pinned == op_pin_state_pinned) {
677 /* beware: some nodes are already pinned and did not allow to change the state */
678 if (get_irn_pinned(node) != op_pin_state_pinned)
679 set_irn_pinned(node, op_pin_state_pinned)<
682 set_ia32_commutative(node);
686 * Check, if a given node is a Down-Conv, ie. a integer Conv
687 * from a mode with a mode with more bits to a mode with lesser bits.
688 * Moreover, we return only true if the node has not more than 1 user.
690 * @param node the node
691 * @return non-zero if node is a Down-Conv
693 static int is_downconv(const ir_node *node)
701 /* we only want to skip the conv when we're the only user
702 * (not optimal but for now...)
704 if (get_irn_n_edges(node) > 1)
707 src_mode = get_irn_mode(get_Conv_op(node));
708 dest_mode = get_irn_mode(node);
/* Both modes must live in GP registers and the Conv must not widen. */
710 ia32_mode_needs_gp_reg(src_mode) &&
711 ia32_mode_needs_gp_reg(dest_mode) &&
712 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
715 /* Skip all Down-Conv's on a given node and return the resulting node. */
716 ir_node *ia32_skip_downconv(ir_node *node)
718 while (is_downconv(node))
719 node = get_Conv_op(node);
/* Widen @p node to 32 bit via an I2I Conv; the target mode presumably
 * depends on the signedness of @p node's mode -- TODO confirm (the
 * assignments to tgt_mode are not visible in this excerpt). */
724 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
726 ir_mode *mode = get_irn_mode(node);
731 if (mode_is_signed(mode)) {
736 block = get_nodes_block(node);
737 dbgi = get_irn_dbg_info(node);
739 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
743 * matches operands of a node into ia32 addressing/operand modes. This covers
744 * usage of source address mode, immediates, operations with non 32-bit modes,
746 * The resulting data is filled into the @p am struct. block is the block
747 * of the node whose arguments are matched. op1, op2 are the first and second
748 * input that are matched (op1 may be NULL). other_op is another unrelated
749 * input that is not matched! but which is needed sometimes to check if AM
750 * for op1/op2 is legal.
751 * @p flags describes the supported modes of the operation in detail.
753 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
754 ir_node *op1, ir_node *op2, ir_node *other_op,
757 ia32_address_t *addr = &am->addr;
758 ir_mode *mode = get_irn_mode(op2);
759 int mode_bits = get_mode_size_bits(mode);
760 ir_node *new_op1, *new_op2;
762 unsigned commutative;
763 int use_am_and_immediates;
766 memset(am, 0, sizeof(am[0]));
/* Decode the match flags into local booleans. */
768 commutative = (flags & match_commutative) != 0;
769 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
770 use_am = (flags & match_am) != 0;
771 use_immediate = (flags & match_immediate) != 0;
772 assert(!use_am_and_immediates || use_immediate);
775 assert(!commutative || op1 != NULL);
776 assert(use_am || !(flags & match_8bit_am));
777 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit operations need explicit flag support for AM. */
779 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
780 (mode_bits == 16 && !(flags & match_16bit_am))) {
784 /* we can simply skip downconvs for mode neutral nodes: the upper bits
785 * can be random for these operations */
786 if (flags & match_mode_neutral) {
787 op2 = ia32_skip_downconv(op2);
789 op1 = ia32_skip_downconv(op1);
793 /* match immediates. firm nodes are normalized: constants are always on the
796 if (!(flags & match_try_am) && use_immediate) {
797 new_op2 = try_create_Immediate(op2, 0);
/* Try source AM for op2 first, then (if commutative) for op1. */
800 if (new_op2 == NULL &&
801 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
802 build_address(am, op2, 0);
803 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
804 if (mode_is_float(mode)) {
805 new_op2 = ia32_new_NoReg_vfp(env_cg);
809 am->op_type = ia32_AddrModeS;
810 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
812 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
/* op1 is folded into AM; the remaining register operand is op2. */
814 build_address(am, op1, 0);
816 if (mode_is_float(mode)) {
817 noreg = ia32_new_NoReg_vfp(env_cg);
822 if (new_op2 != NULL) {
825 new_op1 = be_transform_node(op2);
/* Record that the operands were swapped. */
827 am->ins_permuted = 1;
829 am->op_type = ia32_AddrModeS;
/* No AM matched: normal register/register (or reg/imm) operation. */
832 am->op_type = ia32_Normal;
834 if (flags & match_try_am) {
840 mode = get_irn_mode(op2);
841 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
842 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
844 new_op2 = create_upconv(op2, NULL);
845 am->ls_mode = mode_Iu;
847 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
849 new_op2 = be_transform_node(op2);
850 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* Default any unset address components. */
853 if (addr->base == NULL)
854 addr->base = noreg_GP;
855 if (addr->index == NULL)
856 addr->index = noreg_GP;
857 if (addr->mem == NULL)
860 am->new_op1 = new_op1;
861 am->new_op2 = new_op2;
862 am->commutative = commutative;
866 * "Fixes" a node that uses address mode by turning it into mode_T
867 * and returning a pn_ia32_res Proj.
869 * @param node the node
870 * @param am its address mode
872 * @return a Proj(pn_ia32_res) if a memory address mode is used,
875 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* Nothing to do if no Load memory Proj was folded into the node. */
880 if (am->mem_proj == NULL)
883 /* we have to create a mode_T so the old MemProj can attach to us */
884 mode = get_irn_mode(node);
885 load = get_Proj_pred(am->mem_proj);
/* Redirect users of the folded Load to the new combined node. */
887 be_set_transformed_node(load, node);
889 if (mode != mode_T) {
890 set_irn_mode(node, mode_T);
891 return new_rd_Proj(NULL, get_nodes_block(node), node, mode, pn_ia32_res);
898 * Construct a standard binary operation, set AM and immediate if required.
900 * @param node The original node for which the binop is created
901 * @param op1 The first operand
902 * @param op2 The second operand
903 * @param func The node constructor function
904 * @return The constructed ia32 node.
906 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
907 construct_binop_func *func, match_flags_t flags)
910 ir_node *block, *new_block, *new_node;
911 ia32_address_mode_t am;
912 ia32_address_t *addr = &am.addr;
914 block = get_nodes_block(node);
915 match_arguments(&am, block, op1, op2, NULL, flags);
917 dbgi = get_irn_dbg_info(node);
918 new_block = be_transform_node(block);
919 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
920 am.new_op1, am.new_op2);
921 set_am_attributes(new_node, &am);
922 /* we can't use source address mode anymore when using immediates */
923 if (!(flags & match_am_and_immediates) &&
924 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
925 set_ia32_am_support(new_node, ia32_am_none);
926 SET_IA32_ORIG_NODE(new_node, node);
928 new_node = fix_mem_proj(new_node, &am);
934 * Generic names for the inputs of an ia32 binary op.
937 n_ia32_l_binop_left, /**< ia32 left input */
938 n_ia32_l_binop_right, /**< ia32 right input */
939 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* Verify at compile time that the generic input indices line up with the
 * generated Adc and Sbb node input positions. */
941 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
942 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
943 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
944 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
945 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
946 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
949 * Construct a binary operation which also consumes the eflags.
951 * @param node The node to transform
952 * @param func The node constructor function
953 * @param flags The match flags
954 * @return The constructor ia32 node
956 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
959 ir_node *src_block = get_nodes_block(node);
960 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
961 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
962 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
964 ir_node *block, *new_node, *new_eflags;
965 ia32_address_mode_t am;
966 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: AM must not depend on its memory. */
968 match_arguments(&am, src_block, op1, op2, eflags, flags);
970 dbgi = get_irn_dbg_info(node);
971 block = be_transform_node(src_block);
972 new_eflags = be_transform_node(eflags);
973 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
974 am.new_op1, am.new_op2, new_eflags);
975 set_am_attributes(new_node, &am);
976 /* we can't use source address mode anymore when using immediates */
977 if (!(flags & match_am_and_immediates) &&
978 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
979 set_ia32_am_support(new_node, ia32_am_none);
980 SET_IA32_ORIG_NODE(new_node, node);
982 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed and cached) node representing the initial
 * x87 FP control word, obtained from the ABI as an ignore register. */
987 static ir_node *get_fpcw(void)
990 if (initial_fpcw != NULL)
993 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
994 &ia32_fp_cw_regs[REG_FPCW]);
995 initial_fpcw = be_transform_node(fpcw);
1001 * Construct a standard binary operation, set AM and immediate if required.
1003 * @param op1 The first operand
1004 * @param op2 The second operand
1005 * @param func The node constructor function
1006 * @return The constructed ia32 node.
1008 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1009 construct_binop_float_func *func)
1011 ir_mode *mode = get_irn_mode(node);
1013 ir_node *block, *new_block, *new_node;
1014 ia32_address_mode_t am;
1015 ia32_address_t *addr = &am.addr;
1016 ia32_x87_attr_t *attr;
1017 /* All operations are considered commutative, because there are reverse
1019 match_flags_t flags = match_commutative;
1021 /* happens for div nodes... */
1023 mode = get_divop_resmod(node);
1025 /* cannot use address mode with long double on x87 */
1026 if (get_mode_size_bits(mode) <= 64)
1029 block = get_nodes_block(node);
1030 match_arguments(&am, block, op1, op2, NULL, flags);
1032 dbgi = get_irn_dbg_info(node);
1033 new_block = be_transform_node(block);
/* x87 operations take the FP control word as an extra input. */
1034 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1035 am.new_op1, am.new_op2, get_fpcw());
1036 set_am_attributes(new_node, &am);
/* Propagate a possible operand swap so emission can pick the reversed
 * instruction form. */
1038 attr = get_ia32_x87_attr(new_node);
1039 attr->attr.data.ins_permuted = am.ins_permuted;
1041 SET_IA32_ORIG_NODE(new_node, node);
1043 new_node = fix_mem_proj(new_node, &am);
1049 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1051 * @param op1 The first operand
1052 * @param op2 The second operand
1053 * @param func The node constructor function
1054 * @return The constructed ia32 node.
1056 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1057 construct_shift_func *func,
1058 match_flags_t flags)
1061 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1063 assert(! mode_is_float(get_irn_mode(node)));
/* Only mode_neutral and immediate matching are supported for shifts. */
1064 assert(flags & match_immediate);
1065 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1067 if (flags & match_mode_neutral) {
1068 op1 = ia32_skip_downconv(op1);
1069 new_op1 = be_transform_node(op1);
1070 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1071 new_op1 = create_upconv(op1, node);
1073 new_op1 = be_transform_node(op1);
1076 /* the shift amount can be any mode that is bigger than 5 bits, since all
1077 * other bits are ignored anyway */
1078 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1079 ir_node *const op = get_Conv_op(op2);
1080 if (mode_is_float(get_irn_mode(op)))
1083 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
/* The shift amount may become an immediate where possible. */
1085 new_op2 = create_immediate_or_transform(op2, 0);
1087 dbgi = get_irn_dbg_info(node);
1088 block = get_nodes_block(node);
1089 new_block = be_transform_node(block);
1090 new_node = func(dbgi, new_block, new_op1, new_op2);
1091 SET_IA32_ORIG_NODE(new_node, node);
1093 /* lowered shift instruction may have a dependency operand, handle it here */
1094 if (get_irn_arity(node) == 3) {
1095 /* we have a dependency */
1096 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1097 add_irn_dep(new_node, new_dep);
1105 * Construct a standard unary operation, set AM and immediate if required.
1107 * @param op The operand
1108 * @param func The node constructor function
1109 * @return The constructed ia32 node.
1111 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1112 match_flags_t flags)
1115 ir_node *block, *new_block, *new_op, *new_node;
/* Unary ops support at most mode-neutral matching (no AM/immediates). */
1117 assert(flags == 0 || flags == match_mode_neutral);
1118 if (flags & match_mode_neutral) {
1119 op = ia32_skip_downconv(op);
1122 new_op = be_transform_node(op);
1123 dbgi = get_irn_dbg_info(node);
1124 block = get_nodes_block(node);
1125 new_block = be_transform_node(block);
1126 new_node = func(dbgi, new_block, new_op);
1128 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from a decomposed address: transform base and
 * index (defaulting missing ones, presumably to noreg_GP -- the else
 * branches are not visible in this excerpt) and apply the address
 * attributes to the Lea. */
1133 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1134 ia32_address_t *addr)
1136 ir_node *base, *index, *res;
1142 base = be_transform_node(base);
1145 index = addr->index;
1146 if (index == NULL) {
1149 index = be_transform_node(index);
1152 res = new_bd_ia32_Lea(dbgi, block, base, index);
1153 set_address(res, addr);
1159 * Returns non-zero if a given address mode has a symbolic or
1160 * numerical offset != 0.
1162 static int am_has_immediates(const ia32_address_t *addr)
1164 return addr->offset != 0 || addr->symconst_ent != NULL
1165 || addr->frame_entity || addr->use_frame;
1169 * Creates an ia32 Add.
1171 * @return the created ia32 Add node
1173 static ir_node *gen_Add(ir_node *node)
1175 ir_mode *mode = get_irn_mode(node);
1176 ir_node *op1 = get_Add_left(node);
1177 ir_node *op2 = get_Add_right(node);
1179 ir_node *block, *new_block, *new_node, *add_immediate_op;
1180 ia32_address_t addr;
1181 ia32_address_mode_t am;
/* Float adds go through the generic binop helpers. */
1183 if (mode_is_float(mode)) {
1184 if (ia32_cg_config.use_sse2)
1185 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1186 match_commutative | match_am);
1188 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1191 ia32_mark_non_am(node);
1193 op2 = ia32_skip_downconv(op2);
1194 op1 = ia32_skip_downconv(op1);
1198 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1199 * 1. Add with immediate -> Lea
1200 * 2. Add with possible source address mode -> Add
1201 * 3. Otherwise -> Lea
1203 memset(&addr, 0, sizeof(addr));
1204 ia32_create_address_mode(&addr, node, ia32_create_am_force)
1205 add_immediate_op = NULL;
1207 dbgi = get_irn_dbg_info(node);
1208 block = get_nodes_block(node);
1209 new_block = be_transform_node(block);
/* Case 0: everything folded into offset/symconst -> a plain Const. */
1212 if (addr.base == NULL && addr.index == NULL) {
1213 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1214 addr.symconst_sign, 0, addr.offset);
1215 be_dep_on_frame(new_node);
1216 SET_IA32_ORIG_NODE(new_node, node);
1219 /* add with immediate? */
1220 if (addr.index == NULL) {
1221 add_immediate_op = addr.base;
1222 } else if (addr.base == NULL && addr.scale == 0) {
1223 add_immediate_op = addr.index;
1226 if (add_immediate_op != NULL) {
/* Add x,0: no immediate part left, the add is a no-op. */
1227 if (!am_has_immediates(&addr)) {
1228 #ifdef DEBUG_libfirm
1229 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1232 return be_transform_node(add_immediate_op);
/* Case 1: register + immediate -> Lea. */
1235 new_node = create_lea_from_address(dbgi, new_block, &addr);
1236 SET_IA32_ORIG_NODE(new_node, node);
1240 /* test if we can use source address mode */
1241 match_arguments(&am, block, op1, op2, NULL, match_commutative
1242 | match_mode_neutral | match_am | match_immediate | match_try_am);
1244 /* construct an Add with source address mode */
1245 if (am.op_type == ia32_AddrModeS) {
1246 ia32_address_t *am_addr = &am.addr;
1247 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1248 am_addr->index, am_addr->mem, am.new_op1,
1250 set_am_attributes(new_node, &am);
1251 SET_IA32_ORIG_NODE(new_node, node);
1253 new_node = fix_mem_proj(new_node, &am);
1258 /* otherwise construct a lea */
1259 new_node = create_lea_from_address(dbgi, new_block, &addr);
1260 SET_IA32_ORIG_NODE(new_node, node);
1265 * Creates an ia32 Mul.
1267 * @return the created ia32 Mul node
1269 static ir_node *gen_Mul(ir_node *node)
1271 ir_node *op1 = get_Mul_left(node);
1272 ir_node *op2 = get_Mul_right(node);
1273 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE2 xMul or x87 vfmul */
1275 if (mode_is_float(mode)) {
1276 if (ia32_cg_config.use_sse2)
1277 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1278 match_commutative | match_am);
1280 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul; match_mode_neutral is fine because the
 * low 32 bits of the result do not depend on upper operand bits */
1282 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1283 match_commutative | match_am | match_mode_neutral |
1284 match_immediate | match_am_and_immediates);
1288 * Creates an ia32 Mulh.
1289 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1290 * this result while Mul returns the lower 32 bit.
1292 * @return the created ia32 Mulh node
1294 static ir_node *gen_Mulh(ir_node *node)
1296 ir_node *block = get_nodes_block(node);
1297 ir_node *new_block = be_transform_node(block);
1298 dbg_info *dbgi = get_irn_dbg_info(node);
1299 ir_node *op1 = get_Mulh_left(node);
1300 ir_node *op2 = get_Mulh_right(node);
1301 ir_mode *mode = get_irn_mode(node);
1303 ir_node *proj_res_high;
/* only 32x32->64 widening multiply is implemented */
1305 if (get_mode_size_bits(mode) != 32) {
1306 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand IMul, unsigned -> Mul; in both cases project
 * out the high 32 bits of the 64-bit result */
1309 if (mode_is_signed(mode)) {
1310 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1311 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1313 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1314 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_Mul_res_high);
1316 return proj_res_high;
1320 * Creates an ia32 And.
1322 * @return The created ia32 And node
1324 static ir_node *gen_And(ir_node *node)
1326 ir_node *op1 = get_And_left(node);
1327 ir_node *op2 = get_And_right(node);
1328 assert(! mode_is_float(get_irn_mode(node)));
1330 /* is it a zero extension? */
1331 if (is_Const(op2)) {
1332 tarval *tv = get_Const_tarval(op2);
1333 long v = get_tarval_long(tv);
/* And x, 0xFF / And x, 0xFFFF == zero-extension from 8/16 bit;
 * use a Conv instead of an actual And instruction */
1335 if (v == 0xFF || v == 0xFFFF) {
1336 dbg_info *dbgi = get_irn_dbg_info(node);
1337 ir_node *block = get_nodes_block(node);
/* NOTE(review): src_mode selection for the 0xFF (8 bit) case is
 * presumably handled before this assert — confirm */
1344 assert(v == 0xFFFF);
1347 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And */
1352 return gen_binop(node, op1, op2, new_bd_ia32_And,
1353 match_commutative | match_mode_neutral | match_am | match_immediate);
1359 * Creates an ia32 Or.
1361 * @return The created ia32 Or node
1363 static ir_node *gen_Or(ir_node *node)
1365 ir_node *op1 = get_Or_left(node);
1366 ir_node *op2 = get_Or_right(node);
/* bitwise Or only exists for integer modes */
1368 assert (! mode_is_float(get_irn_mode(node)));
1369 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1370 | match_mode_neutral | match_am | match_immediate);
1376 * Creates an ia32 Eor.
1378 * @return The created ia32 Eor node
1380 static ir_node *gen_Eor(ir_node *node)
1382 ir_node *op1 = get_Eor_left(node);
1383 ir_node *op2 = get_Eor_right(node);
/* Firm Eor (exclusive or) maps directly to the ia32 Xor instruction */
1385 assert(! mode_is_float(get_irn_mode(node)));
1386 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1387 | match_mode_neutral | match_am | match_immediate);
1392 * Creates an ia32 Sub.
1394 * @return The created ia32 Sub node
1396 static ir_node *gen_Sub(ir_node *node)
1398 ir_node *op1 = get_Sub_left(node);
1399 ir_node *op2 = get_Sub_right(node);
1400 ir_mode *mode = get_irn_mode(node);
/* float subtract: SSE2 xSub or x87 vfsub (not commutative, no
 * match_commutative here) */
1402 if (mode_is_float(mode)) {
1403 if (ia32_cg_config.use_sse2)
1404 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1406 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x, Const should have been normalized to Add x, -Const by the
 * middleend; warn when it still shows up here */
1409 if (is_Const(op2)) {
1410 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1414 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1415 | match_am | match_immediate);
/**
 * Combine the memory input of a node with the memory consumed by a
 * folded address-mode operand, while avoiding memory self-loops
 * (the new node must not depend on its own memory Proj).
 */
1418 static ir_node *transform_AM_mem(ir_node *const block,
1419 ir_node *const src_val,
1420 ir_node *const src_mem,
1421 ir_node *const am_mem)
/* no AM memory -> just use the transformed source memory */
1423 if (is_NoMem(am_mem)) {
1424 return be_transform_node(src_mem);
1425 } else if (is_Proj(src_val) &&
1427 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1428 /* avoid memory loop */
/* src_mem is part of a Sync: rebuild the Sync without the pred that
 * would create a loop, and append am_mem */
1430 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1431 ir_node *const ptr_pred = get_Proj_pred(src_val);
1432 int const arity = get_Sync_n_preds(src_mem);
1437 NEW_ARR_A(ir_node*, ins, arity + 1);
1439 /* NOTE: This sometimes produces dead-code because the old sync in
1440 * src_mem might not be used anymore, we should detect this case
1441 * and kill the sync... */
1442 for (i = arity - 1; i >= 0; --i) {
1443 ir_node *const pred = get_Sync_pred(src_mem, i);
1445 /* avoid memory loop */
1446 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1449 ins[n++] = be_transform_node(pred);
1454 return new_r_Sync(block, n, ins);
/* general case: Sync of the transformed src_mem and am_mem */
1458 ins[0] = be_transform_node(src_mem);
1460 return new_r_Sync(block, 2, ins);
1465 * Create a 32bit to 64bit signed extension.
1467 * @param dbgi debug info
1468 * @param block the block where node nodes should be placed
1469 * @param val the value to extend
1470 * @param orig the original node
1472 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1473 ir_node *val, const ir_node *orig)
/* two strategies: cltd (short encoding, clobbers eax/edx pair) or
 * Sar val, 31 (replicates the sign bit into all 32 result bits) */
1478 if (ia32_cg_config.use_short_sex_eax) {
1479 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1480 be_dep_on_frame(pval);
1481 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1483 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1484 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1486 SET_IA32_ORIG_NODE(res, orig);
1491 * Generates an ia32 DivMod with additional infrastructure for the
1492 * register allocator if needed.
/* Shared worker for Div, Mod and DivMod: all three map to the same
 * ia32 Div/IDiv instruction; callers project out the result they need. */
1494 static ir_node *create_Div(ir_node *node)
1496 dbg_info *dbgi = get_irn_dbg_info(node);
1497 ir_node *block = get_nodes_block(node);
1498 ir_node *new_block = be_transform_node(block);
1505 ir_node *sign_extension;
1506 ia32_address_mode_t am;
1507 ia32_address_t *addr = &am.addr;
1509 /* the upper bits have random contents for smaller modes */
1510 switch (get_irn_opcode(node)) {
1512 op1 = get_Div_left(node);
1513 op2 = get_Div_right(node);
1514 mem = get_Div_mem(node);
1515 mode = get_Div_resmode(node);
1518 op1 = get_Mod_left(node);
1519 op2 = get_Mod_right(node);
1520 mem = get_Mod_mem(node);
1521 mode = get_Mod_resmode(node);
1524 op1 = get_DivMod_left(node);
1525 op2 = get_DivMod_right(node);
1526 mem = get_DivMod_mem(node);
1527 mode = get_DivMod_resmode(node);
1530 panic("invalid divmod node %+F", node);
1533 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1535 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1536 is the memory of the consumed address. We can have only the second op as address
1537 in Div nodes, so check only op2. */
1538 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed: extend the dividend into edx via cltd/sar; unsigned: edx
 * must simply be zero */
1540 if (mode_is_signed(mode)) {
1541 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1542 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1543 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1545 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1546 be_dep_on_frame(sign_extension);
1548 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1549 addr->index, new_mem, am.new_op2,
1550 am.new_op1, sign_extension);
/* keep the pinned state of the original node (division may trap) */
1553 set_irn_pinned(new_node, get_irn_pinned(node));
1555 set_am_attributes(new_node, &am);
1556 SET_IA32_ORIG_NODE(new_node, node);
1558 new_node = fix_mem_proj(new_node, &am);
1564 * Generates an ia32 Mod.
/* thin wrapper: remainder comes from the same Div instruction */
1566 static ir_node *gen_Mod(ir_node *node)
1568 return create_Div(node);
1572 * Generates an ia32 Div.
/* thin wrapper around the shared Div/Mod/DivMod worker */
1574 static ir_node *gen_Div(ir_node *node)
1576 return create_Div(node);
1580 * Generates an ia32 DivMod.
/* thin wrapper: quotient and remainder are Projs of one instruction */
1582 static ir_node *gen_DivMod(ir_node *node)
1584 return create_Div(node);
1590 * Creates an ia32 floating Div.
1592 * @return The created ia32 xDiv node
1594 static ir_node *gen_Quot(ir_node *node)
1596 ir_node *op1 = get_Quot_left(node);
1597 ir_node *op2 = get_Quot_right(node);
/* float division: SSE2 xDiv when available, otherwise x87 vfdiv */
1599 if (ia32_cg_config.use_sse2) {
1600 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1602 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1608 * Creates an ia32 Shl.
1610 * @return The created ia32 Shl node
1612 static ir_node *gen_Shl(ir_node *node)
1614 ir_node *left = get_Shl_left(node);
1615 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: upper bits never influence the result */
1617 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1618 match_mode_neutral | match_immediate);
1622 * Creates an ia32 Shr.
1624 * @return The created ia32 Shr node
1626 static ir_node *gen_Shr(ir_node *node)
1628 ir_node *left = get_Shr_left(node);
1629 ir_node *right = get_Shr_right(node);
/* no match_mode_neutral: a logical right shift depends on the upper
 * bits being properly zeroed */
1631 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1637 * Creates an ia32 Sar.
1639 * @return The created ia32 Shrs node
1641 static ir_node *gen_Shrs(ir_node *node)
1643 ir_node *left = get_Shrs_left(node);
1644 ir_node *right = get_Shrs_right(node);
1646 if (is_Const(right)) {
1647 tarval *tv = get_Const_tarval(right);
1648 long val = get_tarval_long(tv);
/* NOTE(review): presumably guarded by val == 31 — Shrs x, 31 is
 * exactly the 32->64 sign-extension pattern; confirm elided guard */
1650 /* this is a sign extension */
1651 dbg_info *dbgi = get_irn_dbg_info(node);
1652 ir_node *block = be_transform_node(get_nodes_block(node));
1653 ir_node *new_op = be_transform_node(left);
1655 return create_sex_32_64(dbgi, block, new_op, node);
1659 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, c), c) with c in {16, 24} sign-extends the low
 * 16/8 bits -> use a Conv instead of two shifts */
1660 if (is_Const(right) && is_Shl(left)) {
1661 ir_node *shl_left = get_Shl_left(left);
1662 ir_node *shl_right = get_Shl_right(left);
1663 if (is_Const(shl_right)) {
1664 tarval *tv1 = get_Const_tarval(right);
1665 tarval *tv2 = get_Const_tarval(shl_right);
1666 if (tv1 == tv2 && tarval_is_long(tv1)) {
1667 long val = get_tarval_long(tv1);
1668 if (val == 16 || val == 24) {
1669 dbg_info *dbgi = get_irn_dbg_info(node);
1670 ir_node *block = get_nodes_block(node);
1680 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: arithmetic shift right */
1689 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1695 * Creates an ia32 Rol.
1697 * @param op1 The first operator
1698 * @param op2 The second operator
1699 * @return The created ia32 RotL node
1701 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1703 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1709 * Creates an ia32 Ror.
1710 * NOTE: There is no RotR with immediate because this would always be a RotL
1711 * "imm-mode_size_bits" which can be pre-calculated.
1713 * @param op1 The first operator
1714 * @param op2 The second operator
1715 * @return The created ia32 RotR node
1717 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1719 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1725 * Creates an ia32 RotR or RotL (depending on the found pattern).
1727 * @return The created ia32 RotL or RotR node
1729 static ir_node *gen_Rotl(ir_node *node)
1731 ir_node *rotate = NULL;
1732 ir_node *op1 = get_Rotl_left(node);
1733 ir_node *op2 = get_Rotl_right(node);
1735 /* Firm has only RotL, so we are looking for a right (op2)
1736 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1737 that means we can create a RotR instead of an Add and a RotL */
/* pattern: Rotl(x, Add(Minus(e), bits)) == Ror(x, e) */
1741 ir_node *left = get_Add_left(add);
1742 ir_node *right = get_Add_right(add);
1743 if (is_Const(right)) {
1744 tarval *tv = get_Const_tarval(right);
1745 ir_mode *mode = get_irn_mode(node);
1746 long bits = get_mode_size_bits(mode);
1748 if (is_Minus(left) &&
1749 tarval_is_long(tv) &&
1750 get_tarval_long(tv) == bits &&
1753 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1754 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* pattern did not match: emit a plain RotL */
1759 if (rotate == NULL) {
1760 rotate = gen_Rol(node, op1, op2);
1769 * Transforms a Minus node.
1771 * @return The created ia32 Minus node
1773 static ir_node *gen_Minus(ir_node *node)
1775 ir_node *op = get_Minus_op(node);
1776 ir_node *block = be_transform_node(get_nodes_block(node));
1777 dbg_info *dbgi = get_irn_dbg_info(node);
1778 ir_mode *mode = get_irn_mode(node);
1783 if (mode_is_float(mode)) {
1784 ir_node *new_op = be_transform_node(op);
/* SSE2: negate by xor-ing the sign bit with a constant from memory */
1785 if (ia32_cg_config.use_sse2) {
1786 /* TODO: non-optimal... if we have many xXors, then we should
1787 * rather create a load for the const and use that instead of
1788 * several AM nodes... */
1789 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1791 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1792 nomem, new_op, noreg_xmm);
/* pick the single- or double-precision sign-mask constant */
1794 size = get_mode_size_bits(mode);
1795 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1797 set_ia32_am_sc(new_node, ent);
1798 set_ia32_op_type(new_node, ia32_AddrModeS);
1799 set_ia32_ls_mode(new_node, mode);
/* x87: fchs flips the sign directly */
1801 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer: plain Neg */
1804 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1807 SET_IA32_ORIG_NODE(new_node, node);
1813 * Transforms a Not node.
1815 * @return The created ia32 Not node
1817 static ir_node *gen_Not(ir_node *node)
1819 ir_node *op = get_Not_op(node);
1821 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1822 assert (! mode_is_float(get_irn_mode(node)));
1824 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1830 * Transforms an Abs node.
1832 * @return The created ia32 Abs node
1834 static ir_node *gen_Abs(ir_node *node)
1836 ir_node *block = get_nodes_block(node);
1837 ir_node *new_block = be_transform_node(block);
1838 ir_node *op = get_Abs_op(node);
1839 dbg_info *dbgi = get_irn_dbg_info(node);
1840 ir_mode *mode = get_irn_mode(node);
1846 if (mode_is_float(mode)) {
1847 new_op = be_transform_node(op);
/* SSE2: clear the sign bit by and-ing with an abs mask constant */
1849 if (ia32_cg_config.use_sse2) {
1850 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1851 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1852 nomem, new_op, noreg_fp);
1854 size = get_mode_size_bits(mode);
1855 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1857 set_ia32_am_sc(new_node, ent);
1859 SET_IA32_ORIG_NODE(new_node, node);
1861 set_ia32_op_type(new_node, ia32_AddrModeS);
1862 set_ia32_ls_mode(new_node, mode);
/* x87: fabs */
1864 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1865 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs via the branch-free idiom:
 * s = x >> 31; abs = (x ^ s) - s */
1868 ir_node *xor, *sign_extension;
1870 if (get_mode_size_bits(mode) == 32) {
1871 new_op = be_transform_node(op);
/* smaller modes must be sign-extended to 32 bit first */
1873 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1876 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1878 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1879 nomem, new_op, sign_extension);
1880 SET_IA32_ORIG_NODE(xor, node);
1882 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1883 nomem, xor, sign_extension);
1884 SET_IA32_ORIG_NODE(new_node, node);
1891 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1893 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1895 dbg_info *dbgi = get_irn_dbg_info(cmp);
1896 ir_node *block = get_nodes_block(cmp);
1897 ir_node *new_block = be_transform_node(block);
1898 ir_node *op1 = be_transform_node(x);
1899 ir_node *op2 = be_transform_node(n);
/* bt copies bit n of op1 into the carry flag */
1901 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1905 * Transform a node returning a "flag" result.
1907 * @param node the node to transform
1908 * @param pnc_out the compare mode to use
/* Produces a flags-mode value for conditional constructs and reports
 * via *pnc_out which condition code to test on it. */
1910 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1917 /* we have a Cmp as input */
1918 if (is_Proj(node)) {
1919 ir_node *pred = get_Proj_pred(node);
1921 pn_Cmp pnc = get_Proj_proj(node);
/* try the bt pattern: Cmp((1 << n) & x, 0/la) with ==/!= turns into
 * a Bt instruction tested via carry (Jc/Jnc) */
1922 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1923 ir_node *l = get_Cmp_left(pred);
1924 ir_node *r = get_Cmp_right(pred);
1926 ir_node *la = get_And_left(l);
1927 ir_node *ra = get_And_right(l);
1929 ir_node *c = get_Shl_left(la);
1930 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1931 /* (1 << n) & ra) */
1932 ir_node *n = get_Shl_right(la);
1933 flags = gen_bt(pred, ra, n);
1934 /* we must generate a Jc/Jnc jump */
1935 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1938 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* mirrored case: the shift is the right And operand */
1943 ir_node *c = get_Shl_left(ra);
1944 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1945 /* la & (1 << n)) */
1946 ir_node *n = get_Shl_right(ra);
1947 flags = gen_bt(pred, la, n);
1948 /* we must generate a Jc/Jnc jump */
1949 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1952 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* plain Cmp: transform it and tag float compares specially */
1958 flags = be_transform_node(pred);
1959 if (mode_is_float(get_irn_mode(get_Cmp_left(pred))))
1960 pnc |= ia32_pn_Cmp_float;
1966 /* a mode_b value, we have to compare it against 0 */
1967 dbgi = get_irn_dbg_info(node);
1968 new_block = be_transform_node(get_nodes_block(node));
1969 new_op = be_transform_node(node);
1970 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1971 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1972 *pnc_out = pn_Cmp_Lg;
1977 * Transforms a Load.
1979 * @return the created ia32 Load node
1981 static ir_node *gen_Load(ir_node *node)
1983 ir_node *old_block = get_nodes_block(node);
1984 ir_node *block = be_transform_node(old_block);
1985 ir_node *ptr = get_Load_ptr(node);
1986 ir_node *mem = get_Load_mem(node);
1987 ir_node *new_mem = be_transform_node(mem);
1990 dbg_info *dbgi = get_irn_dbg_info(node);
1991 ir_mode *mode = get_Load_mode(node);
1994 ia32_address_t addr;
1996 /* construct load address */
1997 memset(&addr, 0, sizeof(addr));
1998 ia32_create_address_mode(&addr, ptr, 0);
2005 base = be_transform_node(base);
2008 if (index == NULL) {
2011 index = be_transform_node(index);
/* float loads: SSE2 xLoad or x87 vfld */
2014 if (mode_is_float(mode)) {
2015 if (ia32_cg_config.use_sse2) {
2016 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2018 res_mode = mode_xmm;
2020 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2022 res_mode = mode_vfp;
2025 assert(mode != mode_b);
2027 /* create a conv node with address mode for smaller modes */
2028 if (get_mode_size_bits(mode) < 32) {
2029 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2030 new_mem, noreg_GP, mode);
2032 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2037 set_irn_pinned(new_node, get_irn_pinned(node));
2038 set_ia32_op_type(new_node, ia32_AddrModeS);
2039 set_ia32_ls_mode(new_node, mode);
2040 set_address(new_node, &addr);
/* a floating (non-pinned) load may be rematerialized by the
 * register allocator instead of spilled */
2042 if (get_irn_pinned(node) == op_pin_state_floats) {
2043 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2044 && pn_ia32_vfld_res == pn_ia32_Load_res
2045 && pn_ia32_Load_res == pn_ia32_res);
2046 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2049 SET_IA32_ORIG_NODE(new_node, node);
2051 be_dep_on_frame(new_node);
/**
 * Checks whether a Load (reached through the value Proj @p node) can be
 * folded into a destination-address-mode operation storing to @p ptr.
 */
2055 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2056 ir_node *ptr, ir_node *other)
2063 /* we only use address mode if we're the only user of the load */
2064 if (get_irn_n_edges(node) > 1)
2067 load = get_Proj_pred(node);
/* load and store must be in the same block */
2070 if (get_nodes_block(load) != block)
2073 /* store should have the same pointer as the load */
2074 if (get_Load_ptr(load) != ptr)
2077 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2078 if (other != NULL &&
2079 get_nodes_block(other) == block &&
2080 heights_reachable_in_block(heights, other, load)) {
2084 if (prevents_AM(block, load, mem))
2086 /* Store should be attached to the load via mem */
2087 assert(heights_reachable_in_block(heights, mem, load));
/**
 * Builds a destination-address-mode binary operation (op [mem], reg/imm)
 * for a Store(binop(Load(ptr), x)) pattern, folding load and store into
 * one read-modify-write instruction.
 */
2092 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2093 ir_node *mem, ir_node *ptr, ir_mode *mode,
2094 construct_binop_dest_func *func,
2095 construct_binop_dest_func *func8bit,
2096 match_flags_t flags)
2098 ir_node *src_block = get_nodes_block(node);
2106 ia32_address_mode_t am;
2107 ia32_address_t *addr = &am.addr;
2108 memset(&am, 0, sizeof(am));
2110 assert(flags & match_immediate); /* there is no destam node without... */
2111 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is the foldable load; commutative ops may
 * swap the operands to make that possible */
2113 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2114 build_address(&am, op1, ia32_create_am_double_use);
2115 new_op = create_immediate_or_transform(op2, 0);
2116 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2117 build_address(&am, op2, ia32_create_am_double_use);
2118 new_op = create_immediate_or_transform(op1, 0);
/* fill in defaults for unused address parts */
2123 if (addr->base == NULL)
2124 addr->base = noreg_GP;
2125 if (addr->index == NULL)
2126 addr->index = noreg_GP;
2127 if (addr->mem == NULL)
2130 dbgi = get_irn_dbg_info(node);
2131 block = be_transform_node(src_block);
2132 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8-bit operations need the dedicated 8-bit variant */
2134 if (get_mode_size_bits(mode) == 8) {
2135 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2137 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2139 set_address(new_node, addr);
2140 set_ia32_op_type(new_node, ia32_AddrModeD);
2141 set_ia32_ls_mode(new_node, mode);
2142 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory Proj to the new combined node */
2144 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2145 mem_proj = be_transform_node(am.mem_proj);
2146 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Builds a destination-address-mode unary operation (op [mem]) for a
 * Store(unop(Load(ptr))) pattern; returns NULL if folding is not legal.
 */
2151 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2152 ir_node *ptr, ir_mode *mode,
2153 construct_unop_dest_func *func)
2155 ir_node *src_block = get_nodes_block(node);
2161 ia32_address_mode_t am;
2162 ia32_address_t *addr = &am.addr;
2164 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2167 memset(&am, 0, sizeof(am));
2168 build_address(&am, op, ia32_create_am_double_use);
2170 dbgi = get_irn_dbg_info(node);
2171 block = be_transform_node(src_block);
2172 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2173 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2174 set_address(new_node, addr);
2175 set_ia32_op_type(new_node, ia32_AddrModeD);
2176 set_ia32_ls_mode(new_node, mode);
2177 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory Proj to the new combined node */
2179 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2180 mem_proj = be_transform_node(am.mem_proj);
2181 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Negates a (possibly float-tagged) ia32 compare code; the mode passed
 * to get_negated_pnc controls unordered-compare handling. */
2186 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2188 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2189 return get_negated_pnc(pnc, mode);
/**
 * Tries to turn a Store(Mux(cond, 1, 0)) of an 8-bit value into a
 * SetccMem (setcc directly to memory); returns NULL when not applicable.
 */
2192 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2194 ir_mode *mode = get_irn_mode(node);
2195 ir_node *mux_true = get_Mux_true(node);
2196 ir_node *mux_false = get_Mux_false(node);
2206 ia32_address_t addr;
/* setcc writes exactly one byte */
2208 if (get_mode_size_bits(mode) != 8)
/* Mux(c, 1, 0) stores the condition directly; Mux(c, 0, 1) stores
 * the negated condition */
2211 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2213 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2219 cond = get_Mux_sel(node);
2220 flags = get_flags_node(cond, &pnc);
2221 /* we can't handle the float special cases with SetM */
2222 if (pnc & ia32_pn_Cmp_float)
2225 pnc = ia32_get_negated_pnc(pnc);
2227 build_address_ptr(&addr, ptr, mem);
2229 dbgi = get_irn_dbg_info(node);
2230 block = get_nodes_block(node);
2231 new_block = be_transform_node(block);
2232 new_mem = be_transform_node(mem);
2233 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2234 addr.index, addr.mem, flags, pnc);
2235 set_address(new_node, &addr);
2236 set_ia32_op_type(new_node, ia32_AddrModeD);
2237 set_ia32_ls_mode(new_node, mode);
2238 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Tries to transform Store(op(Load(ptr), ...)) into a single
 * destination-address-mode instruction (read-modify-write on memory).
 * Returns the new node or NULL when no pattern matched.
 */
2243 static ir_node *try_create_dest_am(ir_node *node)
2245 ir_node *val = get_Store_value(node);
2246 ir_node *mem = get_Store_mem(node);
2247 ir_node *ptr = get_Store_ptr(node);
2248 ir_mode *mode = get_irn_mode(val);
2249 unsigned bits = get_mode_size_bits(mode);
2254 /* handle only GP modes for now... */
2255 if (!ia32_mode_needs_gp_reg(mode))
2259 /* store must be the only user of the val node */
2260 if (get_irn_n_edges(val) > 1)
2262 /* skip pointless convs */
2264 ir_node *conv_op = get_Conv_op(val);
2265 ir_mode *pred_mode = get_irn_mode(conv_op);
2266 if (!ia32_mode_needs_gp_reg(pred_mode))
2268 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2276 /* value must be in the same block */
2277 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation computing the stored value */
2280 switch (get_irn_opcode(val)) {
2282 op1 = get_Add_left(val);
2283 op2 = get_Add_right(val);
/* Add +1/-1 becomes inc/dec on memory when the target prefers it */
2284 if (ia32_cg_config.use_incdec) {
2285 if (is_Const_1(op2)) {
2286 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2288 } else if (is_Const_Minus_1(op2)) {
2289 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2293 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2294 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2295 match_commutative | match_immediate);
2298 op1 = get_Sub_left(val);
2299 op2 = get_Sub_right(val);
2300 if (is_Const(op2)) {
2301 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2303 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2304 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2308 op1 = get_And_left(val);
2309 op2 = get_And_right(val);
2310 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2311 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2312 match_commutative | match_immediate);
2315 op1 = get_Or_left(val);
2316 op2 = get_Or_right(val);
2317 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2318 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2319 match_commutative | match_immediate);
2322 op1 = get_Eor_left(val);
2323 op2 = get_Eor_right(val);
2324 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2325 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2326 match_commutative | match_immediate);
2329 op1 = get_Shl_left(val);
2330 op2 = get_Shl_right(val);
/* shift-on-memory nodes have no separate 8-bit variant, hence the
 * same constructor is passed twice */
2331 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2332 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2336 op1 = get_Shr_left(val);
2337 op2 = get_Shr_right(val);
2338 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2339 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2343 op1 = get_Shrs_left(val);
2344 op2 = get_Shrs_right(val);
2345 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2346 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2350 op1 = get_Rotl_left(val);
2351 op2 = get_Rotl_right(val);
2352 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2353 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2356 /* TODO: match ROR patterns... */
2358 new_node = try_create_SetMem(val, ptr, mem);
2362 op1 = get_Minus_op(val);
2363 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2366 /* should be lowered already */
2367 assert(mode != mode_b);
2368 op1 = get_Not_op(val);
2369 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned Store must produce a pinned replacement */
2375 if (new_node != NULL) {
2376 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2377 get_irn_pinned(node) == op_pin_state_pinned) {
2378 set_irn_pinned(new_node, op_pin_state_pinned);
/* Checks whether an integer mode can be the target of an x87
 * float-to-int store (fist): must be signed and 16 or 32 bit wide. */
2385 static bool possible_int_mode_for_fp(ir_mode *mode)
2389 if (!mode_is_signed(mode))
2391 size = get_mode_size_bits(mode);
2392 if (size != 16 && size != 32)
/* Returns non-zero if @p node is a Conv from a float mode to an integer
 * mode that a fist instruction can store directly. */
2397 static int is_float_to_int_conv(const ir_node *node)
2399 ir_mode *mode = get_irn_mode(node);
2403 if (!possible_int_mode_for_fp(mode))
2408 conv_op = get_Conv_op(node);
2409 conv_mode = get_irn_mode(conv_op);
2411 if (!mode_is_float(conv_mode))
2418 * Transform a Store(floatConst) into a sequence of
2421 * @return the created ia32 Store node
/* Emits one 32-bit integer Store per 4 bytes of the float constant and
 * Syncs their memory outputs; avoids loading the constant into an FP
 * register at all. */
2423 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2425 ir_mode *mode = get_irn_mode(cns);
2426 unsigned size = get_mode_size_bytes(mode);
2427 tarval *tv = get_Const_tarval(cns);
2428 ir_node *block = get_nodes_block(node);
2429 ir_node *new_block = be_transform_node(block);
2430 ir_node *ptr = get_Store_ptr(node);
2431 ir_node *mem = get_Store_mem(node);
2432 dbg_info *dbgi = get_irn_dbg_info(node);
2436 ia32_address_t addr;
/* only whole 32-bit words are emitted */
2438 assert(size % 4 == 0);
2441 build_address_ptr(&addr, ptr, mem);
/* assemble the next 4 constant bytes, little-endian */
2445 get_tarval_sub_bits(tv, ofs) |
2446 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2447 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2448 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2449 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2451 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2452 addr.index, addr.mem, imm);
2454 set_irn_pinned(new_node, get_irn_pinned(node));
2455 set_ia32_op_type(new_node, ia32_AddrModeD);
2456 set_ia32_ls_mode(new_node, mode_Iu);
2457 set_address(new_node, &addr);
2458 SET_IA32_ORIG_NODE(new_node, node);
2461 ins[i++] = new_node;
2466 } while (size != 0);
/* merge the memory outputs of all partial stores */
2469 return new_rd_Sync(dbgi, new_block, i, ins);
2476 * Generate a vfist or vfisttp instruction.
/* @param fist out-parameter receiving the actual store node (the
 *        NOTE(review): assignment to *fist is elided in this view —
 *        confirm against callers that use the out-parameter). */
2478 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2479 ir_node *mem, ir_node *val, ir_node **fist)
2483 if (ia32_cg_config.use_fisttp) {
2484 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2485 if other users exists */
2486 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2487 ir_node *value = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2488 be_new_Keep(block, 1, &value);
2490 new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* no fisttp: classic fist needs the FPU control word switched to
 * truncation rounding */
2493 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2496 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2502 * Transforms a general (no special case) Store.
2504 * @return the created ia32 Store node
2506 static ir_node *gen_general_Store(ir_node *node)
2508 ir_node *val = get_Store_value(node);
2509 ir_mode *mode = get_irn_mode(val);
2510 ir_node *block = get_nodes_block(node);
2511 ir_node *new_block = be_transform_node(block);
2512 ir_node *ptr = get_Store_ptr(node);
2513 ir_node *mem = get_Store_mem(node);
2514 dbg_info *dbgi = get_irn_dbg_info(node);
2515 ir_node *new_val, *new_node, *store;
2516 ia32_address_t addr;
2518 /* check for destination address mode */
2519 new_node = try_create_dest_am(node);
2520 if (new_node != NULL)
2523 /* construct store address */
2524 memset(&addr, 0, sizeof(addr));
2525 ia32_create_address_mode(&addr, ptr, 0);
2527 if (addr.base == NULL) {
2528 addr.base = noreg_GP;
2530 addr.base = be_transform_node(addr.base);
2533 if (addr.index == NULL) {
2534 addr.index = noreg_GP;
2536 addr.index = be_transform_node(addr.index);
2538 addr.mem = be_transform_node(mem);
2540 if (mode_is_float(mode)) {
2541 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2543 while (is_Conv(val) && mode == get_irn_mode(val)) {
2544 ir_node *op = get_Conv_op(val);
2545 if (!mode_is_float(get_irn_mode(op)))
2549 new_val = be_transform_node(val);
2550 if (ia32_cg_config.use_sse2) {
2551 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2552 addr.index, addr.mem, new_val);
2554 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2555 addr.index, addr.mem, new_val, mode);
/* float->int conversion feeding a store on x87: store the integer
 * directly with fist(tp) instead of converting first */
2558 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2559 val = get_Conv_op(val);
2561 /* TODO: is this optimisation still necessary at all (middleend)? */
2562 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2563 while (is_Conv(val)) {
2564 ir_node *op = get_Conv_op(val);
2565 if (!mode_is_float(get_irn_mode(op)))
2567 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2571 new_val = be_transform_node(val);
2572 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* plain integer store; 8-bit stores need the dedicated variant
 * because only a/b/c/d registers have byte subregisters */
2574 new_val = create_immediate_or_transform(val, 0);
2575 assert(mode != mode_b);
2577 if (get_mode_size_bits(mode) == 8) {
2578 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2579 addr.index, addr.mem, new_val);
2581 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2582 addr.index, addr.mem, new_val);
2587 set_irn_pinned(store, get_irn_pinned(node));
2588 set_ia32_op_type(store, ia32_AddrModeD);
2589 set_ia32_ls_mode(store, mode);
2591 set_address(store, &addr);
2592 SET_IA32_ORIG_NODE(store, node);
2598 * Transforms a Store.
2600 * @return the created ia32 Store node
2602 static ir_node *gen_Store(ir_node *node)
2604 ir_node *val = get_Store_value(node);
2605 ir_mode *mode = get_irn_mode(val);
2607 if (mode_is_float(mode) && is_Const(val)) {
2608 /* We can transform every floating const store
2609 into a sequence of integer stores.
2610 If the constant is already in a register,
2611 it would be better to use it, but we don't
2612 have this information here. */
2613 return gen_float_const_Store(node, val);
2615 return gen_general_Store(node);
2619 * Transforms a Switch.
2621 * @return the created ia32 SwitchJmp node
2623 static ir_node *create_Switch(ir_node *node)
/* Lower a Cond with a (32bit) switch selector to an ia32 SwitchJmp.
 * The selector is rebased so the smallest non-default case becomes 0
 * (via a Lea with a negative offset); overly wide case ranges panic. */
2625 dbg_info *dbgi = get_irn_dbg_info(node);
2626 ir_node *block = be_transform_node(get_nodes_block(node));
2627 ir_node *sel = get_Cond_selector(node);
2628 ir_node *new_sel = be_transform_node(sel);
2629 long switch_min = LONG_MAX;
2630 long switch_max = LONG_MIN;
2631 long default_pn = get_Cond_default_proj(node);
2633 const ir_edge_t *edge;
2635 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2637 /* determine the smallest switch case value */
2638 foreach_out_edge(node, edge) {
2639 ir_node *proj = get_edge_src_irn(edge);
2640 long pn = get_Proj_proj(proj);
/* the default proj does not contribute to the case value range */
2641 if (pn == default_pn)
2644 if (pn < switch_min)
2646 if (pn > switch_max)
/* reject degenerate jump tables; 128000 is the implementation limit here */
2650 if ((unsigned long) (switch_max - switch_min) > 128000) {
2651 panic("Size of switch %+F bigger than 128000", node);
2654 if (switch_min != 0) {
2655 /* if smallest switch case is not 0 we need an additional sub */
2656 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2657 add_ia32_am_offs_int(new_sel, -switch_min);
2658 set_ia32_op_type(new_sel, ia32_AddrModeS);
2660 SET_IA32_ORIG_NODE(new_sel, node);
2663 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2664 SET_IA32_ORIG_NODE(new_node, node);
2670 * Transform a Cond node.
2672 static ir_node *gen_Cond(ir_node *node)
2674 ir_node *block = get_nodes_block(node);
2675 ir_node *new_block = be_transform_node(block);
2676 dbg_info *dbgi = get_irn_dbg_info(node);
2677 ir_node *sel = get_Cond_selector(node);
2678 ir_mode *sel_mode = get_irn_mode(sel);
2679 ir_node *flags = NULL;
2683 if (sel_mode != mode_b) {
2684 return create_Switch(node);
2687 /* we get flags from a Cmp */
2688 flags = get_flags_node(sel, &pnc);
2690 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2691 SET_IA32_ORIG_NODE(new_node, node);
2697 * Transform a be_Copy.
2699 static ir_node *gen_be_Copy(ir_node *node)
2701 ir_node *new_node = be_duplicate_node(node);
2702 ir_mode *mode = get_irn_mode(new_node);
2704 if (ia32_mode_needs_gp_reg(mode)) {
2705 set_irn_mode(new_node, mode_Iu);
2711 static ir_node *create_Fucom(ir_node *node)
/* Create an x87 floating point compare for a Cmp node.
 * Uses fucomi when available; otherwise falls back to
 * ftst/fucom + fnstsw + sahf to move the fpu flags into eflags. */
2713 dbg_info *dbgi = get_irn_dbg_info(node);
2714 ir_node *block = get_nodes_block(node);
2715 ir_node *new_block = be_transform_node(block);
2716 ir_node *left = get_Cmp_left(node);
2717 ir_node *new_left = be_transform_node(left);
2718 ir_node *right = get_Cmp_right(node);
2722 if (ia32_cg_config.use_fucomi) {
/* fucomi writes eflags directly */
2723 new_right = be_transform_node(right);
2724 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2726 set_ia32_commutative(new_node);
2727 SET_IA32_ORIG_NODE(new_node, node);
/* comparing against 0 can use the shorter ftst */
2729 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2730 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2732 new_right = be_transform_node(right);
2733 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2736 set_ia32_commutative(new_node);
2738 SET_IA32_ORIG_NODE(new_node, node);
/* transfer the fnstsw result (in ax) into the eflags register */
2740 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2741 SET_IA32_ORIG_NODE(new_node, node);
2747 static ir_node *create_Ucomi(ir_node *node)
/* Create an SSE2 Ucomi(ss/sd) compare for a Cmp node.
 * One operand may be folded as a source address mode operand. */
2749 dbg_info *dbgi = get_irn_dbg_info(node);
2750 ir_node *src_block = get_nodes_block(node);
2751 ir_node *new_block = be_transform_node(src_block);
2752 ir_node *left = get_Cmp_left(node);
2753 ir_node *right = get_Cmp_right(node);
2755 ia32_address_mode_t am;
2756 ia32_address_t *addr = &am.addr;
/* let the matcher decide operand order / memory folding */
2758 match_arguments(&am, src_block, left, right, NULL,
2759 match_commutative | match_am);
2761 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2762 addr->mem, am.new_op1, am.new_op2,
2764 set_am_attributes(new_node, &am);
2766 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the memory Proj if a load was folded into the compare */
2768 new_node = fix_mem_proj(new_node, &am);
2774 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2775 * to fold an and into a test node
2777 static bool can_fold_test_and(ir_node *node)
2779 const ir_edge_t *edge;
2781 /** we can only have eq and lg projs */
2782 foreach_out_edge(node, edge) {
2783 ir_node *proj = get_edge_src_irn(edge);
2784 pn_Cmp pnc = get_Proj_proj(proj);
2785 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2793 * returns true if it is assured, that the upper bits of a node are "clean"
2794 * which means for a 16 or 8 bit value, that the upper bits in the register
2795 * are 0 for unsigned and a copy of the most significant bit for signed
2798 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
/* Conservatively decide whether the register bits above get_mode_size_bits(mode)
 * are already "clean" (zero for unsigned, sign-extended for signed) for the
 * given transformed node, so a full 32bit operation can be used instead of a
 * narrow one. Returns false whenever cleanliness cannot be proven. */
2800 assert(ia32_mode_needs_gp_reg(mode));
/* a full-width value has no upper bits to worry about */
2801 if (get_mode_size_bits(mode) >= 32)
/* look through Projs at their producer */
2804 if (is_Proj(transformed_node))
2805 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2807 switch (get_ia32_irn_opcode(transformed_node)) {
2808 case iro_ia32_Conv_I2I:
2809 case iro_ia32_Conv_I2I8Bit: {
/* a conversion cleans the upper bits iff it extends with the same
 * signedness and from a mode no wider than the queried one */
2810 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2811 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2813 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2820 if (mode_is_signed(mode)) {
2821 return false; /* TODO handle signed modes */
/* an unsigned right shift by enough positions guarantees zero upper bits */
2823 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2824 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2825 const ia32_immediate_attr_t *attr
2826 = get_ia32_immediate_attr_const(right);
2827 if (attr->symconst == 0 &&
2828 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2832 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2836 /* TODO too conservative if shift amount is constant */
2837 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand zeroes the upper bits */
2840 if (!mode_is_signed(mode)) {
2842 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2843 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2845 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary case: both operands must be clean */
2850 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2851 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2853 case iro_ia32_Const:
2854 case iro_ia32_Immediate: {
2855 const ia32_immediate_attr_t *attr =
2856 get_ia32_immediate_attr_const(transformed_node);
2857 if (mode_is_signed(mode)) {
/* signed: all bits above the sign bit must equal the sign bit */
2858 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2859 return shifted == 0 || shifted == -1;
/* unsigned: all bits above the mode width must be zero */
2861 unsigned long shifted = (unsigned long)attr->offset;
2862 shifted >>= get_mode_size_bits(mode);
2863 return shifted == 0;
2873 * Generate code for a Cmp.
2875 static ir_node *gen_Cmp(ir_node *node)
/* Generate code for a Cmp: float compares are dispatched to
 * Ucomi (SSE2) or Fucom (x87); integer compares become Test nodes
 * for the (x & y) ==/!= 0 pattern, otherwise Cmp nodes. The produced
 * node yields the eflags value consumed by Jcc/Setcc/CMovcc. */
2877 dbg_info *dbgi = get_irn_dbg_info(node);
2878 ir_node *block = get_nodes_block(node);
2879 ir_node *new_block = be_transform_node(block);
2880 ir_node *left = get_Cmp_left(node);
2881 ir_node *right = get_Cmp_right(node);
2882 ir_mode *cmp_mode = get_irn_mode(left);
2884 ia32_address_mode_t am;
2885 ia32_address_t *addr = &am.addr;
2888 if (mode_is_float(cmp_mode)) {
2889 if (ia32_cg_config.use_sse2) {
2890 return create_Ucomi(node);
2892 return create_Fucom(node);
2896 assert(ia32_mode_needs_gp_reg(cmp_mode));
2898 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2899 cmp_unsigned = !mode_is_signed(cmp_mode);
2900 if (is_Const_0(right) &&
2902 get_irn_n_edges(left) == 1 &&
2903 can_fold_test_and(node)) {
2904 /* Test(and_left, and_right) */
2905 ir_node *and_left = get_And_left(left);
2906 ir_node *and_right = get_And_right(left);
2908 /* matze: code here used mode instead of cmd_mode, I think it is always
2909 * the same as cmp_mode, but I leave this here to see if this is really
2912 assert(get_irn_mode(and_left) == cmp_mode);
2914 match_arguments(&am, block, and_left, and_right, NULL,
2916 match_am | match_8bit_am | match_16bit_am |
2917 match_am_and_immediates | match_immediate);
2919 /* use 32bit compare mode if possible since the opcode is smaller */
2920 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2921 upper_bits_clean(am.new_op2, cmp_mode)) {
2922 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2925 if (get_mode_size_bits(cmp_mode) == 8) {
2926 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2927 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2930 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2931 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2934 /* Cmp(left, right) */
2935 match_arguments(&am, block, left, right, NULL,
2936 match_commutative | match_am | match_8bit_am |
2937 match_16bit_am | match_am_and_immediates |
2939 /* use 32bit compare mode if possible since the opcode is smaller */
2940 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2941 upper_bits_clean(am.new_op2, cmp_mode)) {
2942 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2945 if (get_mode_size_bits(cmp_mode) == 8) {
2946 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2947 addr->index, addr->mem, am.new_op1,
2948 am.new_op2, am.ins_permuted,
2951 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2952 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2955 set_am_attributes(new_node, &am);
2956 set_ia32_ls_mode(new_node, cmp_mode);
2958 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the memory Proj if a load was folded into the compare */
2960 new_node = fix_mem_proj(new_node, &am);
2965 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
/* Create an ia32 CMovcc for a Mux whose values live in gp registers.
 * flags/new_flags are the (untransformed/transformed) flags producer;
 * the condition code is negated if the matcher swapped the operands. */
2968 dbg_info *dbgi = get_irn_dbg_info(node);
2969 ir_node *block = get_nodes_block(node);
2970 ir_node *new_block = be_transform_node(block);
2971 ir_node *val_true = get_Mux_true(node);
2972 ir_node *val_false = get_Mux_false(node);
2974 ia32_address_mode_t am;
2975 ia32_address_t *addr;
2977 assert(ia32_cg_config.use_cmov);
2978 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2982 match_arguments(&am, block, val_false, val_true, flags,
2983 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* operand order was swapped by the matcher -> invert the condition */
2985 if (am.ins_permuted)
2986 pnc = ia32_get_negated_pnc(pnc);
2988 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
2989 addr->mem, am.new_op1, am.new_op2, new_flags,
2991 set_am_attributes(new_node, &am);
2993 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the memory Proj if a load was folded in */
2995 new_node = fix_mem_proj(new_node, &am);
3001 * Creates a ia32 Setcc instruction.
3003 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3004 ir_node *flags, pn_Cmp pnc,
3007 ir_mode *mode = get_irn_mode(orig_node);
3010 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3011 SET_IA32_ORIG_NODE(new_node, orig_node);
3013 /* we might need to conv the result up */
3014 if (get_mode_size_bits(mode) > 8) {
3015 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3016 nomem, new_node, mode_Bu);
3017 SET_IA32_ORIG_NODE(new_node, orig_node);
3024 * Create instruction for an unsigned Difference or Zero.
3026 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
/* Create code for an unsigned Difference-or-Zero: Mux(a >=u b, a-b, 0).
 * Builds Sub(a,b), then masks the result with And(sub, Not(Sbb0(flags)))
 * so the result is zero exactly when the subtraction borrowed. */
3028 ir_mode *mode = get_irn_mode(psi);
3038 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3039 match_mode_neutral | match_am | match_immediate | match_two_users);
3041 block = get_nodes_block(new_node);
3043 if (is_Proj(new_node)) {
3044 sub = get_Proj_pred(new_node);
3045 assert(is_ia32_Sub(sub));
/* make sure we can pull both the result and the flags out of the Sub */
3048 set_irn_mode(sub, mode_T);
3049 new_node = new_rd_Proj(NULL, block, sub, mode, pn_ia32_res);
3051 eflags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3053 dbgi = get_irn_dbg_info(psi);
/* Sbb0 turns the carry/borrow flag into an all-ones/all-zero mask,
 * Not inverts it, And applies it to the difference */
3054 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3055 not = new_bd_ia32_Not(dbgi, block, sbb);
3057 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3058 set_ia32_commutative(new_node);
3063 * Create a const array of two float consts.
3065 * @param c0 the first constant
3066 * @param c1 the second constant
3067 * @param new_mode IN/OUT for the mode of the constants, if NULL
3068 * smallest possible mode will be used
3070 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
/* Create a static, local, constant two-element float array entity
 * initialised with the tarvals of c0 and c1.
 * @param c0        the first constant node
 * @param c1        the second constant node
 * @param new_mode  IN/OUT element mode; if NULL-ish on entry the
 *                  smallest lossless float mode is chosen and returned
 * @return the created entity */
3072 ir_mode *mode = *new_mode;
3074 ir_initializer_t *initializer;
3075 tarval *tv0 = get_Const_tarval(c0);
3076 tarval *tv1 = get_Const_tarval(c1);
3079 /* detect the best mode for the constants */
3080 mode = get_tarval_mode(tv0);
/* try to shrink to single precision, then double, when lossless */
3082 if (mode != mode_F) {
3083 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3084 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3086 tv0 = tarval_convert_to(tv0, mode);
3087 tv1 = tarval_convert_to(tv1, mode);
3088 } else if (mode != mode_D) {
3089 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3090 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3092 tv0 = tarval_convert_to(tv0, mode);
3093 tv1 = tarval_convert_to(tv1, mode);
/* build an array type of two elements with suitable alignment */
3100 tp = ia32_create_float_type(mode, 4);
3101 tp = ia32_create_float_array(tp);
3103 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3105 set_entity_ld_ident(ent, get_entity_ident(ent));
3106 set_entity_visibility(ent, visibility_local);
3107 set_entity_variability(ent, variability_constant);
3108 set_entity_allocation(ent, allocation_static);
3110 initializer = create_initializer_compound(2);
3112 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3113 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3115 set_entity_initializer(ent, initializer);
3122 * Transforms a Mux node into some code sequence.
3124 * @return The transformed node.
3126 static ir_node *gen_Mux(ir_node *node)
/* Transform a Mux node. Float Muxes become SSE min/max where the
 * pattern allows, or a Setcc-indexed load from a two-element constant
 * array; integer Muxes become Doz, Setcc or CMovcc sequences.
 * @return the transformed node */
3128 dbg_info *dbgi = get_irn_dbg_info(node);
3129 ir_node *block = get_nodes_block(node);
3130 ir_node *new_block = be_transform_node(block);
3131 ir_node *mux_true = get_Mux_true(node);
3132 ir_node *mux_false = get_Mux_false(node);
3133 ir_node *cond = get_Mux_sel(node);
3134 ir_mode *mode = get_irn_mode(node);
3139 assert(get_irn_mode(cond) == mode_b);
3141 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3142 if (mode_is_float(mode)) {
3143 ir_node *cmp = get_Proj_pred(cond);
3144 ir_node *cmp_left = get_Cmp_left(cmp);
3145 ir_node *cmp_right = get_Cmp_right(cmp);
3146 pn_Cmp pnc = get_Proj_proj(cond);
3148 if (ia32_cg_config.use_sse2) {
3149 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3150 if (cmp_left == mux_true && cmp_right == mux_false) {
3151 /* Mux(a <= b, a, b) => MIN */
3152 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3153 match_commutative | match_am | match_two_users);
3154 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3155 /* Mux(a <= b, b, a) => MAX */
3156 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3157 match_commutative | match_am | match_two_users);
3159 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3160 if (cmp_left == mux_true && cmp_right == mux_false) {
3161 /* Mux(a >= b, a, b) => MAX */
3162 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3163 match_commutative | match_am | match_two_users);
3164 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3165 /* Mux(a >= b, b, a) => MIN */
3166 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3167 match_commutative | match_am | match_two_users);
/* general float Mux on two constants: materialise {false,true} as a
 * constant array and index it with the 0/1 Setcc result */
3172 if (is_Const(mux_true) && is_Const(mux_false)) {
3173 ia32_address_mode_t am;
3178 flags = get_flags_node(cond, &pnc);
3179 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3181 if (ia32_cg_config.use_sse2) {
3182 /* cannot load from different mode on SSE */
3185 /* x87 can load any mode */
3189 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (Lea/Add tricks below) */
3191 switch (get_mode_size_bytes(new_mode)) {
3201 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3202 set_ia32_am_scale(new_node, 2);
3207 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3208 set_ia32_am_scale(new_node, 1);
3211 /* arg, shift 16 NOT supported */
3213 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3216 panic("Unsupported constant size");
3219 am.ls_mode = new_mode;
3220 am.addr.base = noreg_GP;
3221 am.addr.index = new_node;
3222 am.addr.mem = nomem;
3224 am.addr.scale = scale;
3225 am.addr.use_frame = 0;
3226 am.addr.frame_entity = NULL;
3227 am.addr.symconst_sign = 0;
3228 am.mem_proj = am.addr.mem;
3229 am.op_type = ia32_AddrModeS;
3232 am.pinned = op_pin_state_floats;
3234 am.ins_permuted = 0;
3236 if (ia32_cg_config.use_sse2)
3237 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3239 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3240 set_am_attributes(load, &am);
3242 return new_rd_Proj(NULL, block, load, mode_vfp, pn_ia32_res);
3244 panic("cannot transform floating point Mux");
3247 assert(ia32_mode_needs_gp_reg(mode));
3249 if (is_Proj(cond)) {
3250 ir_node *cmp = get_Proj_pred(cond);
3252 ir_node *cmp_left = get_Cmp_left(cmp);
3253 ir_node *cmp_right = get_Cmp_right(cmp);
3254 pn_Cmp pnc = get_Proj_proj(cond);
3256 /* check for unsigned Doz first */
3257 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3258 is_Const_0(mux_false) && is_Sub(mux_true) &&
3259 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3260 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3261 return create_doz(node, cmp_left, cmp_right);
3262 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3263 is_Const_0(mux_true) && is_Sub(mux_false) &&
3264 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3265 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3266 return create_doz(node, cmp_left, cmp_right);
3271 flags = get_flags_node(cond, &pnc);
3273 if (is_Const(mux_true) && is_Const(mux_false)) {
3274 /* both are const, good */
3275 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3276 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3277 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3278 pnc = ia32_get_negated_pnc(pnc);
3279 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3281 /* Not that simple. */
/* general case: conditional move */
3286 new_node = create_CMov(node, cond, flags, pnc);
3294 * Create a conversion from x87 state register to general purpose.
3296 static ir_node *gen_x87_fp_to_gp(ir_node *node)
/* Create a conversion from an x87 fp value to a general purpose
 * register: fist the value into the frame, then reload it as integer.
 * 32bit unsigned values are stored as 64bit signed and only the lower
 * 32 bits are loaded back. */
3298 ir_node *block = be_transform_node(get_nodes_block(node));
3299 ir_node *op = get_Conv_op(node);
3300 ir_node *new_op = be_transform_node(op);
3301 ir_graph *irg = current_ir_graph;
3302 dbg_info *dbgi = get_irn_dbg_info(node);
3303 ir_mode *mode = get_irn_mode(node);
3304 ir_node *fist, *load, *mem;
3306 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3307 set_irn_pinned(fist, op_pin_state_floats);
3308 set_ia32_use_frame(fist);
3309 set_ia32_op_type(fist, ia32_AddrModeD);
3311 assert(get_mode_size_bits(mode) <= 32);
3312 /* exception we can only store signed 32 bit integers, so for unsigned
3313 we store a 64bit (signed) integer and load the lower bits */
3314 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3315 set_ia32_ls_mode(fist, mode_Ls);
3317 set_ia32_ls_mode(fist, mode_Is);
3319 SET_IA32_ORIG_NODE(fist, node);
/* reload the spilled value as a 32bit integer */
3322 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3324 set_irn_pinned(load, op_pin_state_floats);
3325 set_ia32_use_frame(load);
3326 set_ia32_op_type(load, ia32_AddrModeS);
3327 set_ia32_ls_mode(load, mode_Is);
/* request a stack slot wide enough for what the fist wrote */
3328 if (get_ia32_ls_mode(fist) == mode_Ls) {
3329 ia32_attr_t *attr = get_ia32_attr(load);
3330 attr->data.need_64bit_stackent = 1;
3332 ia32_attr_t *attr = get_ia32_attr(load);
3333 attr->data.need_32bit_stackent = 1;
3335 SET_IA32_ORIG_NODE(load, node);
3337 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
3341 * Creates a x87 strict Conv by placing a Store and a Load
3343 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3345 ir_node *block = get_nodes_block(node);
3346 ir_graph *irg = get_Block_irg(block);
3347 dbg_info *dbgi = get_irn_dbg_info(node);
3348 ir_node *frame = get_irg_frame(irg);
3349 ir_node *store, *load;
3352 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3353 set_ia32_use_frame(store);
3354 set_ia32_op_type(store, ia32_AddrModeD);
3355 SET_IA32_ORIG_NODE(store, node);
3357 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3358 set_ia32_use_frame(load);
3359 set_ia32_op_type(load, ia32_AddrModeS);
3360 SET_IA32_ORIG_NODE(load, node);
3362 new_node = new_r_Proj(block, load, mode_E, pn_ia32_vfld_res);
3366 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3367 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3369 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3371 func = get_mode_size_bits(mode) == 8 ?
3372 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3373 return func(dbgi, block, base, index, mem, val, mode);
3377 * Create a conversion from general purpose to x87 register
3379 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
/* Create a conversion from a general purpose value to an x87 register:
 * either fild directly from memory (source address mode) or spill the
 * integer to the frame and fild it from there. 32bit unsigned values
 * are widened to a 64bit spill (upper word zeroed) before the fild. */
3381 ir_node *src_block = get_nodes_block(node);
3382 ir_node *block = be_transform_node(src_block);
3383 ir_graph *irg = get_Block_irg(block);
3384 dbg_info *dbgi = get_irn_dbg_info(node);
3385 ir_node *op = get_Conv_op(node);
3386 ir_node *new_op = NULL;
3388 ir_mode *store_mode;
3393 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3394 if (possible_int_mode_for_fp(src_mode)) {
3395 ia32_address_mode_t am;
3397 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3398 if (am.op_type == ia32_AddrModeS) {
3399 ia32_address_t *addr = &am.addr;
3401 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3402 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3404 set_am_attributes(fild, &am);
3405 SET_IA32_ORIG_NODE(fild, node);
3407 fix_mem_proj(fild, &am);
/* no source AM matched: transform the operand normally */
3412 if (new_op == NULL) {
3413 new_op = be_transform_node(op);
3416 mode = get_irn_mode(op);
3418 /* first convert to 32 bit signed if necessary */
3419 if (get_mode_size_bits(src_mode) < 32) {
3420 if (!upper_bits_clean(new_op, src_mode)) {
3421 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3422 SET_IA32_ORIG_NODE(new_op, node);
3427 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can read it */
3430 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3432 set_ia32_use_frame(store);
3433 set_ia32_op_type(store, ia32_AddrModeD);
3434 set_ia32_ls_mode(store, mode_Iu);
3436 /* exception for 32bit unsigned, do a 64bit spill+load */
3437 if (!mode_is_signed(mode)) {
3440 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* zero the upper 4 bytes of the 64bit slot */
3442 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3443 noreg_GP, nomem, zero_const);
3445 set_ia32_use_frame(zero_store);
3446 set_ia32_op_type(zero_store, ia32_AddrModeD);
3447 add_ia32_am_offs_int(zero_store, 4);
3448 set_ia32_ls_mode(zero_store, mode_Iu);
/* combine both stores so the fild depends on both memory writes */
3453 store = new_rd_Sync(dbgi, block, 2, in);
3454 store_mode = mode_Ls;
3456 store_mode = mode_Is;
3460 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3462 set_ia32_use_frame(fild);
3463 set_ia32_op_type(fild, ia32_AddrModeS);
3464 set_ia32_ls_mode(fild, store_mode);
3466 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3472 * Create a conversion from one integer mode into another one
3474 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
/* Create a conversion between two integer modes. The conversion is
 * done in the smaller of the two modes; it is skipped entirely if the
 * operand's upper bits are already clean for that mode. */
3475 dbg_info *dbgi, ir_node *block, ir_node *op,
3478 ir_node *new_block = be_transform_node(block);
3480 ir_mode *smaller_mode;
3481 ia32_address_mode_t am;
3482 ia32_address_t *addr = &am.addr;
/* the effective conversion mode is the narrower of source and target */
3485 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3486 smaller_mode = src_mode;
3488 smaller_mode = tgt_mode;
3491 #ifdef DEBUG_libfirm
3493 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3498 match_arguments(&am, block, NULL, op, NULL,
3499 match_am | match_8bit_am | match_16bit_am);
3501 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3502 /* unnecessary conv. in theory it shouldn't have been AM */
3503 assert(is_ia32_NoReg_GP(addr->base));
3504 assert(is_ia32_NoReg_GP(addr->index));
3505 assert(is_NoMem(addr->mem));
3506 assert(am.addr.offset == 0);
3507 assert(am.addr.symconst_ent == NULL);
3511 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3512 addr->mem, am.new_op2, smaller_mode);
3513 set_am_attributes(new_node, &am);
3514 /* match_arguments assume that out-mode = in-mode, this isn't true here
3516 set_ia32_ls_mode(new_node, smaller_mode);
3517 SET_IA32_ORIG_NODE(new_node, node);
3518 new_node = fix_mem_proj(new_node, &am);
3523 * Transforms a Conv node.
3525 * @return The created ia32 Conv node
3527 static ir_node *gen_Conv(ir_node *node)
/* Transform a Conv node: dispatches on (src, tgt) mode combinations to
 * SSE conversion nodes, x87 helpers (fp<->gp, strict store/load), or
 * integer-to-integer conversion; no-op Convs are eliminated.
 * @return the created ia32 node */
3529 ir_node *block = get_nodes_block(node);
3530 ir_node *new_block = be_transform_node(block);
3531 ir_node *op = get_Conv_op(node);
3532 ir_node *new_op = NULL;
3533 dbg_info *dbgi = get_irn_dbg_info(node);
3534 ir_mode *src_mode = get_irn_mode(op);
3535 ir_mode *tgt_mode = get_irn_mode(node);
3536 int src_bits = get_mode_size_bits(src_mode);
3537 int tgt_bits = get_mode_size_bits(tgt_mode);
3538 ir_node *res = NULL;
3540 assert(!mode_is_int(src_mode) || src_bits <= 32);
3541 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3543 /* modeB -> X should already be lowered by the lower_mode_b pass */
3544 if (src_mode == mode_b) {
3545 panic("ConvB not lowered %+F", node);
3548 if (src_mode == tgt_mode) {
3549 if (get_Conv_strict(node)) {
3550 if (ia32_cg_config.use_sse2) {
3551 /* when we are in SSE mode, we can kill all strict no-op conversion */
3552 return be_transform_node(op);
3555 /* this should be optimized already, but who knows... */
3556 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3557 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3558 return be_transform_node(op);
3562 if (mode_is_float(src_mode)) {
3563 new_op = be_transform_node(op);
3564 /* we convert from float ... */
3565 if (mode_is_float(tgt_mode)) {
3567 if (ia32_cg_config.use_sse2) {
3568 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3569 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3571 set_ia32_ls_mode(res, tgt_mode);
3573 if (get_Conv_strict(node)) {
3574 /* if fp_no_float_fold is not set then we assume that we
3575 * don't have any float operations in a non
3576 * mode_float_arithmetic mode and can skip strict upconvs */
3577 if (src_bits < tgt_bits
3578 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3579 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* x87 strict conv: force the value through memory to round it */
3582 res = gen_x87_strict_conv(tgt_mode, new_op);
3583 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3587 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3592 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3593 if (ia32_cg_config.use_sse2) {
3594 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3596 set_ia32_ls_mode(res, src_mode);
3598 return gen_x87_fp_to_gp(node);
3602 /* we convert from int ... */
3603 if (mode_is_float(tgt_mode)) {
3605 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3606 if (ia32_cg_config.use_sse2) {
3607 new_op = be_transform_node(op);
3608 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3610 set_ia32_ls_mode(res, tgt_mode);
3612 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3613 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3614 res = gen_x87_gp_to_fp(node, src_mode);
3616 /* we need a strict-Conv, if the int mode has more bits than the
3618 if (float_mantissa < int_mantissa) {
3619 res = gen_x87_strict_conv(tgt_mode, res);
3620 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3624 } else if (tgt_mode == mode_b) {
3625 /* mode_b lowering already took care that we only have 0/1 values */
3626 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3627 src_mode, tgt_mode));
3628 return be_transform_node(op);
3631 if (src_bits == tgt_bits) {
3632 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3633 src_mode, tgt_mode));
3634 return be_transform_node(op);
/* int -> int with different widths */
3637 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3645 static ir_node *create_immediate_or_transform(ir_node *node,
3646 char immediate_constraint_type)
3648 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3649 if (new_node == NULL) {
3650 new_node = be_transform_node(node);
3656 * Transforms a FrameAddr into an ia32 Add.
3658 static ir_node *gen_be_FrameAddr(ir_node *node)
3660 ir_node *block = be_transform_node(get_nodes_block(node));
3661 ir_node *op = be_get_FrameAddr_frame(node);
3662 ir_node *new_op = be_transform_node(op);
3663 dbg_info *dbgi = get_irn_dbg_info(node);
3666 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3667 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3668 set_ia32_use_frame(new_node);
3670 SET_IA32_ORIG_NODE(new_node, node);
3676 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3678 static ir_node *gen_be_Return(ir_node *node)
/* Transform a be_Return. With SSE2 and a float return value the result
 * must be moved from xmm0 to the x87 TOS: store it to the frame, reload
 * via vfld and rebuild the preceding Barrier with the new value/memory.
 * All other returns are simply duplicated. */
3680 ir_graph *irg = current_ir_graph;
3681 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3682 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3683 ir_entity *ent = get_irg_entity(irg);
3684 ir_type *tp = get_entity_type(ent);
3689 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3690 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3692 int pn_ret_val, pn_ret_mem, arity, i;
3694 assert(ret_val != NULL);
/* fast path: nothing to fix up unless SSE2 returns a float value */
3695 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3696 return be_duplicate_node(node);
3699 res_type = get_method_res_type(tp, 0);
3701 if (! is_Primitive_type(res_type)) {
3702 return be_duplicate_node(node);
3705 mode = get_type_mode(res_type);
3706 if (! mode_is_float(mode)) {
3707 return be_duplicate_node(node);
3710 assert(get_method_n_ress(tp) == 1);
3712 pn_ret_val = get_Proj_proj(ret_val);
3713 pn_ret_mem = get_Proj_proj(ret_mem);
3715 /* get the Barrier */
3716 barrier = get_Proj_pred(ret_val);
3718 /* get result input of the Barrier */
3719 ret_val = get_irn_n(barrier, pn_ret_val);
3720 new_ret_val = be_transform_node(ret_val);
3722 /* get memory input of the Barrier */
3723 ret_mem = get_irn_n(barrier, pn_ret_mem);
3724 new_ret_mem = be_transform_node(ret_mem);
3726 frame = get_irg_frame(irg);
3728 dbgi = get_irn_dbg_info(barrier);
3729 block = be_transform_node(get_nodes_block(barrier));
3731 /* store xmm0 onto stack */
3732 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3733 new_ret_mem, new_ret_val);
3734 set_ia32_ls_mode(sse_store, mode);
3735 set_ia32_op_type(sse_store, ia32_AddrModeD);
3736 set_ia32_use_frame(sse_store);
3738 /* load into x87 register */
3739 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3740 set_ia32_op_type(fld, ia32_AddrModeS);
3741 set_ia32_use_frame(fld);
3743 mproj = new_r_Proj(block, fld, mode_M, pn_ia32_vfld_M);
3744 fld = new_r_Proj(block, fld, mode_vfp, pn_ia32_vfld_res);
3746 /* create a new barrier */
3747 arity = get_irn_arity(barrier);
3748 in = ALLOCAN(ir_node*, arity);
3749 for (i = 0; i < arity; ++i) {
/* substitute the reloaded value and its memory into the barrier inputs */
3752 if (i == pn_ret_val) {
3754 } else if (i == pn_ret_mem) {
3757 ir_node *in = get_irn_n(barrier, i);
3758 new_in = be_transform_node(in);
3763 new_barrier = new_ir_node(dbgi, irg, block,
3764 get_irn_op(barrier), get_irn_mode(barrier),
3766 copy_node_attr(barrier, new_barrier);
3767 be_duplicate_deps(barrier, new_barrier);
3768 be_set_transformed_node(barrier, new_barrier);
3770 /* transform normally */
3771 return be_duplicate_node(node);
3775 * Transform a be_AddSP into an ia32_SubSP.
3777 static ir_node *gen_be_AddSP(ir_node *node)
3779 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3780 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3782 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3783 match_am | match_immediate);
3787 * Transform a be_SubSP into an ia32_AddSP
3789 static ir_node *gen_be_SubSP(ir_node *node)
3791 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3792 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3794 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3795 match_am | match_immediate);
/**
 * Change some phi modes
 *
 * Rebuilds the Phi with an output register requirement chosen from its mode:
 * gp class for integer-like modes, xmm or vfp class for float modes
 * (depending on whether SSE2 is in use), and no requirement otherwise.
 * Predecessors are kept untransformed for now and enqueued for fixup.
 */
static ir_node *gen_Phi(ir_node *node)
	const arch_register_req_t *req;
	ir_node  *block = be_transform_node(get_nodes_block(node));
	ir_graph *irg   = current_ir_graph;
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_mode  *mode  = get_irn_mode(node);

	if (ia32_mode_needs_gp_reg(mode)) {
		/* we shouldn't have any 64bit stuff around anymore */
		assert(get_mode_size_bits(mode) <= 32);
		/* all integer operations are on 32bit registers now */
		req = ia32_reg_classes[CLASS_ia32_gp].class_req;
	} else if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2) {
			req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
			req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
		req = arch_no_register_req;

	/* phi nodes allow loops, so we use the old arguments for now
	 * and fix this later */
	phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
	                  get_irn_in(node) + 1);
	copy_node_attr(node, phi);
	be_duplicate_deps(node, phi);

	arch_set_out_register_req(phi, 0, req);

	be_enqueue_preds(node);
3842 static ir_node *gen_Jmp(ir_node *node)
3844 ir_node *block = get_nodes_block(node);
3845 ir_node *new_block = be_transform_node(block);
3846 dbg_info *dbgi = get_irn_dbg_info(node);
3849 new_node = new_bd_ia32_Jmp(dbgi, new_block);
3850 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform an indirect jump (IJmp) into an ia32 IJmp.  The target address
 * may be folded into an address mode or an immediate.
 */
static ir_node *gen_IJmp(ir_node *node)
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *op        = get_IJmp_target(node);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* jump targets are always pointers */
	assert(get_irn_mode(op) == mode_P);

	match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);

	new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
	                            addr->mem, am.new_op2);
	set_am_attributes(new_node, &am);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Transform a Bound node.
 *
 * Only the common case lower == 0 is implemented: with a zero lower bound a
 * single unsigned "index < upper" check covers both bounds at once (negative
 * indices wrap to large unsigned values).  Any other lower bound panics.
 */
static ir_node *gen_Bound(ir_node *node)
	ir_node  *lower = get_Bound_lower(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);

	if (is_Const_0(lower)) {
		/* typical case for Java */
		ir_node *sub, *res, *flags, *block;

		res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
			new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);

		block = get_nodes_block(res);
		if (! is_Proj(res)) {
			set_irn_mode(sub, mode_T);
			res = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_res);
		sub = get_Proj_pred(res);

		flags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
		new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
		SET_IA32_ORIG_NODE(new_node, node);
		panic("generic Bound not supported in ia32 Backend");
3916 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3918 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3919 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3921 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3922 match_immediate | match_mode_neutral);
/**
 * Transform an ia32_l_ShrDep into a real Shr (unsigned right shift).
 */
static ir_node *gen_ia32_l_ShrDep(ir_node *node)
	ir_node *left  = get_irn_n(node, n_ia32_l_ShrDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
	return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/**
 * Transform an ia32_l_SarDep into a real Sar (arithmetic right shift).
 */
static ir_node *gen_ia32_l_SarDep(ir_node *node)
	ir_node *left  = get_irn_n(node, n_ia32_l_SarDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
	return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transform an ia32_l_Add into a real Add and force it into mode_T so its
 * flags output stays available (presumably consumed by a following l_Adc
 * during 64bit lowering — confirm with callers).
 */
static ir_node *gen_ia32_l_Add(ir_node *node)
	ir_node *left    = get_irn_n(node, n_ia32_l_Add_left);
	ir_node *right   = get_irn_n(node, n_ia32_l_Add_right);
	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
			match_commutative | match_am | match_immediate |
			match_mode_neutral);

	/* gen_binop may hand back a Proj; strip it to reach the Add itself */
	if (is_Proj(lowered)) {
		lowered = get_Proj_pred(lowered);
		assert(is_ia32_Add(lowered));
		set_irn_mode(lowered, mode_T);
3959 static ir_node *gen_ia32_l_Adc(ir_node *node)
3961 return gen_binop_flags(node, new_bd_ia32_Adc,
3962 match_commutative | match_am | match_immediate |
3963 match_mode_neutral);
3967 * Transforms a l_MulS into a "real" MulS node.
3969 * @return the created ia32 Mul node
3971 static ir_node *gen_ia32_l_Mul(ir_node *node)
3973 ir_node *left = get_binop_left(node);
3974 ir_node *right = get_binop_right(node);
3976 return gen_binop(node, left, right, new_bd_ia32_Mul,
3977 match_commutative | match_am | match_mode_neutral);
3981 * Transforms a l_IMulS into a "real" IMul1OPS node.
3983 * @return the created ia32 IMul1OP node
3985 static ir_node *gen_ia32_l_IMul(ir_node *node)
3987 ir_node *left = get_binop_left(node);
3988 ir_node *right = get_binop_right(node);
3990 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3991 match_commutative | match_am | match_mode_neutral);
/**
 * Transform an ia32_l_Sub into a real Sub and force it into mode_T so its
 * flags output stays available (presumably consumed by a following l_Sbb
 * during 64bit lowering — confirm with callers).
 */
static ir_node *gen_ia32_l_Sub(ir_node *node)
	ir_node *left    = get_irn_n(node, n_ia32_l_Sub_minuend);
	ir_node *right   = get_irn_n(node, n_ia32_l_Sub_subtrahend);
	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
			match_am | match_immediate | match_mode_neutral);

	/* gen_binop may hand back a Proj; strip it to reach the Sub itself */
	if (is_Proj(lowered)) {
		lowered = get_Proj_pred(lowered);
		assert(is_ia32_Sub(lowered));
		set_irn_mode(lowered, mode_T);
4011 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4013 return gen_binop_flags(node, new_bd_ia32_Sbb,
4014 match_am | match_immediate | match_mode_neutral);
/**
 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
 * op1 - target to be shifted
 * op2 - contains bits to be shifted into target
 * Only op3 can be an immediate.
 */
static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
                                         ir_node *low, ir_node *count)
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *new_high  = be_transform_node(high);
	ir_node  *new_low   = be_transform_node(low);

	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(count) &&
	       get_irn_n_edges(count) == 1 &&
	       mode_is_int(get_irn_mode(count))) {
		assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
		count = get_Conv_op(count);
	new_count = create_immediate_or_transform(count, 0);

	/* pick ShlD vs ShrD depending on the lowered node's opcode */
	if (is_ia32_l_ShlD(node)) {
		new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
		new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,

	SET_IA32_ORIG_NODE(new_node, node);
4057 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4059 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4060 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4061 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4062 return gen_lowered_64bit_shifts(node, high, low, count);
4065 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4067 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4068 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4069 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4070 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform an ia32_l_LLtoFloat: convert a 64bit integer (given as low/high
 * 32bit halves) to a float by storing both halves to the frame and loading
 * them back with an x87 fild (mode_Ls).  For unsigned values a correction
 * term is added when the sign bit was set (see the ia32_ULLBIAS constant).
 * Not implemented for SSE2 code generation.
 */
static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
	ir_node  *src_block    = get_nodes_block(node);
	ir_node  *block        = be_transform_node(src_block);
	ir_graph *irg          = current_ir_graph;
	dbg_info *dbgi         = get_irn_dbg_info(node);
	ir_node  *frame        = get_irg_frame(irg);
	ir_node  *val_low      = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
	ir_node  *val_high     = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
	ir_node  *new_val_low  = be_transform_node(val_low);
	ir_node  *new_val_high = be_transform_node(val_high);
	ir_node  *sync, *fild, *res;
	ir_node  *store_low, *store_high;

	if (ia32_cg_config.use_sse2) {
		panic("ia32_l_LLtoFloat not implemented for SSE2");

	/* spill both halves of the 64bit value to the frame */
	store_low  = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
	store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
	SET_IA32_ORIG_NODE(store_low,  node);
	SET_IA32_ORIG_NODE(store_high, node);

	set_ia32_use_frame(store_low);
	set_ia32_use_frame(store_high);
	set_ia32_op_type(store_low, ia32_AddrModeD);
	set_ia32_op_type(store_high, ia32_AddrModeD);
	set_ia32_ls_mode(store_low, mode_Iu);
	set_ia32_ls_mode(store_high, mode_Is);
	/* the high half lives 4 bytes above the low half */
	add_ia32_am_offs_int(store_high, 4);

	sync = new_rd_Sync(dbgi, block, 2, in);

	/* load the 64bit slot as a long long into an x87 register */
	fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);

	set_ia32_use_frame(fild);
	set_ia32_op_type(fild, ia32_AddrModeS);
	set_ia32_ls_mode(fild, mode_Ls);

	SET_IA32_ORIG_NODE(fild, node);

	res = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);

	if (! mode_is_signed(get_irn_mode(val_high))) {
		/* unsigned input: fild interpreted the value as signed, so add a
		 * bias selected via (val_high >> 31) indexing into a known-constant
		 * table (ia32_ULLBIAS) */
		ia32_address_mode_t am;

		ir_node *count = ia32_create_Immediate(NULL, 0, 31);

		am.addr.base          = noreg_GP;
		am.addr.index         = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
		am.addr.mem           = nomem;
		am.addr.symconst_ent  = ia32_gen_fp_known_const(ia32_ULLBIAS);
		am.addr.use_frame     = 0;
		am.addr.frame_entity  = NULL;
		am.addr.symconst_sign = 0;
		am.ls_mode            = mode_F;
		am.mem_proj           = nomem;
		am.op_type            = ia32_AddrModeS;
		am.new_op2            = ia32_new_NoReg_vfp(env_cg);
		am.pinned             = op_pin_state_floats;
		am.ins_permuted       = 0;

		fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
			am.new_op1, am.new_op2, get_fpcw());
		set_am_attributes(fadd, &am);

		set_irn_mode(fadd, mode_T);
		res = new_rd_Proj(NULL, block, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform an ia32_l_FloattoLL: store the float value to a 64bit frame slot
 * via an x87 fist (mode_Ls).  The two 32bit result halves are picked up
 * later by gen_Proj_l_FloattoLL.
 */
static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
	ir_node  *src_block = get_nodes_block(node);
	ir_node  *block     = be_transform_node(src_block);
	ir_graph *irg       = get_Block_irg(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *frame     = get_irg_frame(irg);
	ir_node  *val       = get_irn_n(node, n_ia32_l_FloattoLL_val);
	ir_node  *new_val   = be_transform_node(val);
	ir_node  *fist, *mem;

	mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
	SET_IA32_ORIG_NODE(fist, node);
	set_ia32_use_frame(fist);
	set_ia32_op_type(fist, ia32_AddrModeD);
	set_ia32_ls_mode(fist, mode_Ls);
4178 * the BAD transformer.
4180 static ir_node *bad_transform(ir_node *node)
4182 panic("No transform function for %+F available.", node);
/**
 * Transform a Proj of an ia32_l_FloattoLL: load one 32bit half of the 64bit
 * fist result back from the frame slot (offset 4 selects the high half).
 */
static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_graph *irg      = get_Block_irg(block);
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	ir_node  *frame    = get_irg_frame(irg);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      pn       = get_Proj_proj(node);

	load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
	SET_IA32_ORIG_NODE(load, node);
	set_ia32_use_frame(load);
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);
	/* we need a 64bit stackslot (fist stores 64bit) even though we only load
	 * 32 bit from it with this particular load */
	attr = get_ia32_attr(load);
	attr->data.need_64bit_stackent = 1;

	if (pn == pn_ia32_l_FloattoLL_res_high) {
		add_ia32_am_offs_int(load, 4);
		assert(pn == pn_ia32_l_FloattoLL_res_low);

	proj = new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform the Projs of an AddSP.
 *
 * The be_AddSP itself is lowered to an ia32_SubSP (see gen_be_AddSP), so the
 * Proj numbers are renumbered to the SubSP outputs here.
 */
static ir_node *gen_Proj_be_AddSP(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	if (proj == pn_be_AddSP_sp) {
		/* the stack-pointer result is pinned to ESP */
		ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
		                           pn_ia32_SubSP_stack);
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
	} else if (proj == pn_be_AddSP_res) {
		return new_rd_Proj(dbgi, block, new_pred, mode_Iu,
		                   pn_ia32_SubSP_addr);
	} else if (proj == pn_be_AddSP_M) {
		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_SubSP_M);

	panic("No idea how to transform proj->AddSP");
/**
 * Transform the Projs of a SubSP.
 *
 * The be_SubSP itself is lowered to an ia32_AddSP (see gen_be_SubSP), so the
 * Proj numbers are renumbered to the AddSP outputs here.
 */
static ir_node *gen_Proj_be_SubSP(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	if (proj == pn_be_SubSP_sp) {
		/* the stack-pointer result is pinned to ESP */
		ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
		                           pn_ia32_AddSP_stack);
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
	} else if (proj == pn_be_SubSP_M) {
		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_AddSP_M);

	panic("No idea how to transform proj->SubSP");
/**
 * Transform and renumber the Projs from a Load.
 *
 * Dispatches on the kind of node the Load was lowered to (ia32 Load, a Conv
 * that absorbed the load, xLoad, vfld, …) and maps the firm Proj numbers to
 * the corresponding ia32 output numbers.
 */
static ir_node *gen_Proj_Load(ir_node *node)
	ir_node  *block = be_transform_node(get_nodes_block(node));
	ir_node  *pred  = get_Proj_pred(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	long      proj  = get_Proj_proj(node);

	/* loads might be part of source address mode matches, so we don't
	 * transform the ProjMs yet (with the exception of loads whose result is
	if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
		ir_node *old_block = get_nodes_block(node);

		/* this is needed, because sometimes we have loops that are only
		   reachable through the ProjM */
		be_enqueue_preds(node);
		/* do it in 2 steps, to silence firm verifier */
		res = new_rd_Proj(dbgi, old_block, pred, mode_M, pn_Load_M);
		set_Proj_proj(res, pn_ia32_mem);

	/* renumber the proj */
	new_pred = be_transform_node(pred);
	if (is_ia32_Load(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Load_res);
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Load_M);
		case pn_Load_X_regular:
			return new_rd_Jmp(dbgi, block);
		case pn_Load_X_except:
			/* This Load might raise an exception. Mark it. */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Load_X_exc);
	} else if (is_ia32_Conv_I2I(new_pred) ||
	           is_ia32_Conv_I2I8Bit(new_pred)) {
		/* the load was folded into a conversion */
		set_irn_mode(new_pred, mode_T);
		if (proj == pn_Load_res) {
			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_res);
		} else if (proj == pn_Load_M) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_mem);
	} else if (is_ia32_xLoad(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xLoad_M);
		case pn_Load_X_regular:
			return new_rd_Jmp(dbgi, block);
		case pn_Load_X_except:
			/* This Load might raise an exception. Mark it. */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
	} else if (is_ia32_vfld(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfld_res);
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfld_M);
		case pn_Load_X_regular:
			return new_rd_Jmp(dbgi, block);
		case pn_Load_X_except:
			/* This Load might raise an exception. Mark it. */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
		/* can happen for ProJMs when source address mode happened for the
		/* however it should not be the result proj, as that would mean the
		   load had multiple users and should not have been used for
		if (proj != pn_Load_M) {
			panic("internal error: transformed node not a Load");
		return new_rd_Proj(dbgi, block, new_pred, mode_M, 1);

	panic("No idea how to transform proj");
/**
 * Transform and renumber the Projs from a DivMod like instruction.
 *
 * Div, Mod and DivMod are all lowered to an ia32 Div/IDiv, so the Proj
 * numbers of all three opcodes map onto the Div output numbers.
 */
static ir_node *gen_Proj_DivMod(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));

	switch (get_irn_opcode(pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
		case pn_Div_X_regular:
			return new_rd_Jmp(dbgi, block);
		case pn_Div_X_except:
			/* division by zero may raise an exception */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
		case pn_Mod_X_except:
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
		case pn_DivMod_res_div:
			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
		case pn_DivMod_res_mod:
			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
		case pn_DivMod_X_regular:
			return new_rd_Jmp(dbgi, block);
		case pn_DivMod_X_except:
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);

	panic("No idea how to transform proj->DivMod");
/**
 * Transform and renumber the Projs from a CopyB.
 *
 * The CopyB may have been lowered either to the immediate-size variant
 * (CopyB_i) or the general one; the memory Proj is renumbered accordingly.
 */
static ir_node *gen_Proj_CopyB(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	case pn_CopyB_M_regular:
		if (is_ia32_CopyB_i(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
		} else if (is_ia32_CopyB(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_M);

	panic("No idea how to transform proj->CopyB");
/**
 * Transform and renumber the Projs from a Quot.
 *
 * The Quot may have been lowered to an SSE division (xDiv) or an x87
 * division (vfdiv); the Proj numbers and result mode differ accordingly.
 */
static ir_node *gen_Proj_Quot(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

		if (is_ia32_xDiv(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xDiv_M);
		} else if (is_ia32_vfdiv(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfdiv_M);
		if (is_ia32_xDiv(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
		} else if (is_ia32_vfdiv(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
	case pn_Quot_X_regular:
	case pn_Quot_X_except:

	panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call into an ia32 Call.
 *
 * Matches the call target as address mode/immediate, collects the register
 * parameters constrained to eax/ecx/edx plus the fpcw input, and remembers
 * float-returning calls for SSE post-processing.
 */
static ir_node *gen_be_Call(ir_node *node)
	dbg_info *const dbgi      = get_irn_dbg_info(node);
	ir_node  *const src_block = get_nodes_block(node);
	ir_node  *const block     = be_transform_node(src_block);
	ir_node  *const src_mem   = get_irn_n(node, be_pos_Call_mem);
	ir_node  *const src_sp    = get_irn_n(node, be_pos_Call_sp);
	ir_node  *const sp        = be_transform_node(src_sp);
	ir_node  *const src_ptr   = get_irn_n(node, be_pos_Call_ptr);
	ia32_address_mode_t am;
	ia32_address_t *const addr = &am.addr;
	ir_node * eax = noreg_GP;
	ir_node * ecx = noreg_GP;
	ir_node * edx = noreg_GP;
	unsigned const pop     = be_Call_get_pop(node);
	ir_type *const call_tp = be_Call_get_type(node);
	int old_no_pic_adjust;

	/* Run the x87 simulator if the call returns a float value */
	if (get_method_n_ress(call_tp) > 0) {
		ir_type *const res_type = get_method_res_type(call_tp, 0);
		ir_mode *const res_mode = get_type_mode(res_type);

		if (res_mode != NULL && mode_is_float(res_mode)) {
			env_cg->do_x87_sim = 1;

	/* We do not want be_Call direct calls */
	assert(be_Call_get_entity(node) == NULL);

	/* special case for PIC trampoline calls */
	old_no_pic_adjust = no_pic_adjust;
	no_pic_adjust     = env_cg->birg->main_env->options->pic;

	match_arguments(&am, src_block, NULL, src_ptr, src_mem,
	                match_am | match_immediate);

	no_pic_adjust = old_no_pic_adjust;

	/* last input is the fpcw; the remaining ones are GP register params */
	i    = get_irn_arity(node) - 1;
	fpcw = be_transform_node(get_irn_n(node, i--));
	for (; i >= be_pos_Call_first_arg; --i) {
		arch_register_req_t const *const req = arch_get_register_req(node, i);
		ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));

		assert(req->type == arch_register_req_type_limited);
		assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);

		switch (*req->limited) {
			case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
			case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
			case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
			default: panic("Invalid GP register for register parameter");

	mem  = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
	call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
	                        am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
	set_am_attributes(call, &am);
	call = fix_mem_proj(call, &am);

	if (get_irn_pinned(node) == op_pin_state_pinned)
		set_irn_pinned(call, op_pin_state_pinned);

	SET_IA32_ORIG_NODE(call, node);

	if (ia32_cg_config.use_sse2) {
		/* remember this call for post-processing */
		ARR_APP1(ir_node *, call_list, call);
		ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4577 * Transform Builtin trap
4579 static ir_node *gen_trap(ir_node *node) {
4580 dbg_info *dbgi = get_irn_dbg_info(node);
4581 ir_node *block = be_transform_node(get_nodes_block(node));
4582 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4584 return new_bd_ia32_UD2(dbgi, block, mem);
4588 * Transform Builtin debugbreak
4590 static ir_node *gen_debugbreak(ir_node *node) {
4591 dbg_info *dbgi = get_irn_dbg_info(node);
4592 ir_node *block = be_transform_node(get_nodes_block(node));
4593 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4595 return new_bd_ia32_Breakpoint(dbgi, block, mem);
/**
 * Transform Builtin return_address
 *
 * Walks up `value` frames via ClimbFrame and loads the return address slot
 * of the reached frame (entity supplied by ia32_get_return_address_entity).
 */
static ir_node *gen_return_address(ir_node *node) {
	ir_node   *param = get_Builtin_param(node, 0);
	ir_node   *frame = get_Builtin_param(node, 1);
	dbg_info  *dbgi  = get_irn_dbg_info(node);
	tarval    *tv    = get_Const_tarval(param);
	unsigned long value = get_tarval_long(tv);

	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *ptr   = be_transform_node(frame);

	ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
	ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
	ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);

	/* load the return address from this frame */
	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);

	set_irn_pinned(load, get_irn_pinned(node));
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);

	set_ia32_am_offs_int(load, 0);
	set_ia32_use_frame(load);
	set_ia32_frame_ent(load, ia32_get_return_address_entity());

	if (get_irn_pinned(node) == op_pin_state_floats) {
		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
				&& pn_ia32_vfld_res == pn_ia32_Load_res
				&& pn_ia32_Load_res == pn_ia32_res);
		arch_irn_add_flags(load, arch_irn_flags_rematerializable);

	SET_IA32_ORIG_NODE(load, node);
	return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform Builtin frame_address
 *
 * Walks up `value` frames via ClimbFrame and loads the frame address slot
 * of the reached frame (entity supplied by ia32_get_frame_address_entity).
 */
static ir_node *gen_frame_address(ir_node *node) {
	ir_node  *param = get_Builtin_param(node, 0);
	ir_node  *frame = get_Builtin_param(node, 1);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	tarval   *tv    = get_Const_tarval(param);
	unsigned long value = get_tarval_long(tv);

	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *ptr   = be_transform_node(frame);

	ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
	ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
	ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);

	/* load the frame address from this frame */
	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);

	set_irn_pinned(load, get_irn_pinned(node));
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);

	ent = ia32_get_frame_address_entity();

	set_ia32_am_offs_int(load, 0);
	set_ia32_use_frame(load);
	set_ia32_frame_ent(load, ent);

	/* will fail anyway, but gcc does this: */
	set_ia32_am_offs_int(load, 0);

	if (get_irn_pinned(node) == op_pin_state_floats) {
		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
				&& pn_ia32_vfld_res == pn_ia32_Load_res
				&& pn_ia32_Load_res == pn_ia32_res);
		arch_irn_add_flags(load, arch_irn_flags_rematerializable);

	SET_IA32_ORIG_NODE(load, node);
	return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform Builtin prefetch
 * (the original header comment said "frame_address" — a copy/paste slip)
 *
 * Emits a PrefetchW for writes when 3DNow! is available, an SSE
 * prefetch0/1/2/NTA keyed on the locality hint when SSE prefetch is
 * available, or the plain 3DNow! prefetch otherwise.  With neither feature
 * the builtin degenerates to its memory input.
 */
static ir_node *gen_prefetch(ir_node *node) {
	ir_node *ptr, *block, *mem, *base, *index;
	ir_node *param, *new_node;
	ia32_address_t addr;

	if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
		/* no prefetch at all, route memory */
		return be_transform_node(get_Builtin_mem(node));

	/* param 1 is the read/write flag */
	param = get_Builtin_param(node, 1);
	tv    = get_Const_tarval(param);
	rw    = get_tarval_long(tv);

	/* construct load address */
	memset(&addr, 0, sizeof(addr));
	ptr = get_Builtin_param(node, 0);
	ia32_create_address_mode(&addr, ptr, 0);
	base = be_transform_node(base);
	if (index == NULL) {
		index = be_transform_node(index);

	dbgi  = get_irn_dbg_info(node);
	block = be_transform_node(get_nodes_block(node));
	mem   = be_transform_node(get_Builtin_mem(node));

	if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
		/* we have 3DNow!, this was already checked above */
		new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
	} else if (ia32_cg_config.use_sse_prefetch) {
		/* note: rw == 1 is IGNORED in that case */
		param    = get_Builtin_param(node, 2);
		tv       = get_Const_tarval(param);
		locality = get_tarval_long(tv);

		/* SSE style prefetch */
			new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
			new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
			new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
			new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
		assert(ia32_cg_config.use_3dnow_prefetch);
		/* 3DNow! style prefetch */
		new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);

	set_irn_pinned(new_node, get_irn_pinned(node));
	set_ia32_op_type(new_node, ia32_AddrModeS);
	set_ia32_ls_mode(new_node, mode_Bu);
	set_address(new_node, &addr);

	SET_IA32_ORIG_NODE(new_node, node);

	be_dep_on_frame(new_node);
	return new_r_Proj(block, new_node, mode_M, pn_ia32_Prefetch_M);
4774 * Transform bsf like node
4776 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4778 ir_node *param = get_Builtin_param(node, 0);
4779 dbg_info *dbgi = get_irn_dbg_info(node);
4781 ir_node *block = get_nodes_block(node);
4782 ir_node *new_block = be_transform_node(block);
4784 ia32_address_mode_t am;
4785 ia32_address_t *addr = &am.addr;
4788 match_arguments(&am, block, NULL, param, NULL, match_am);
4790 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4791 set_am_attributes(cnt, &am);
4792 set_ia32_ls_mode(cnt, get_irn_mode(param));
4794 SET_IA32_ORIG_NODE(cnt, node);
4795 return fix_mem_proj(cnt, &am);
/**
 * Transform builtin ffs.
 *
 * Computed as (bsf(x) | -(x == 0)) + 1: when the input is zero the flags
 * Proj of the Bsf selects -1 via Setcc/Neg, so the Or forces the result to
 * -1 and the final +1 yields 0; otherwise the Or is a no-op and the result
 * is the bit index plus one.
 */
static ir_node *gen_ffs(ir_node *node)
	ir_node  *bsf   = gen_unop_AM(node, new_bd_ia32_Bsf);
	ir_node  *real  = skip_Proj(bsf);
	dbg_info *dbgi  = get_irn_dbg_info(real);
	ir_node  *block = get_nodes_block(real);
	ir_node  *flag, *set, *conv, *neg, *or;

	/* make the flags output of the Bsf reachable */
	if (get_irn_mode(real) != mode_T) {
		set_irn_mode(real, mode_T);
		bsf = new_r_Proj(block, real, mode_Iu, pn_ia32_res);

	flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);

	/* sete: 1 iff the input was zero */
	set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
	SET_IA32_ORIG_NODE(set, node);

	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
	SET_IA32_ORIG_NODE(conv, node);

	/* neg: 0 -> 0, 1 -> 0xFFFFFFFF */
	neg = new_bd_ia32_Neg(dbgi, block, conv);

	or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
	set_ia32_commutative(or);

	return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4837 * Transform builtin clz.
4839 static ir_node *gen_clz(ir_node *node)
4841 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4842 ir_node *real = skip_Proj(bsr);
4843 dbg_info *dbgi = get_irn_dbg_info(real);
4844 ir_node *block = get_nodes_block(real);
4845 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4847 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
4851 * Transform builtin ctz.
4853 static ir_node *gen_ctz(ir_node *node)
4855 return gen_unop_AM(node, new_bd_ia32_Bsf);
/**
 * Transform builtin parity.
 *
 * Compares the operand against 0 (which sets the parity flag from the
 * result) and materializes the parity flag with a Setcc, extended to a full
 * register via an 8bit conversion.
 */
static ir_node *gen_parity(ir_node *node)
	ir_node  *param     = get_Builtin_param(node, 0);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	ir_node  *imm, *cmp, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	match_arguments(&am, block, NULL, param, NULL, match_am);
	imm = ia32_create_Immediate(NULL, 0, 0);
	cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
	                      addr->mem, imm, am.new_op2, am.ins_permuted, 0);
	set_am_attributes(cmp, &am);
	set_ia32_ls_mode(cmp, mode_Iu);

	SET_IA32_ORIG_NODE(cmp, node);

	cmp = fix_mem_proj(cmp, &am);

	new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
	                                    nomem, new_node, mode_Bu);
	SET_IA32_ORIG_NODE(new_node, node);
4899 * Transform builtin popcount
4901 static ir_node *gen_popcount(ir_node *node) {
4902 ir_node *param = get_Builtin_param(node, 0);
4903 dbg_info *dbgi = get_irn_dbg_info(node);
4905 ir_node *block = get_nodes_block(node);
4906 ir_node *new_block = be_transform_node(block);
4909 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4911 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4912 if (ia32_cg_config.use_popcnt) {
4913 ia32_address_mode_t am;
4914 ia32_address_t *addr = &am.addr;
4917 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4919 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4920 set_am_attributes(cnt, &am);
4921 set_ia32_ls_mode(cnt, get_irn_mode(param));
4923 SET_IA32_ORIG_NODE(cnt, node);
4924 return fix_mem_proj(cnt, &am);
4927 new_param = be_transform_node(param);
4929 /* do the standard popcount algo */
4931 /* m1 = x & 0x55555555 */
4932 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4933 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4936 simm = ia32_create_Immediate(NULL, 0, 1);
4937 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4939 /* m2 = s1 & 0x55555555 */
4940 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4943 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4945 /* m4 = m3 & 0x33333333 */
4946 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4947 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4950 simm = ia32_create_Immediate(NULL, 0, 2);
4951 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4953 /* m5 = s2 & 0x33333333 */
4954 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4957 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4959 /* m7 = m6 & 0x0F0F0F0F */
4960 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4961 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4964 simm = ia32_create_Immediate(NULL, 0, 4);
4965 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4967 /* m8 = s3 & 0x0F0F0F0F */
4968 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4971 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4973 /* m10 = m9 & 0x00FF00FF */
4974 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4975 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4978 simm = ia32_create_Immediate(NULL, 0, 8);
4979 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4981 /* m11 = s4 & 0x00FF00FF */
4982 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4984 /* m12 = m10 + m11 */
4985 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4987 /* m13 = m12 & 0x0000FFFF */
4988 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4989 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4991 /* s5 = m12 >> 16 */
4992 simm = ia32_create_Immediate(NULL, 0, 16);
4993 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4995 /* res = m13 + s5 */
4996 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5000 * Transform builtin byte swap.
5002 static ir_node *gen_bswap(ir_node *node) {
5003 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5004 dbg_info *dbgi = get_irn_dbg_info(node);
5006 ir_node *block = get_nodes_block(node);
5007 ir_node *new_block = be_transform_node(block);
5008 ir_mode *mode = get_irn_mode(param);
5009 unsigned size = get_mode_size_bits(mode);
5010 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5014 if (ia32_cg_config.use_i486) {
5015 /* swap available */
5016 return new_bd_ia32_Bswap(dbgi, new_block, param);
5018 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5019 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5021 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5022 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5024 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5026 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5027 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5029 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5030 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5033 /* swap16 always available */
5034 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5037 panic("Invalid bswap size (%d)", size);
5042 * Transform builtin outport.
5044 static ir_node *gen_outport(ir_node *node) {
5045 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5046 ir_node *oldv = get_Builtin_param(node, 1);
5047 ir_mode *mode = get_irn_mode(oldv);
5048 ir_node *value = be_transform_node(oldv);
5049 ir_node *block = be_transform_node(get_nodes_block(node));
5050 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5051 dbg_info *dbgi = get_irn_dbg_info(node);
5053 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5054 set_ia32_ls_mode(res, mode);
5059 * Transform builtin inport.
5061 static ir_node *gen_inport(ir_node *node) {
5062 ir_type *tp = get_Builtin_type(node);
5063 ir_type *rstp = get_method_res_type(tp, 0);
5064 ir_mode *mode = get_type_mode(rstp);
5065 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5066 ir_node *block = be_transform_node(get_nodes_block(node));
5067 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5068 dbg_info *dbgi = get_irn_dbg_info(node);
5070 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5071 set_ia32_ls_mode(res, mode);
5073 /* check for missing Result Proj */
5078 * Transform a builtin inner trampoline
/*
 * Writes a small trampoline into memory at <ptr>:
 *     mov ecx, <env>    (opcode 0xB9, imm32)
 *     jmp rel <callee>  (opcode 0xE9, rel32)
 * and yields a Tuple of (memory, trampoline address).
 *
 * NOTE(review): the addr.offset / addr.mem advancement between the
 * consecutive stores is not visible in this excerpt — confirm the address
 * is bumped past each emitted byte/word before the next store.
 */
5080 static ir_node *gen_inner_trampoline(ir_node *node) {
5081 ir_node *ptr = get_Builtin_param(node, 0);
5082 ir_node *callee = get_Builtin_param(node, 1);
5083 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5084 ir_node *mem = get_Builtin_mem(node);
5085 ir_node *block = get_nodes_block(node);
5086 ir_node *new_block = be_transform_node(block);
5090 ir_node *trampoline;
5092 dbg_info *dbgi = get_irn_dbg_info(node);
5093 ia32_address_t addr;
5095 /* construct store address */
5096 memset(&addr, 0, sizeof(addr));
5097 ia32_create_address_mode(&addr, ptr, 0);
/* base/index may be absent in the decomposed address; substitute noreg */
5099 if (addr.base == NULL) {
5100 addr.base = noreg_GP;
5102 addr.base = be_transform_node(addr.base);
5105 if (addr.index == NULL) {
5106 addr.index = noreg_GP;
5108 addr.index = be_transform_node(addr.index);
5110 addr.mem = be_transform_node(mem);
5112 /* mov ecx, <env> */
/* emit the mov-ecx opcode byte */
5113 val = ia32_create_Immediate(NULL, 0, 0xB9);
5114 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5115 addr.index, addr.mem, val);
5116 set_irn_pinned(store, get_irn_pinned(node));
5117 set_ia32_op_type(store, ia32_AddrModeD);
5118 set_ia32_ls_mode(store, mode_Bu);
5119 set_address(store, &addr);
/* emit the 32-bit immediate operand: the environment pointer */
5123 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5124 addr.index, addr.mem, env);
5125 set_irn_pinned(store, get_irn_pinned(node));
5126 set_ia32_op_type(store, ia32_AddrModeD);
5127 set_ia32_ls_mode(store, mode_Iu);
5128 set_address(store, &addr);
5132 /* jmp rel <callee> */
/* emit the jmp-rel32 opcode byte */
5133 val = ia32_create_Immediate(NULL, 0, 0xE9);
5134 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5135 addr.index, addr.mem, val);
5136 set_irn_pinned(store, get_irn_pinned(node));
5137 set_ia32_op_type(store, ia32_AddrModeD);
5138 set_ia32_ls_mode(store, mode_Bu);
5139 set_address(store, &addr);
5143 trampoline = be_transform_node(ptr);
5145 /* the callee is typically an immediate */
/* rel32 = callee - (trampoline + 10): -10 accounts for the 10 bytes of
 * trampoline code the jump displacement is relative to */
5146 if (is_SymConst(callee)) {
5147 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5149 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
5151 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
/* store the computed 32-bit displacement */
5153 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5154 addr.index, addr.mem, rel);
5155 set_irn_pinned(store, get_irn_pinned(node));
5156 set_ia32_op_type(store, ia32_AddrModeD);
5157 set_ia32_ls_mode(store, mode_Iu);
5158 set_address(store, &addr);
/* result tuple: (memory, trampoline address) */
5163 return new_r_Tuple(new_block, 2, in);
5167 * Transform Builtin node.
5169 static ir_node *gen_Builtin(ir_node *node) {
5170 ir_builtin_kind kind = get_Builtin_kind(node);
5174 return gen_trap(node);
5175 case ir_bk_debugbreak:
5176 return gen_debugbreak(node);
5177 case ir_bk_return_address:
5178 return gen_return_address(node);
5179 case ir_bk_frame_address:
5180 return gen_frame_address(node);
5181 case ir_bk_prefetch:
5182 return gen_prefetch(node);
5184 return gen_ffs(node);
5186 return gen_clz(node);
5188 return gen_ctz(node);
5190 return gen_parity(node);
5191 case ir_bk_popcount:
5192 return gen_popcount(node);
5194 return gen_bswap(node);
5196 return gen_outport(node);
5198 return gen_inport(node);
5199 case ir_bk_inner_trampoline:
5200 return gen_inner_trampoline(node);
5202 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5206 * Transform Proj(Builtin) node.
/* Maps Projs of a Builtin to Projs (or Tuple components) of its
 * transformed replacement. */
5208 static ir_node *gen_Proj_Builtin(ir_node *proj) {
5209 ir_node *node = get_Proj_pred(proj);
5210 ir_node *new_node = be_transform_node(node);
5211 ir_builtin_kind kind = get_Builtin_kind(node);
/* builtins whose transformed node IS the single data result */
5214 case ir_bk_return_address:
5215 case ir_bk_frame_address:
5220 case ir_bk_popcount:
5222 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* builtins with only a memory result */
5225 case ir_bk_debugbreak:
5226 case ir_bk_prefetch:
5228 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport: value and memory come out of the ia32_Inport node */
5231 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5232 return new_r_Proj(get_nodes_block(new_node),
5233 new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5235 assert(get_Proj_proj(proj) == pn_Builtin_M);
5236 return new_r_Proj(get_nodes_block(new_node),
5237 new_node, mode_M, pn_ia32_Inport_M);
/* inner_trampoline produced a Tuple (memory, address) — see
 * gen_inner_trampoline */
5239 case ir_bk_inner_trampoline:
5240 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5241 return get_Tuple_pred(new_node, 1);
5243 assert(get_Proj_proj(proj) == pn_Builtin_M);
5244 return get_Tuple_pred(new_node, 0);
5247 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5250 static ir_node *gen_be_IncSP(ir_node *node)
5252 ir_node *res = be_duplicate_node(node);
5253 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5259 * Transform the Projs from a be_Call.
5261 static ir_node *gen_Proj_be_Call(ir_node *node)
5263 ir_node *block = be_transform_node(get_nodes_block(node));
5264 ir_node *call = get_Proj_pred(node);
5265 ir_node *new_call = be_transform_node(call);
5266 dbg_info *dbgi = get_irn_dbg_info(node);
5267 long proj = get_Proj_proj(node);
5268 ir_mode *mode = get_irn_mode(node);
/* the regular memory result maps straight onto the ia32_Call memory */
5271 if (proj == pn_be_Call_M_regular) {
5272 return new_rd_Proj(dbgi, block, new_call, mode_M, n_ia32_Call_mem);
5274 /* transform call modes */
/* data results are re-expressed in the register class's mode */
5275 if (mode_is_data(mode)) {
5276 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5280 /* Map from be_Call to ia32_Call proj number */
5281 if (proj == pn_be_Call_sp) {
5282 proj = pn_ia32_Call_stack;
5283 } else if (proj == pn_be_Call_M_regular) {
5284 proj = pn_ia32_Call_M;
/* result Proj: locate the ia32_Call output with the same limited
 * (single-register) requirement as the old be_Call output */
5286 arch_register_req_t const *const req = arch_get_register_req_out(node);
5287 int const n_outs = arch_irn_get_n_outs(new_call);
5290 assert(proj >= pn_be_Call_first_res);
5291 assert(req->type & arch_register_req_type_limited);
5293 for (i = 0; i < n_outs; ++i) {
5294 arch_register_req_t const *const new_req
5295 = arch_get_out_register_req(new_call, i);
/* skip outputs whose class or allowed register set differs */
5297 if (!(new_req->type & arch_register_req_type_limited) ||
5298 new_req->cls != req->cls ||
5299 *new_req->limited != *req->limited)
5308 res = new_rd_Proj(dbgi, block, new_call, mode, proj);
5310 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
/* pin the well-known fixed outputs to their registers */
5312 case pn_ia32_Call_stack:
5313 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5316 case pn_ia32_Call_fpcw:
5317 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5325 * Transform the Projs from a Cmp.
5327 static ir_node *gen_Proj_Cmp(ir_node *node)
5329 /* this probably means not all mode_b nodes were lowered... */
5330 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5335 * Transform the Projs from a Bound.
5337 static ir_node *gen_Proj_Bound(ir_node *node)
5339 ir_node *new_node, *block;
5340 ir_node *pred = get_Proj_pred(node);
5342 switch (get_Proj_proj(node)) {
5344 return be_transform_node(get_Bound_mem(pred));
5345 case pn_Bound_X_regular:
5346 new_node = be_transform_node(pred);
5347 block = get_nodes_block(new_node);
5348 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_true);
5349 case pn_Bound_X_except:
5350 new_node = be_transform_node(pred);
5351 block = get_nodes_block(new_node);
5352 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_false);
5354 return be_transform_node(get_Bound_index(pred));
5356 panic("unsupported Proj from Bound");
/* Transform a Proj of an ASM node: renumber/remode it for the backend. */
5360 static ir_node *gen_Proj_ASM(ir_node *node)
5362 ir_mode *mode = get_irn_mode(node);
5363 ir_node *pred = get_Proj_pred(node);
5364 ir_node *new_pred = be_transform_node(pred);
5365 ir_node *block = get_nodes_block(new_pred);
5366 long pos = get_Proj_proj(node);
/* the memory output is always the last output of the transformed ASM */
5368 if (mode == mode_M) {
5369 pos = arch_irn_get_n_outs(new_pred)-1;
/* int/reference and float results keep their position but get a
 * backend register mode assigned (assignment not visible in this excerpt) */
5370 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5372 } else if (mode_is_float(mode)) {
5375 panic("unexpected proj mode at ASM");
5378 return new_r_Proj(block, new_pred, mode, pos);
5382 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: routes Projs to the transformer matching their
 * predecessor's opcode. */
5384 static ir_node *gen_Proj(ir_node *node)
5386 ir_node *pred = get_Proj_pred(node);
5389 switch (get_irn_opcode(pred)) {
/* Proj(Store): only the memory result exists */
5391 proj = get_Proj_proj(node);
5392 if (proj == pn_Store_M) {
5393 return be_transform_node(pred);
5395 panic("No idea how to transform proj->Store");
5398 return gen_Proj_Load(node);
5400 return gen_Proj_ASM(node);
5402 return gen_Proj_Builtin(node);
5406 return gen_Proj_DivMod(node);
5408 return gen_Proj_CopyB(node);
5410 return gen_Proj_Quot(node);
/* backend stack nodes */
5412 return gen_Proj_be_SubSP(node);
5414 return gen_Proj_be_AddSP(node);
5416 return gen_Proj_be_Call(node);
5418 return gen_Proj_Cmp(node);
5420 return gen_Proj_Bound(node);
/* Projs of Start */
5422 proj = get_Proj_proj(node);
5424 case pn_Start_X_initial_exec: {
5425 ir_node *block = get_nodes_block(pred);
5426 ir_node *new_block = be_transform_node(block);
5427 dbg_info *dbgi = get_irn_dbg_info(node);
5428 /* we exchange the ProjX with a jump */
5429 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5434 case pn_Start_P_tls:
5435 return gen_Proj_tls(node);
/* default case: Projs of already-lowered or foreign nodes */
5440 if (is_ia32_l_FloattoLL(pred)) {
5441 return gen_Proj_l_FloattoLL(node);
5443 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5447 ir_mode *mode = get_irn_mode(node);
5448 if (ia32_mode_needs_gp_reg(mode)) {
/* re-express GP values in mode_Iu; keep the original node number for
 * debugging purposes */
5449 ir_node *new_pred = be_transform_node(pred);
5450 ir_node *block = be_transform_node(get_nodes_block(node));
5451 ir_node *new_proj = new_r_Proj(block, new_pred,
5452 mode_Iu, get_Proj_proj(node));
5453 new_proj->node_nr = node->node_nr;
5458 return be_duplicate_node(node);
5462 * Enters all transform functions into the generic pointer
/* Populates op->ops.generic for every opcode so be_transform_graph() can
 * dispatch to the matching gen_* function. */
5464 static void register_transformers(void)
5466 /* first clear the generic function pointer for all ops */
5467 clear_irp_opcodes_generic_func();
/* GEN(op): transform op via gen_<op>; BAD(op): op must not occur anymore
 * at this stage — abort if it does. */
5469 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5470 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5510 /* transform ops from intrinsic lowering */
5522 GEN(ia32_l_LLtoFloat);
5523 GEN(ia32_l_FloattoLL);
5529 /* we should never see these nodes */
5544 /* handle builtins */
5547 /* handle generic backend nodes */
5561 * Pre-transform all unknown and noreg nodes.
/* Called by be_transform_graph() before the main walk; ensures the
 * singleton Unknown/NoReg nodes exist in the new graph and caches the
 * frequently used nomem/noreg_GP nodes for the transformers. */
5563 static void ia32_pretransform_node(void)
5565 ia32_code_gen_t *cg = env_cg;
5567 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5568 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5569 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5570 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5571 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5572 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
/* cache globals used throughout the transformers */
5574 nomem = get_irg_no_mem(current_ir_graph);
5575 noreg_GP = ia32_new_NoReg_gp(cg);
5581 * Walker, checks if all ia32 nodes producing more than one result have their
5582 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
5584 static void add_missing_keep_walker(ir_node *node, void *data)
5587 unsigned found_projs = 0;
5588 const ir_edge_t *edge;
5589 ir_mode *mode = get_irn_mode(node);
/* only ia32 nodes are of interest here */
5594 if (!is_ia32_irn(node))
5597 n_outs = arch_irn_get_n_outs(node);
/* SwitchJmp outputs are control flow; no keeps needed */
5600 if (is_ia32_SwitchJmp(node))
/* found_projs is used as a bitset of output numbers, so the output count
 * must fit into an unsigned */
5603 assert(n_outs < (int) sizeof(unsigned) * 8);
/* record which outputs already have a Proj */
5604 foreach_out_edge(node, edge) {
5605 ir_node *proj = get_edge_src_irn(edge);
5608 /* The node could be kept */
/* memory Projs need no keep */
5612 if (get_irn_mode(proj) == mode_M)
5615 pn = get_Proj_proj(proj);
5616 assert(pn < n_outs);
5617 found_projs |= 1 << pn;
5621 /* are keeps missing? */
5623 for (i = 0; i < n_outs; ++i) {
5626 const arch_register_req_t *req;
5627 const arch_register_class_t *cls;
/* this output is already consumed by a Proj */
5629 if (found_projs & (1 << i)) {
5633 req = arch_get_out_register_req(node, i);
/* flag results need no explicit keep */
5638 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
/* create a Proj for the unused output and attach it to a (possibly
 * shared) be_Keep scheduled directly after the node */
5642 block = get_nodes_block(node);
5643 in[0] = new_r_Proj(block, node, arch_register_class_mode(cls), i);
5644 if (last_keep != NULL) {
5645 be_Keep_add_node(last_keep, cls, in[0]);
5647 last_keep = be_new_Keep(block, 1, in);
5648 if (sched_is_scheduled(node)) {
5649 sched_add_after(node, last_keep);
5656 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
5659 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5661 ir_graph *irg = be_get_birg_irg(cg->birg);
5662 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5666 * Post-process all calls if we are in SSE mode.
5667 * The ABI requires that the results are in st0, copy them
5668 * to a xmm register.
5670 static void postprocess_fp_call_results(void) {
/* call_list/call_types were collected during the transform walk */
5673 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5674 ir_node *call = call_list[i];
5675 ir_type *mtp = call_types[i];
5678 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5679 ir_type *res_tp = get_method_res_type(mtp, j);
5680 ir_node *res, *new_res;
5681 const ir_edge_t *edge, *next;
/* only atomic float results live in st0 and need fixing */
5684 if (! is_atomic_type(res_tp)) {
5685 /* no floating point return */
5688 mode = get_type_mode(res_tp);
5689 if (! mode_is_float(mode)) {
5690 /* no floating point return */
/* the x87 result proj of the call */
5694 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5697 /* now patch the users */
5698 foreach_out_edge_safe(res, edge, next) {
5699 ir_node *succ = get_edge_src_irn(edge);
/* keeps may keep the x87 value directly */
5702 if (be_is_Keep(succ))
5705 if (is_ia32_xStore(succ)) {
5706 /* an xStore can be patched into an vfst */
/* reuse the xStore's address/memory operands for the new x87 store */
5707 dbg_info *db = get_irn_dbg_info(succ);
5708 ir_node *block = get_nodes_block(succ);
5709 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5710 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5711 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5712 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5713 ir_mode *mode = get_ia32_ls_mode(succ);
5715 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5716 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5717 if (is_ia32_use_frame(succ))
5718 set_ia32_use_frame(st);
5719 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5720 set_irn_pinned(st, get_irn_pinned(succ));
5721 set_ia32_op_type(st, ia32_AddrModeD);
/* otherwise: spill st0 to the frame once, reload it into an xmm
 * register, and reroute memory users of the call */
5725 if (new_res == NULL) {
5726 dbg_info *db = get_irn_dbg_info(call);
5727 ir_node *block = get_nodes_block(call);
5728 ir_node *frame = get_irg_frame(current_ir_graph);
5729 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5730 ir_node *call_mem = new_r_Proj(block, call, mode_M, pn_ia32_Call_M);
5731 ir_node *vfst, *xld, *new_mem;
5733 /* store st(0) on stack */
5734 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5735 set_ia32_op_type(vfst, ia32_AddrModeD);
5736 set_ia32_use_frame(vfst);
5738 /* load into SSE register */
5739 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5740 set_ia32_op_type(xld, ia32_AddrModeS);
5741 set_ia32_use_frame(xld);
5743 new_res = new_r_Proj(block, xld, mode, pn_ia32_xLoad_res);
5744 new_mem = new_r_Proj(block, xld, mode_M, pn_ia32_xLoad_M);
5746 if (old_mem != NULL) {
5747 edges_reroute(old_mem, new_mem, current_ir_graph);
/* let the user consume the xmm value instead of the x87 one */
5751 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5758 /* do the transformation */
/* Entry point of the ia32 transform phase: sets up helper state, runs
 * be_transform_graph() with the registered transformers, and fixes up SSE
 * call results afterwards. */
5759 void ia32_transform_graph(ia32_code_gen_t *cg)
5763 register_transformers();
5765 initial_fpcw = NULL;
/* heights are required by the address-mode matcher for dependency checks */
5768 be_timer_push(T_HEIGHTS);
5769 heights = heights_new(cg->irg);
5770 be_timer_pop(T_HEIGHTS);
5771 ia32_calculate_non_address_mode_nodes(cg->birg);
5773 /* the transform phase is not safe for CSE (yet) because several nodes get
5774 * attributes set after their creation */
5775 cse_last = get_opt_cse();
/* collect calls during the walk so postprocess_fp_call_results() can move
 * their x87 float results into xmm registers */
5778 call_list = NEW_ARR_F(ir_node *, 0);
5779 call_types = NEW_ARR_F(ir_type *, 0);
5780 be_transform_graph(cg->birg, ia32_pretransform_node);
5782 if (ia32_cg_config.use_sse2)
5783 postprocess_fp_call_results();
5784 DEL_ARR_F(call_types);
5785 DEL_ARR_F(call_list);
/* restore the CSE setting and tear down helper state */
5787 set_opt_cse(cse_last);
5789 ia32_free_non_address_mode_nodes();
5790 heights_free(heights);
5794 void ia32_init_transform(void)
5796 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");