2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 * @brief This file implements the IR transformation from firm into ia32-Firm.
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
50 #include "../benode.h"
51 #include "../besched.h"
53 #include "../beutil.h"
55 #include "../betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_map_regs.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_util.h"
67 #include "ia32_address_mode.h"
68 #include "ia32_architecture.h"
70 #include "gen_ia32_regalloc_if.h"
72 /* define this to construct SSE constants instead of load them */
73 #undef CONSTRUCT_SSE_CONST
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
81 #define ULL_BIAS "18446744073709551616"
83 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
84 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
85 #define ENT_SFP_ABS "C_ia32_sfp_abs"
86 #define ENT_DFP_ABS "C_ia32_dfp_abs"
87 #define ENT_ULL_BIAS "C_ia32_ull_bias"
89 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
90 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
92 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
94 static ir_node *initial_fpcw = NULL;
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* its enough to have those once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero is a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero is a node represents the -1 constant. */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
/* NOTE(review): extraction dropped the braces and return statements of this
 * function. Visible logic: a constant is "simple" for x87 when its tarval is
 * 0.0 or 1.0 (loadable via fldz/fld1) — verify trailing returns upstream. */
155 static bool is_simple_x87_Const(ir_node *node)
157 	tarval *tv = get_Const_tarval(node);
158 	if (tarval_is_null(tv) || tarval_is_one(tv))
161 	/* TODO: match all the other float constants */
166 * returns true if constant can be created with a simple float command
/* NOTE(review): several lines lost in extraction (returns, condition tails).
 * Visible logic: the zero constant is always simple; with CONSTRUCT_SSE_CONST
 * defined, a mode_D constant whose low 32 bits are zero also qualifies. */
168 static bool is_simple_sse_Const(ir_node *node)
170 	tarval *tv = get_Const_tarval(node);
171 	ir_mode *mode = get_tarval_mode(tv);
176 	if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 	if (mode == mode_D) {
	/* assemble the low 32 bits of the double from its byte representation */
184 	unsigned val = get_tarval_sub_bits(tv, 0) |
185 	(get_tarval_sub_bits(tv, 1) << 8) |
186 	(get_tarval_sub_bits(tv, 2) << 16) |
187 	(get_tarval_sub_bits(tv, 3) << 24);
189 	/* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 	/* TODO: match all the other float constants */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
/* NOTE(review): the non-PIC branch (presumably returning noreg_GP) was lost
 * in extraction — confirm against upstream. */
201 static ir_node *get_symconst_base(void)
203 	ir_graph *irg = current_ir_graph;
205 	if (be_get_irg_options(irg)->pic) {
206 	const arch_env_t *arch_env = be_get_irg_arch_env(irg);
	/* PIC: every new symbol reference must be relative to the PIC base */
207 	return arch_env->impl->get_pic_base(irg);
214 * Transforms a Const.
/* NOTE(review): many interior lines (declarations, braces, else-branches,
 * returns) were lost in extraction; the content below is kept verbatim.
 * Structure: float constants take an SSE path (use_sse2) or an x87 path,
 * otherwise the constant is materialized as an ia32_Const immediate. */
216 static ir_node *gen_Const(ir_node *node)
218 	ir_node *old_block = get_nodes_block(node);
219 	ir_node *block = be_transform_node(old_block);
220 	dbg_info *dbgi = get_irn_dbg_info(node);
221 	ir_mode *mode = get_irn_mode(node);
223 	assert(is_Const(node));
225 	if (mode_is_float(mode)) {
	/* --- SSE2 path: try cheap constructions before falling back to a load --- */
231 	if (ia32_cg_config.use_sse2) {
232 	tarval *tv = get_Const_tarval(node);
233 	if (tarval_is_null(tv)) {
	/* 0.0: xorps/xorpd the register with itself */
234 	load = new_bd_ia32_xZero(dbgi, block);
235 	set_ia32_ls_mode(load, mode);
237 #ifdef CONSTRUCT_SSE_CONST
238 	} else if (tarval_is_one(tv)) {
	/* 1.0: all-ones shifted to produce the exponent pattern */
239 	int cnst = mode == mode_F ? 26 : 55;
240 	ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
241 	ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
242 	ir_node *pslld, *psrld;
244 	load = new_bd_ia32_xAllOnes(dbgi, block);
245 	set_ia32_ls_mode(load, mode);
246 	pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
247 	set_ia32_ls_mode(pslld, mode);
248 	psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
249 	set_ia32_ls_mode(psrld, mode);
251 #endif /* CONSTRUCT_SSE_CONST */
252 	} else if (mode == mode_F) {
253 	/* we can place any 32bit constant by using a movd gp, sse */
254 	unsigned val = get_tarval_sub_bits(tv, 0) |
255 	(get_tarval_sub_bits(tv, 1) << 8) |
256 	(get_tarval_sub_bits(tv, 2) << 16) |
257 	(get_tarval_sub_bits(tv, 3) << 24);
258 	ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
259 	load = new_bd_ia32_xMovd(dbgi, block, cnst);
260 	set_ia32_ls_mode(load, mode);
263 #ifdef CONSTRUCT_SSE_CONST
264 	if (mode == mode_D) {
265 	unsigned val = get_tarval_sub_bits(tv, 0) |
266 	(get_tarval_sub_bits(tv, 1) << 8) |
267 	(get_tarval_sub_bits(tv, 2) << 16) |
268 	(get_tarval_sub_bits(tv, 3) << 24);
270 	ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
271 	ir_node *cnst, *psllq;
273 	/* fine, lower 32bit are zero, produce 32bit value */
274 	val = get_tarval_sub_bits(tv, 4) |
275 	(get_tarval_sub_bits(tv, 5) << 8) |
276 	(get_tarval_sub_bits(tv, 6) << 16) |
277 	(get_tarval_sub_bits(tv, 7) << 24);
278 	cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
279 	load = new_bd_ia32_xMovd(dbgi, block, cnst);
280 	set_ia32_ls_mode(load, mode);
281 	psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
282 	set_ia32_ls_mode(psllq, mode);
287 #endif /* CONSTRUCT_SSE_CONST */
	/* general SSE case: load the constant from a generated entity */
288 	floatent = create_float_const_entity(node);
290 	base = get_symconst_base();
291 	load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
293 	set_ia32_op_type(load, ia32_AddrModeS);
294 	set_ia32_am_sc(load, floatent);
295 	arch_irn_add_flags(load, arch_irn_flags_rematerializable);
296 	res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
	/* --- x87 path: fldz/fld1 for 0.0/1.0, otherwise load from entity --- */
299 	if (is_Const_null(node)) {
300 	load = new_bd_ia32_vfldz(dbgi, block);
302 	set_ia32_ls_mode(load, mode);
303 	} else if (is_Const_one(node)) {
304 	load = new_bd_ia32_vfld1(dbgi, block);
306 	set_ia32_ls_mode(load, mode);
311 	floatent = create_float_const_entity(node);
312 	/* create_float_const_ent is smart and sometimes creates
313 	 * a smaller entity; use its type's mode as the load mode */
314 	ls_mode = get_type_mode(get_entity_type(floatent));
315 	base = get_symconst_base();
316 	load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
318 	set_ia32_op_type(load, ia32_AddrModeS);
319 	set_ia32_am_sc(load, floatent);
320 	arch_irn_add_flags(load, arch_irn_flags_rematerializable);
321 	res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
324 #ifdef CONSTRUCT_SSE_CONST
326 #endif /* CONSTRUCT_SSE_CONST */
327 	SET_IA32_ORIG_NODE(load, node);
329 	be_dep_on_frame(load);
331 	} else { /* non-float mode */
333 	tarval *tv = get_Const_tarval(node);
	/* normalize to 32-bit unsigned before reading the long value */
336 	tv = tarval_convert_to(tv, mode_Iu);
338 	if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
340 	panic("couldn't convert constant tarval (%+F)", node);
342 	val = get_tarval_long(tv);
344 	cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
345 	SET_IA32_ORIG_NODE(cnst, node);
347 	be_dep_on_frame(cnst);
353 * Transforms a SymConst.
/* NOTE(review): extraction dropped braces/else lines and returns; content
 * kept verbatim. Float SymConsts become an xLoad (SSE2) or vfld (x87);
 * integer SymConsts become an ia32_Const carrying the entity. */
355 static ir_node *gen_SymConst(ir_node *node)
357 	ir_node *old_block = get_nodes_block(node);
358 	ir_node *block = be_transform_node(old_block);
359 	dbg_info *dbgi = get_irn_dbg_info(node);
360 	ir_mode *mode = get_irn_mode(node);
363 	if (mode_is_float(mode)) {
364 	if (ia32_cg_config.use_sse2)
365 	cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
367 	cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
368 	set_ia32_am_sc(cnst, get_SymConst_entity(node));
369 	set_ia32_use_frame(cnst);
	/* only entity-address SymConsts are supported by this backend */
373 	if (get_SymConst_kind(node) != symconst_addr_ent) {
374 	panic("backend only support symconst_addr_ent (at %+F)", node);
376 	entity = get_SymConst_entity(node);
377 	cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
380 	SET_IA32_ORIG_NODE(cnst, node);
382 	be_dep_on_frame(cnst);
387 * Create a float type for the given mode and cache it.
389 * @param mode the mode for the float type (might be integer mode for SSE2 types)
390 * @param align alignment
/* NOTE(review): each branch keeps a static per-alignment cache (index is the
 * alignment in bytes, assumed < 16) of primitive types; the type is created
 * lazily on first request. Extraction dropped braces and the final else. */
392 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
398 	if (mode == mode_Iu) {
399 	static ir_type *int_Iu[16] = {NULL, };
401 	if (int_Iu[align] == NULL) {
402 	int_Iu[align] = tp = new_type_primitive(mode);
403 	/* set the specified alignment */
404 	set_type_alignment_bytes(tp, align);
406 	return int_Iu[align];
407 	} else if (mode == mode_Lu) {
408 	static ir_type *int_Lu[16] = {NULL, };
410 	if (int_Lu[align] == NULL) {
411 	int_Lu[align] = tp = new_type_primitive(mode);
412 	/* set the specified alignment */
413 	set_type_alignment_bytes(tp, align);
415 	return int_Lu[align];
416 	} else if (mode == mode_F) {
417 	static ir_type *float_F[16] = {NULL, };
419 	if (float_F[align] == NULL) {
420 	float_F[align] = tp = new_type_primitive(mode);
421 	/* set the specified alignment */
422 	set_type_alignment_bytes(tp, align);
424 	return float_F[align];
425 	} else if (mode == mode_D) {
426 	static ir_type *float_D[16] = {NULL, };
428 	if (float_D[align] == NULL) {
429 	float_D[align] = tp = new_type_primitive(mode);
430 	/* set the specified alignment */
431 	set_type_alignment_bytes(tp, align);
433 	return float_D[align];
	/* fallback: extended precision (mode_E) cache */
435 	static ir_type *float_E[16] = {NULL, };
437 	if (float_E[align] == NULL) {
438 	float_E[align] = tp = new_type_primitive(mode);
439 	/* set the specified alignment */
440 	set_type_alignment_bytes(tp, align);
442 	return float_E[align];
447 * Create a float[2] array type for the given atomic type.
449 * @param tp the atomic type
/* NOTE(review): caches one array type per (element mode, alignment); the
 * trailing `return arr;` was lost in extraction — verify upstream. */
451 static ir_type *ia32_create_float_array(ir_type *tp)
453 	ir_mode *mode = get_type_mode(tp);
454 	unsigned align = get_type_alignment_bytes(tp);
459 	if (mode == mode_F) {
460 	static ir_type *float_F[16] = {NULL, };
462 	if (float_F[align] != NULL)
463 	return float_F[align];
464 	arr = float_F[align] = new_type_array(1, tp);
465 	} else if (mode == mode_D) {
466 	static ir_type *float_D[16] = {NULL, };
468 	if (float_D[align] != NULL)
469 	return float_D[align];
470 	arr = float_D[align] = new_type_array(1, tp);
472 	static ir_type *float_E[16] = {NULL, };
474 	if (float_E[align] != NULL)
475 	return float_E[align];
476 	arr = float_E[align] = new_type_array(1, tp);
	/* a two-element array of the atomic type, with a fixed layout */
478 	set_type_alignment_bytes(arr, align);
479 	set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
480 	set_type_state(arr, layout_fixed);
484 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* NOTE(review): builds (once, cached in ent_cache) a read-only global entity
 * holding a well-known bit pattern: sign masks, abs masks, or the unsigned
 * long long bias. ULLBIAS gets a two-element array {0, bias} initializer. */
485 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
487 	static const struct {
488 	const char *ent_name;
489 	const char *cnst_str;
492 	} names [ia32_known_const_max] = {
493 	{ ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
494 	{ ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
495 	{ ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
496 	{ ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
497 	{ ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
499 	static ir_entity *ent_cache[ia32_known_const_max];
501 	const char *ent_name, *cnst_str;
507 	ent_name = names[kct].ent_name;
508 	if (! ent_cache[kct]) {
509 	cnst_str = names[kct].cnst_str;
	/* mode selector from the table: 0 -> Iu, 1 -> Lu, other -> F */
511 	switch (names[kct].mode) {
512 	case 0: mode = mode_Iu; break;
513 	case 1: mode = mode_Lu; break;
514 	default: mode = mode_F; break;
516 	tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
517 	tp = ia32_create_float_type(mode, names[kct].align);
519 	if (kct == ia32_ULLBIAS)
520 	tp = ia32_create_float_array(tp);
521 	ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
523 	set_entity_ld_ident(ent, get_entity_ident(ent));
524 	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
525 	set_entity_visibility(ent, ir_visibility_private);
527 	if (kct == ia32_ULLBIAS) {
528 	ir_initializer_t *initializer = create_initializer_compound(2);
530 	set_initializer_compound_value(initializer, 0,
531 	create_initializer_tarval(get_mode_null(mode)));
532 	set_initializer_compound_value(initializer, 1,
533 	create_initializer_tarval(tv));
535 	set_entity_initializer(ent, initializer);
537 	set_entity_initializer(ent, create_initializer_tarval(tv));
540 	/* cache the entry */
541 	ent_cache[kct] = ent;
544 	return ent_cache[kct];
548 * return true if the node is a Proj(Load) and could be used in source address
549 * mode for another node. Will return only true if the @p other node is not
550 * dependent on the memory of the Load (for binary operations use the other
551 * input here, for unary operations use NULL).
/* NOTE(review): extraction dropped the early returns after each check;
 * content kept verbatim. Checks in order: simple float constants are always
 * usable, node must be Proj(Load, res) in the same block, the load must have
 * exactly one user (two with match_two_users), must not already be
 * transformed, and must not be reachable via the other operands' memory. */
553 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
554 	ir_node *other, ir_node *other2, match_flags_t flags)
559 	/* float constants are always available */
560 	if (is_Const(node)) {
561 	ir_mode *mode = get_irn_mode(node);
562 	if (mode_is_float(mode)) {
563 	if (ia32_cg_config.use_sse2) {
564 	if (is_simple_sse_Const(node))
567 	if (is_simple_x87_Const(node))
570 	if (get_irn_n_edges(node) > 1)
578 	load = get_Proj_pred(node);
579 	pn = get_Proj_proj(node);
580 	if (!is_Load(load) || pn != pn_Load_res)
582 	if (get_nodes_block(load) != block)
584 	/* we only use address mode if we're the only user of the load */
585 	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
587 	/* in some edge cases with address mode we might reach the load normally
588 	 * and through some AM sequence, if it is already materialized then we
589 	 * can't create an AM node from it */
590 	if (be_is_transformed(node))
593 	/* don't do AM if other node inputs depend on the load (via mem-proj) */
594 	if (other != NULL && prevents_AM(block, load, other))
597 	if (other2 != NULL && prevents_AM(block, load, other2))
/* Aggregated result of operand matching: the address-mode parts plus how the
 * operands were placed. NOTE(review): several fields (addr, new_op1/new_op2,
 * ls_mode, pinned, mem_proj — all referenced elsewhere in this file) were
 * lost in extraction; only the fields below remain visible. */
603 typedef struct ia32_address_mode_t ia32_address_mode_t;
604 struct ia32_address_mode_t {
	/* whether the node uses source address mode or normal operands */
609 	ia32_op_type_t op_type;
	/* set when the operation may swap its operands */
613 	unsigned commutative : 1;
	/* set when op1/op2 were swapped during matching */
614 	unsigned ins_permuted : 1;
617 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
619 /* construct load address */
620 memset(addr, 0, sizeof(addr[0]));
621 ia32_create_address_mode(addr, ptr, 0);
623 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
624 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
625 addr->mem = be_transform_node(mem);
/* Fill the address-mode struct for a node used as a source operand: either a
 * float Const (address of its generated constant entity) or a Proj(Load)
 * (the load's address). NOTE(review): extraction dropped braces, the
 * else-branch marker and some assignments; content kept verbatim. */
628 static void build_address(ia32_address_mode_t *am, ir_node *node,
629 	ia32_create_am_flags_t flags)
631 	ia32_address_t *addr = &am->addr;
637 	/* floating point immediates */
638 	if (is_Const(node)) {
	/* constants are loaded from a generated read-only entity */
639 	ir_entity *entity = create_float_const_entity(node);
640 	addr->base = get_symconst_base();
641 	addr->index = noreg_GP;
643 	addr->symconst_ent = entity;
645 	am->ls_mode = get_type_mode(get_entity_type(entity));
646 	am->pinned = op_pin_state_floats;
	/* otherwise: node is a Proj(Load) — reuse the load's address */
650 	load = get_Proj_pred(node);
651 	ptr = get_Load_ptr(load);
652 	mem = get_Load_mem(load);
653 	new_mem = be_transform_node(mem);
654 	am->pinned = get_irn_pinned(load);
655 	am->ls_mode = get_Load_mode(load);
656 	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
659 	/* construct load address */
660 	ia32_create_address_mode(addr, ptr, flags);
662 	addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
663 	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy all address attributes (scale, symconst, offset, frame entity) from
 * @p addr onto the given ia32 node. */
667 static void set_address(ir_node *node, const ia32_address_t *addr)
669 	set_ia32_am_scale(node, addr->scale);
670 	set_ia32_am_sc(node, addr->symconst_ent);
671 	set_ia32_am_offs_int(node, addr->offset);
672 	if (addr->symconst_sign)
673 	set_ia32_am_sc_sign(node);
/* NOTE(review): an `if (addr->use_frame)` guard for the next call appears to
 * have been lost in extraction (line 674 missing) — verify upstream. */
675 	set_ia32_use_frame(node);
676 	set_ia32_frame_ent(node, addr->frame_entity);
680 * Apply attributes of a given address mode to a node.
682 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
684 	set_address(node, &am->addr);
686 	set_ia32_op_type(node, am->op_type);
687 	set_ia32_ls_mode(node, am->ls_mode);
688 	if (am->pinned == op_pin_state_pinned) {
689 	/* beware: some nodes are already pinned and did not allow to change the state */
690 	if (get_irn_pinned(node) != op_pin_state_pinned)
691 	set_irn_pinned(node, op_pin_state_pinned);
/* NOTE(review): an `if (am->commutative)` guard for the next call appears to
 * have been lost in extraction (line 693 missing) — verify upstream. */
694 	set_ia32_commutative(node);
698 * Check, if a given node is a Down-Conv, ie. an integer Conv
699 * from a mode with more bits to a mode with fewer bits.
700 * Moreover, we return only true if the node has not more than 1 user.
702 * @param node the node
703 * @return non-zero if node is a Down-Conv
/* NOTE(review): the is_Conv() early-out, local declarations and the `return`
 * keyword of the final expression were lost in extraction. */
705 static int is_downconv(const ir_node *node)
713 	/* we only want to skip the conv when we're the only user
714 	 * (because this test is used in the context of address-mode selection
715 	 * and we don't want to use address mode for multiple users) */
716 	if (get_irn_n_edges(node) > 1)
719 	src_mode = get_irn_mode(get_Conv_op(node));
720 	dest_mode = get_irn_mode(node);
	/* both modes must live in GP registers and the target must not be wider */
722 	ia32_mode_needs_gp_reg(src_mode) &&
723 	ia32_mode_needs_gp_reg(dest_mode) &&
724 	get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
727 /** Skip all Down-Conv's on a given node and return the resulting node. */
728 ir_node *ia32_skip_downconv(ir_node *node)
730 while (is_downconv(node))
731 node = get_Conv_op(node);
/* Returns true for a Conv between GP-register modes of identical bit size
 * (i.e. a pure signedness change) with at most one user.
 * NOTE(review): the is_Conv() early-out, declarations and the `return`
 * keyword of the final expression were lost in extraction. */
736 static bool is_sameconv(ir_node *node)
744 	/* we only want to skip the conv when we're the only user
745 	 * (because this test is used in the context of address-mode selection
746 	 * and we don't want to use address mode for multiple users) */
747 	if (get_irn_n_edges(node) > 1)
750 	src_mode = get_irn_mode(get_Conv_op(node));
751 	dest_mode = get_irn_mode(node);
752 	/* both modes in GP registers, same width */
753 	ia32_mode_needs_gp_reg(src_mode) &&
754 	ia32_mode_needs_gp_reg(dest_mode) &&
755 	get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
758 /** Skip all signedness convs */
759 static ir_node *ia32_skip_sameconv(ir_node *node)
761 while (is_sameconv(node))
762 node = get_Conv_op(node);
/* Create a Conv that widens @p node to 32 bit, choosing mode_Is or mode_Iu
 * (tgt_mode) depending on the signedness of the source mode.
 * NOTE(review): the tgt_mode assignments, else-branch and declarations were
 * lost in extraction; content kept verbatim. */
767 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
769 	ir_mode *mode = get_irn_mode(node);
774 	if (mode_is_signed(mode)) {
779 	block = get_nodes_block(node);
780 	dbgi = get_irn_dbg_info(node);
782 	return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
786 * matches operands of a node into ia32 addressing/operand modes. This covers
787 * usage of source address mode, immediates, operations with non 32-bit modes,
789 * The resulting data is filled into the @p am struct. block is the block
790 * of the node whose arguments are matched. op1, op2 are the first and second
791 * input that are matched (op1 may be NULL). other_op is another unrelated
792 * input that is not matched! but which is needed sometimes to check if AM
793 * for op1/op2 is legal.
794 * @p flags describes the supported modes of the operation in detail.
/* NOTE(review): this function lost many interior lines in extraction (braces,
 * else branches, several assignments); content below is kept verbatim. */
796 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
797 	ir_node *op1, ir_node *op2, ir_node *other_op,
800 	ia32_address_t *addr = &am->addr;
801 	ir_mode *mode = get_irn_mode(op2);
802 	int mode_bits = get_mode_size_bits(mode);
803 	ir_node *new_op1, *new_op2;
805 	unsigned commutative;
806 	int use_am_and_immediates;
809 	memset(am, 0, sizeof(am[0]));
	/* decode the supported matching options from the flags */
811 	commutative = (flags & match_commutative) != 0;
812 	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
813 	use_am = (flags & match_am) != 0;
814 	use_immediate = (flags & match_immediate) != 0;
815 	assert(!use_am_and_immediates || use_immediate);
818 	assert(!commutative || op1 != NULL);
819 	assert(use_am || !(flags & match_8bit_am));
820 	assert(use_am || !(flags & match_16bit_am));
	/* narrow modes only allow AM when explicitly requested */
822 	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
823 	(mode_bits == 16 && !(flags & match_16bit_am))) {
827 	/* we can simply skip downconvs for mode neutral nodes: the upper bits
828 	 * can be random for these operations */
829 	if (flags & match_mode_neutral) {
830 	op2 = ia32_skip_downconv(op2);
832 	op1 = ia32_skip_downconv(op1);
835 	op2 = ia32_skip_sameconv(op2);
837 	op1 = ia32_skip_sameconv(op1);
841 	/* match immediates. firm nodes are normalized: constants are always on the
842 	 * right-hand side */
844 	if (!(flags & match_try_am) && use_immediate) {
845 	new_op2 = try_create_Immediate(op2, 0);
	/* try source address mode on op2, then (if commutative) on op1 */
848 	if (new_op2 == NULL &&
849 	use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
850 	build_address(am, op2, 0);
851 	new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
852 	if (mode_is_float(mode)) {
853 	new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
857 	am->op_type = ia32_AddrModeS;
858 	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
860 	ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
862 	build_address(am, op1, 0);
864 	if (mode_is_float(mode)) {
865 	noreg = ia32_new_NoReg_vfp(current_ir_graph);
870 	if (new_op2 != NULL) {
873 	new_op1 = be_transform_node(op2);
	/* operands swapped: op1 became the AM operand */
875 	am->ins_permuted = 1;
877 	am->op_type = ia32_AddrModeS;
	/* normal (register/immediate) operands, no source AM */
880 	am->op_type = ia32_Normal;
882 	if (flags & match_try_am) {
888 	mode = get_irn_mode(op2);
889 	if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
890 	new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
892 	new_op2 = create_upconv(op2, NULL);
893 	am->ls_mode = mode_Iu;
895 	new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
897 	new_op2 = be_transform_node(op2);
898 	am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
	/* fill unused address parts with the no-register placeholder */
901 	if (addr->base == NULL)
902 	addr->base = noreg_GP;
903 	if (addr->index == NULL)
904 	addr->index = noreg_GP;
905 	if (addr->mem == NULL)
908 	am->new_op1 = new_op1;
909 	am->new_op2 = new_op2;
910 	am->commutative = commutative;
914 * "Fixes" a node that uses address mode by turning it into mode_T
915 * and returning a pn_ia32_res Proj.
917 * @param node the node
918 * @param am its address mode
920 * @return a Proj(pn_ia32_res) if a memory address mode is used,
/* NOTE(review): declarations, the early `return node;` and the final return
 * were lost in extraction; content kept verbatim. */
923 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
	/* nothing to do unless source address mode consumed a load's memory */
928 	if (am->mem_proj == NULL)
931 	/* we have to create a mode_T so the old MemProj can attach to us */
932 	mode = get_irn_mode(node);
933 	load = get_Proj_pred(am->mem_proj);
	/* redirect all users of the old load to this node */
935 	be_set_transformed_node(load, node);
937 	if (mode != mode_T) {
938 	set_irn_mode(node, mode_T);
939 	return new_rd_Proj(NULL, node, mode, pn_ia32_res);
946 * Construct a standard binary operation, set AM and immediate if required.
948 * @param node The original node for which the binop is created
949 * @param op1 The first operand
950 * @param op2 The second operand
951 * @param func The node constructor function
952 * @return The constructed ia32 node.
/* NOTE(review): braces, a declaration line and the final return were lost in
 * extraction; content kept verbatim. */
954 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
955 	construct_binop_func *func, match_flags_t flags)
958 	ir_node *block, *new_block, *new_node;
959 	ia32_address_mode_t am;
960 	ia32_address_t *addr = &am.addr;
962 	block = get_nodes_block(node);
963 	match_arguments(&am, block, op1, op2, NULL, flags);
965 	dbgi = get_irn_dbg_info(node);
966 	new_block = be_transform_node(block);
967 	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
968 	am.new_op1, am.new_op2);
969 	set_am_attributes(new_node, &am);
970 	/* we can't use source address mode anymore when using immediates */
971 	if (!(flags & match_am_and_immediates) &&
972 	(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
973 	set_ia32_am_support(new_node, ia32_am_none);
974 	SET_IA32_ORIG_NODE(new_node, node);
976 	new_node = fix_mem_proj(new_node, &am);
982 * Generic names for the inputs of an ia32 binary op.
/* NOTE(review): the `enum {` opener was lost in extraction. */
985 	n_ia32_l_binop_left, /**< ia32 left input */
986 	n_ia32_l_binop_right, /**< ia32 right input */
987 	n_ia32_l_binop_eflags /**< ia32 eflags input */
/* Statically pin the generic input numbering to the generated Adc/Sbb node
 * layouts so gen_binop_flags() can use the generic names for both. */
989 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
990 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
991 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
992 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
993 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
994 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
997 * Construct a binary operation which also consumes the eflags.
999 * @param node The node to transform
1000 * @param func The node constructor function
1001 * @param flags The match flags
1002 * @return The constructed ia32 node
/* NOTE(review): braces, a declaration and the final return were lost in
 * extraction; content kept verbatim. */
1004 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1005 	match_flags_t flags)
1007 	ir_node *src_block = get_nodes_block(node);
1008 	ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1009 	ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1010 	ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1012 	ir_node *block, *new_node, *new_eflags;
1013 	ia32_address_mode_t am;
1014 	ia32_address_t *addr = &am.addr;
	/* eflags is passed as other_op: it must not depend on the AM load */
1016 	match_arguments(&am, src_block, op1, op2, eflags, flags);
1018 	dbgi = get_irn_dbg_info(node);
1019 	block = be_transform_node(src_block);
1020 	new_eflags = be_transform_node(eflags);
1021 	new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1022 	am.new_op1, am.new_op2, new_eflags);
1023 	set_am_attributes(new_node, &am);
1024 	/* we can't use source address mode anymore when using immediates */
1025 	if (!(flags & match_am_and_immediates) &&
1026 	(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1027 	set_ia32_am_support(new_node, ia32_am_none);
1028 	SET_IA32_ORIG_NODE(new_node, node);
1030 	new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily created, cached in the file-static initial_fpcw) node
 * representing the initial x87 floating point control word.
 * NOTE(review): the local declaration of `fpcw` was lost in extraction. */
1035 static ir_node *get_fpcw(void)
1038 	if (initial_fpcw != NULL)
1039 	return initial_fpcw;
1041 	fpcw = be_abi_get_ignore_irn(be_get_irg_abi(current_ir_graph),
1042 	&ia32_fp_cw_regs[REG_FPCW]);
1043 	initial_fpcw = be_transform_node(fpcw);
1045 	return initial_fpcw;
1049 * Construct a standard binary operation, set AM and immediate if required.
1051 * @param op1 The first operand
1052 * @param op2 The second operand
1053 * @param func The node constructor function
1054 * @return The constructed ia32 node.
/* NOTE(review): braces, flag additions and the final return were lost in
 * extraction; content kept verbatim. */
1056 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1057 	construct_binop_float_func *func)
1059 	ir_mode *mode = get_irn_mode(node);
1061 	ir_node *block, *new_block, *new_node;
1062 	ia32_address_mode_t am;
1063 	ia32_address_t *addr = &am.addr;
1064 	ia32_x87_attr_t *attr;
1065 	/* All operations are considered commutative, because there are reverse
1066 	 * variants of the x87 instructions */
1067 	match_flags_t flags = match_commutative;
1069 	/* happens for div nodes... */
1071 	mode = get_divop_resmod(node);
1073 	/* cannot use address mode with long double on x87 */
1074 	if (get_mode_size_bits(mode) <= 64)
1077 	block = get_nodes_block(node);
1078 	match_arguments(&am, block, op1, op2, NULL, flags);
1080 	dbgi = get_irn_dbg_info(node);
1081 	new_block = be_transform_node(block);
	/* x87 binops additionally take the FP control word as input */
1082 	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1083 	am.new_op1, am.new_op2, get_fpcw());
1084 	set_am_attributes(new_node, &am);
	/* record whether matching swapped the operands (selects reverse variant) */
1086 	attr = get_ia32_x87_attr(new_node);
1087 	attr->attr.data.ins_permuted = am.ins_permuted;
1089 	SET_IA32_ORIG_NODE(new_node, node);
1091 	new_node = fix_mem_proj(new_node, &am);
1097 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1099 * @param op1 The first operand
1100 * @param op2 The second operand
1101 * @param func The node constructor function
1102 * @return The constructed ia32 node.
/* NOTE(review): braces, else markers, the loop body tail and the final
 * return were lost in extraction; content kept verbatim. */
1104 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1105 	construct_shift_func *func,
1106 	match_flags_t flags)
1109 	ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1111 	assert(! mode_is_float(get_irn_mode(node)));
1112 	assert(flags & match_immediate);
1113 	assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
	/* shifted value: skip downconvs (mode neutral) or widen to 32 bit */
1115 	if (flags & match_mode_neutral) {
1116 	op1 = ia32_skip_downconv(op1);
1117 	new_op1 = be_transform_node(op1);
1118 	} else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1119 	new_op1 = create_upconv(op1, node);
1121 	new_op1 = be_transform_node(op1);
1124 	/* the shift amount can be any mode that is bigger than 5 bits, since all
1125 	 * other bits are ignored anyway */
1126 	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1127 	ir_node *const op = get_Conv_op(op2);
1128 	if (mode_is_float(get_irn_mode(op)))
1131 	assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1133 	new_op2 = create_immediate_or_transform(op2, 0);
1135 	dbgi = get_irn_dbg_info(node);
1136 	block = get_nodes_block(node);
1137 	new_block = be_transform_node(block);
1138 	new_node = func(dbgi, new_block, new_op1, new_op2);
1139 	SET_IA32_ORIG_NODE(new_node, node);
1141 	/* lowered shift instruction may have a dependency operand, handle it here */
1142 	if (get_irn_arity(node) == 3) {
1143 	/* we have a dependency */
1144 	ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1145 	add_irn_dep(new_node, new_dep);
1153 * Construct a standard unary operation, set AM and immediate if required.
1155 * @param op The operand
1156 * @param func The node constructor function
1157 * @return The constructed ia32 node.
/* NOTE(review): braces and the final return were lost in extraction;
 * content kept verbatim. */
1159 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1160 	match_flags_t flags)
1163 	ir_node *block, *new_block, *new_op, *new_node;
1165 	assert(flags == 0 || flags == match_mode_neutral);
	/* mode neutral: the upper bits are irrelevant, so drop narrowing Convs */
1166 	if (flags & match_mode_neutral) {
1167 	op = ia32_skip_downconv(op);
1170 	new_op = be_transform_node(op);
1171 	dbgi = get_irn_dbg_info(node);
1172 	block = get_nodes_block(node);
1173 	new_block = be_transform_node(block);
1174 	new_node = func(dbgi, new_block, new_op);
1176 	SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node computing the effective address described by
 * @p addr. NOTE(review): the NULL handling for base (and the assignments of
 * noreg_GP / else branches) was lost in extraction; content kept verbatim. */
1181 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1182 	ia32_address_t *addr)
1184 	ir_node *base, *index, *res;
1190 	base = be_transform_node(base);
1193 	index = addr->index;
1194 	if (index == NULL) {
1197 	index = be_transform_node(index);
1200 	res = new_bd_ia32_Lea(dbgi, block, base, index);
	/* transfer scale/offset/symconst/frame attributes onto the Lea */
1201 	set_address(res, addr);
1207 * Returns non-zero if a given address mode has a symbolic or
1208 * numerical offset != 0.
1210 static int am_has_immediates(const ia32_address_t *addr)
1212 return addr->offset != 0 || addr->symconst_ent != NULL
1213 || addr->frame_entity || addr->use_frame;
/**
 * Creates an ia32 Add.
 *
 * Transformation strategy for the integer case, in order:
 *   0. pure immediate trees fold to a single ia32 Const
 *   1. add with immediate  -> Lea
 *   2. add with possible source address mode -> Add
 *   3. otherwise           -> Lea
 *
 * @return the created ia32 Add node
 */
static ir_node *gen_Add(ir_node *node)
	ir_mode *mode = get_irn_mode(node);
	ir_node *op1 = get_Add_left(node);
	ir_node *op2 = get_Add_right(node);
	ir_node *block, *new_block, *new_node, *add_immediate_op;
	ia32_address_t addr;
	ia32_address_mode_t am;

	/* float adds are plain binops (SSE2 or x87); no Lea tricks apply */
	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
			                 match_commutative | match_am);
		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);

	ia32_mark_non_am(node);

	/* upper bits don't matter for an integer add: strip pointless downconvs */
	op2 = ia32_skip_downconv(op2);
	op1 = ia32_skip_downconv(op1);

	/*
	 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
	 * 1. Add with immediate -> Lea
	 * 2. Add with possible source address mode -> Add
	 * 3. Otherwise -> Lea
	 */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, node, ia32_create_am_force);
	add_immediate_op = NULL;

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);

	/* everything folded into immediates -> emit a single Const */
	if (addr.base == NULL && addr.index == NULL) {
		new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
		                             addr.symconst_sign, 0, addr.offset);
		be_dep_on_frame(new_node);
		SET_IA32_ORIG_NODE(new_node, node);

	/* add with immediate? */
	if (addr.index == NULL) {
		add_immediate_op = addr.base;
	} else if (addr.base == NULL && addr.scale == 0) {
		add_immediate_op = addr.index;

	if (add_immediate_op != NULL) {
		/* Add x,0: the operand itself is the result */
		if (!am_has_immediates(&addr)) {
#ifdef DEBUG_libfirm
			ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
			return be_transform_node(add_immediate_op);

		new_node = create_lea_from_address(dbgi, new_block, &addr);
		SET_IA32_ORIG_NODE(new_node, node);

	/* test if we can use source address mode */
	match_arguments(&am, block, op1, op2, NULL, match_commutative
			| match_mode_neutral | match_am | match_immediate | match_try_am);

	/* construct an Add with source address mode */
	if (am.op_type == ia32_AddrModeS) {
		ia32_address_t *am_addr = &am.addr;
		new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
		                           am_addr->index, am_addr->mem, am.new_op1,
		set_am_attributes(new_node, &am);
		SET_IA32_ORIG_NODE(new_node, node);

		new_node = fix_mem_proj(new_node, &am);

	/* otherwise construct a lea */
	new_node = create_lea_from_address(dbgi, new_block, &addr);
	SET_IA32_ORIG_NODE(new_node, node);
1313 * Creates an ia32 Mul.
1315 * @return the created ia32 Mul node
1317 static ir_node *gen_Mul(ir_node *node)
1319 ir_node *op1 = get_Mul_left(node);
1320 ir_node *op2 = get_Mul_right(node);
1321 ir_mode *mode = get_irn_mode(node);
1323 if (mode_is_float(mode)) {
1324 if (ia32_cg_config.use_sse2)
1325 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1326 match_commutative | match_am);
1328 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1330 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1331 match_commutative | match_am | match_mode_neutral |
1332 match_immediate | match_am_and_immediates);
1336 * Creates an ia32 Mulh.
1337 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1338 * this result while Mul returns the lower 32 bit.
1340 * @return the created ia32 Mulh node
1342 static ir_node *gen_Mulh(ir_node *node)
1344 dbg_info *dbgi = get_irn_dbg_info(node);
1345 ir_node *op1 = get_Mulh_left(node);
1346 ir_node *op2 = get_Mulh_right(node);
1347 ir_mode *mode = get_irn_mode(node);
1349 ir_node *proj_res_high;
1351 if (get_mode_size_bits(mode) != 32) {
1352 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1355 if (mode_is_signed(mode)) {
1356 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1357 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1359 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1360 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1362 return proj_res_high;
/**
 * Creates an ia32 And.
 *
 * An And with a 0xFF / 0xFFFF mask is a zero extension and is turned into
 * an I2I Conv with the matching unsigned source mode instead.
 *
 * @return The created ia32 And node
 */
static ir_node *gen_And(ir_node *node)
	ir_node *op1 = get_And_left(node);
	ir_node *op2 = get_And_right(node);
	assert(! mode_is_float(get_irn_mode(node)));

	/* is it a zero extension? */
	if (is_Const(op2)) {
		tarval *tv = get_Const_tarval(op2);
		long v = get_tarval_long(tv);

		if (v == 0xFF || v == 0xFFFF) {
			dbg_info *dbgi = get_irn_dbg_info(node);
			ir_node *block = get_nodes_block(node);
			/* NOTE(review): the src_mode selection for the 0xFF case is not
			 * visible in this excerpt; this branch handles the 16 bit mask */
			assert(v == 0xFFFF);

			res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);

	/* generic case: a real And instruction */
	return gen_binop(node, op1, op2, new_bd_ia32_And,
			match_commutative | match_mode_neutral | match_am | match_immediate);
1405 * Creates an ia32 Or.
1407 * @return The created ia32 Or node
1409 static ir_node *gen_Or(ir_node *node)
1411 ir_node *op1 = get_Or_left(node);
1412 ir_node *op2 = get_Or_right(node);
1414 assert (! mode_is_float(get_irn_mode(node)));
1415 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1416 | match_mode_neutral | match_am | match_immediate);
1422 * Creates an ia32 Eor.
1424 * @return The created ia32 Eor node
1426 static ir_node *gen_Eor(ir_node *node)
1428 ir_node *op1 = get_Eor_left(node);
1429 ir_node *op2 = get_Eor_right(node);
1431 assert(! mode_is_float(get_irn_mode(node)));
1432 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1433 | match_mode_neutral | match_am | match_immediate);
1438 * Creates an ia32 Sub.
1440 * @return The created ia32 Sub node
1442 static ir_node *gen_Sub(ir_node *node)
1444 ir_node *op1 = get_Sub_left(node);
1445 ir_node *op2 = get_Sub_right(node);
1446 ir_mode *mode = get_irn_mode(node);
1448 if (mode_is_float(mode)) {
1449 if (ia32_cg_config.use_sse2)
1450 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1452 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1455 if (is_Const(op2)) {
1456 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1460 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1461 | match_am | match_immediate);
/**
 * Build the memory input for a node that consumed a Load via address mode.
 *
 * The original memory (src_mem) must be merged with the memory produced by
 * the address mode (am_mem) without creating a memory cycle through the
 * consumed load (src_val).
 */
static ir_node *transform_AM_mem(ir_node *const block,
                                 ir_node *const src_val,
                                 ir_node *const src_mem,
                                 ir_node *const am_mem)
{
	if (is_NoMem(am_mem)) {
		/* address mode produced no memory: keep the original memory */
		return be_transform_node(src_mem);
	} else if (is_Proj(src_val) &&
	           get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
		/* avoid memory loop */
	} else if (is_Proj(src_val) && is_Sync(src_mem)) {
		/* rebuild the Sync, dropping the pred that would close the loop */
		ir_node *const ptr_pred = get_Proj_pred(src_val);
		int const arity = get_Sync_n_preds(src_mem);

		NEW_ARR_A(ir_node*, ins, arity + 1);

		/* NOTE: This sometimes produces dead-code because the old sync in
		 * src_mem might not be used anymore, we should detect this case
		 * and kill the sync... */
		for (i = arity - 1; i >= 0; --i) {
			ir_node *const pred = get_Sync_pred(src_mem, i);

			/* avoid memory loop */
			if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)

			ins[n++] = be_transform_node(pred);

		return new_r_Sync(block, n, ins);

	/* general case: Sync of the transformed old memory and the AM memory */
	ins[0] = be_transform_node(src_mem);

	return new_r_Sync(block, 2, ins);
1511 * Create a 32bit to 64bit signed extension.
1513 * @param dbgi debug info
1514 * @param block the block where node nodes should be placed
1515 * @param val the value to extend
1516 * @param orig the original node
1518 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1519 ir_node *val, const ir_node *orig)
1524 if (ia32_cg_config.use_short_sex_eax) {
1525 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1526 be_dep_on_frame(pval);
1527 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1529 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1530 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1532 SET_IA32_ORIG_NODE(res, orig);
/**
 * Generates an ia32 DivMod with additional infrastructure for the
 * register allocator if needed.
 *
 * Handles Div, Mod and DivMod alike. Signed divisions use IDiv and sign
 * extend the dividend into edx; unsigned ones use Div with a zero edx.
 */
static ir_node *create_Div(ir_node *node)
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *sign_extension;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* the upper bits have random contents for smaller modes */
	switch (get_irn_opcode(node)) {
		/* Div */
		op1 = get_Div_left(node);
		op2 = get_Div_right(node);
		mem = get_Div_mem(node);
		mode = get_Div_resmode(node);
		/* Mod */
		op1 = get_Mod_left(node);
		op2 = get_Mod_right(node);
		mem = get_Mod_mem(node);
		mode = get_Mod_resmode(node);
		/* DivMod */
		op1 = get_DivMod_left(node);
		op2 = get_DivMod_right(node);
		mem = get_DivMod_mem(node);
		mode = get_DivMod_resmode(node);
		panic("invalid divmod node %+F", node);

	/* up-convert to 32 bit so the hardware divide sees clean operands */
	match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);

	/* Beware: We don't need a Sync, if the memory predecessor of the Div node
	   is the memory of the consumed address. We can have only the second op as address
	   in Div nodes, so check only op2. */
	new_mem = transform_AM_mem(block, op2, mem, addr->mem);

	if (mode_is_signed(mode)) {
		/* idiv: edx holds the sign extension of the dividend */
		sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
		new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
				addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
		/* div: edx must be zero */
		sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
		be_dep_on_frame(sign_extension);

		new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
				addr->index, new_mem, am.new_op2,
				am.new_op1, sign_extension);

	/* keep the original pinned state: divisions may trap */
	set_irn_pinned(new_node, get_irn_pinned(node));

	set_am_attributes(new_node, &am);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
1610 * Generates an ia32 Mod.
1612 static ir_node *gen_Mod(ir_node *node)
1614 return create_Div(node);
1618 * Generates an ia32 Div.
1620 static ir_node *gen_Div(ir_node *node)
1622 return create_Div(node);
1626 * Generates an ia32 DivMod.
1628 static ir_node *gen_DivMod(ir_node *node)
1630 return create_Div(node);
1636 * Creates an ia32 floating Div.
1638 * @return The created ia32 xDiv node
1640 static ir_node *gen_Quot(ir_node *node)
1642 ir_node *op1 = get_Quot_left(node);
1643 ir_node *op2 = get_Quot_right(node);
1645 if (ia32_cg_config.use_sse2) {
1646 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1648 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1654 * Creates an ia32 Shl.
1656 * @return The created ia32 Shl node
1658 static ir_node *gen_Shl(ir_node *node)
1660 ir_node *left = get_Shl_left(node);
1661 ir_node *right = get_Shl_right(node);
1663 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1664 match_mode_neutral | match_immediate);
1668 * Creates an ia32 Shr.
1670 * @return The created ia32 Shr node
1672 static ir_node *gen_Shr(ir_node *node)
1674 ir_node *left = get_Shr_left(node);
1675 ir_node *right = get_Shr_right(node);
1677 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
/**
 * Creates an ia32 Sar.
 *
 * Recognizes two special patterns before falling back to a plain Sar:
 *  - Shrs(x, 31): a 32->64 bit sign extension
 *  - Shrs(Shl(x, C), C) with C in {16, 24}: an 8/16 bit sign extension,
 *    turned into an I2I Conv
 *
 * @return The created ia32 Shrs node
 */
static ir_node *gen_Shrs(ir_node *node)
	ir_node *left = get_Shrs_left(node);
	ir_node *right = get_Shrs_right(node);

	if (is_Const(right)) {
		tarval *tv = get_Const_tarval(right);
		long val = get_tarval_long(tv);
		/* NOTE(review): the guard selecting the shift-by-31 case is not
		 * visible in this excerpt */

		/* this is a sign extension */
		dbg_info *dbgi = get_irn_dbg_info(node);
		ir_node *block = be_transform_node(get_nodes_block(node));
		ir_node *new_op = be_transform_node(left);

		return create_sex_32_64(dbgi, block, new_op, node);

	/* 8 or 16 bit sign extension? */
	if (is_Const(right) && is_Shl(left)) {
		ir_node *shl_left = get_Shl_left(left);
		ir_node *shl_right = get_Shl_right(left);
		if (is_Const(shl_right)) {
			tarval *tv1 = get_Const_tarval(right);
			tarval *tv2 = get_Const_tarval(shl_right);
			/* equal shift amounts: Shl then Shrs keeps only the low bits,
			 * sign extended */
			if (tv1 == tv2 && tarval_is_long(tv1)) {
				long val = get_tarval_long(tv1);
				if (val == 16 || val == 24) {
					dbg_info *dbgi = get_irn_dbg_info(node);
					ir_node *block = get_nodes_block(node);

					res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,

	/* generic case */
	return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1741 * Creates an ia32 Rol.
1743 * @param op1 The first operator
1744 * @param op2 The second operator
1745 * @return The created ia32 RotL node
1747 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1749 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1755 * Creates an ia32 Ror.
1756 * NOTE: There is no RotR with immediate because this would always be a RotL
1757 * "imm-mode_size_bits" which can be pre-calculated.
1759 * @param op1 The first operator
1760 * @param op2 The second operator
1761 * @return The created ia32 RotR node
1763 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1765 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1771 * Creates an ia32 RotR or RotL (depending on the found pattern).
1773 * @return The created ia32 RotL or RotR node
1775 static ir_node *gen_Rotl(ir_node *node)
1777 ir_node *op1 = get_Rotl_left(node);
1778 ir_node *op2 = get_Rotl_right(node);
1780 if (is_Minus(op2)) {
1781 return gen_Ror(node, op1, get_Minus_op(op2));
1784 return gen_Rol(node, op1, op2);
/**
 * Transforms a Minus node.
 *
 * Floats are negated by flipping the sign bit: SSE2 uses an xor against a
 * sign-mask constant, x87 uses fchs. Integers use the ia32 Neg node.
 *
 * @return The created ia32 Minus node
 */
static ir_node *gen_Minus(ir_node *node)
	ir_node *op = get_Minus_op(node);
	ir_node *block = be_transform_node(get_nodes_block(node));
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_float(mode)) {
		ir_node *new_op = be_transform_node(op);
		if (ia32_cg_config.use_sse2) {
			/* TODO: non-optimal... if we have many xXors, then we should
			 * rather create a load for the const and use that instead of
			 * several AM nodes... */
			ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);

			new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
			                            noreg_GP, nomem, new_op, noreg_xmm);

			/* xor against the 0x8000... sign-mask constant entity */
			size = get_mode_size_bits(mode);
			ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);

			set_ia32_am_sc(new_node, ent);
			set_ia32_op_type(new_node, ia32_AddrModeS);
			set_ia32_ls_mode(new_node, mode);
			/* x87: a single fchs does the job */
			new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
		new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);

	SET_IA32_ORIG_NODE(new_node, node);
1834 * Transforms a Not node.
1836 * @return The created ia32 Not node
1838 static ir_node *gen_Not(ir_node *node)
1840 ir_node *op = get_Not_op(node);
1842 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1843 assert (! mode_is_float(get_irn_mode(node)));
1845 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/**
 * Create code computing |op| (or -|op| when negate is set).
 *
 * Floats clear the sign bit: SSE2 ands against an abs-mask constant, x87
 * uses fabs (plus fchs for the negated variant). Integers use the classic
 * branch-free sequence abs(x) = (x ^ (x >> 31)) - (x >> 31).
 */
static ir_node *create_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
                           bool negate, ir_node *node)
	ir_node *new_block = be_transform_node(block);
	ir_mode *mode = get_irn_mode(op);

	if (mode_is_float(mode)) {
		new_op = be_transform_node(op);

		if (ia32_cg_config.use_sse2) {
			ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
			new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
			                            noreg_GP, nomem, new_op, noreg_fp);

			/* and against the 0x7FFF... abs-mask constant entity */
			size = get_mode_size_bits(mode);
			ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);

			set_ia32_am_sc(new_node, ent);

			SET_IA32_ORIG_NODE(new_node, node);

			set_ia32_op_type(new_node, ia32_AddrModeS);
			set_ia32_ls_mode(new_node, mode);

			/* TODO, implement -Abs case */
			new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
			SET_IA32_ORIG_NODE(new_node, node);
			/* negated variant: flip the sign after fabs */
			new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
			SET_IA32_ORIG_NODE(new_node, node);
		ir_node *sign_extension;

		/* smaller modes are first widened to 32 bit */
		if (get_mode_size_bits(mode) == 32) {
			new_op = be_transform_node(op);
			new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);

		sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);

		xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
		                      nomem, new_op, sign_extension);
		SET_IA32_ORIG_NODE(xor, node);

		/* the negate flag only swaps the operands of the final Sub */
		new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
		                           nomem, sign_extension, xor);
		new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
		                           nomem, xor, sign_extension);
		SET_IA32_ORIG_NODE(new_node, node);
1916 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1918 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1920 dbg_info *dbgi = get_irn_dbg_info(cmp);
1921 ir_node *block = get_nodes_block(cmp);
1922 ir_node *new_block = be_transform_node(block);
1923 ir_node *op1 = be_transform_node(x);
1924 ir_node *op2 = be_transform_node(n);
1926 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/**
 * Transform a node returning a "flag" result.
 *
 * Proj(Cmp) inputs are transformed directly; And-with-single-bit compares
 * against 0 may use the bt instruction. A plain mode_b value is tested
 * against 0 with a Test node.
 *
 * @param node    the node to transform
 * @param pnc_out the compare mode to use
 */
static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
	/* we have a Cmp as input */
	if (is_Proj(node)) {
		ir_node *pred = get_Proj_pred(node);
		pn_Cmp pnc = get_Proj_proj(node);
		/* Cmp(And(1 << n, x), 0) / == can be a single bt */
		if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
			ir_node *l = get_Cmp_left(pred);
			ir_node *r = get_Cmp_right(pred);
			ir_node *la = get_And_left(l);
			ir_node *ra = get_And_right(l);
			ir_node *c = get_Shl_left(la);
			if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
				/* (1 << n) & ra) */
				ir_node *n = get_Shl_right(la);
				flags = gen_bt(pred, ra, n);
				/* we must generate a Jc/Jnc jump */
				pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
				*pnc_out = ia32_pn_Cmp_unsigned | pnc;
			/* symmetric case: the shift is the right operand of the And */
			ir_node *c = get_Shl_left(ra);
			if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
				/* la & (1 << n)) */
				ir_node *n = get_Shl_right(ra);
				flags = gen_bt(pred, la, n);
				/* we must generate a Jc/Jnc jump */
				pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
				*pnc_out = ia32_pn_Cmp_unsigned | pnc;
		/* add ia32 compare flags */
		ir_node *l = get_Cmp_left(pred);
		ir_mode *mode = get_irn_mode(l);
		if (mode_is_float(mode))
			pnc |= ia32_pn_Cmp_float;
		else if (! mode_is_signed(mode))
			pnc |= ia32_pn_Cmp_unsigned;

		flags = be_transform_node(pred);

	/* a mode_b value, we have to compare it against 0 */
	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(get_nodes_block(node));
	new_op = be_transform_node(node);
	flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
	                         new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
	*pnc_out = pn_Cmp_Lg;
/**
 * Transforms a Load.
 *
 * Floats use xLoad (SSE2) or vfld (x87); integers smaller than 32 bit use
 * a widening Conv_I2I with source address mode, everything else a plain
 * ia32 Load.
 *
 * @return the created ia32 Load node
 */
static ir_node *gen_Load(ir_node *node)
	ir_node *old_block = get_nodes_block(node);
	ir_node *block = be_transform_node(old_block);
	ir_node *ptr = get_Load_ptr(node);
	ir_node *mem = get_Load_mem(node);
	ir_node *new_mem = be_transform_node(mem);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_Load_mode(node);
	ia32_address_t addr;

	/* construct load address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, 0);
	base = be_transform_node(base);

	if (index == NULL) {
	index = be_transform_node(index);

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2) {
			new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
			new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
		assert(mode != mode_b);

		/* create a conv node with address mode for smaller modes */
		if (get_mode_size_bits(mode) < 32) {
			new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
			                                new_mem, noreg_GP, mode);
			new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);

	set_irn_pinned(new_node, get_irn_pinned(node));
	set_ia32_op_type(new_node, ia32_AddrModeS);
	set_ia32_ls_mode(new_node, mode);
	set_address(new_node, &addr);

	/* unpinned loads may be re-executed, so the spiller can rematerialize
	 * them; the assert ensures all load variants share the same res proj */
	if (get_irn_pinned(node) == op_pin_state_floats) {
		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
				&& pn_ia32_vfld_res == pn_ia32_Load_res
				&& pn_ia32_Load_res == pn_ia32_res);
		arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);

	SET_IA32_ORIG_NODE(new_node, node);

	be_dep_on_frame(new_node);
/**
 * Check whether a value (a Proj of a Load) may be folded into a Store as
 * destination address mode.
 *
 * @param block  the block of the store
 * @param node   the value to fold (expected to be a Proj of a Load)
 * @param mem    the store's memory input
 * @param ptr    the store's address
 * @param other  the computation's other operand (may be NULL)
 */
static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
                       ir_node *ptr, ir_node *other)
	/* we only use address mode if we're the only user of the load */
	if (get_irn_n_edges(node) > 1)

	load = get_Proj_pred(node);

	/* load must be in the same block as the store */
	if (get_nodes_block(load) != block)

	/* store should have the same pointer as the load */
	if (get_Load_ptr(load) != ptr)

	/* don't do AM if other node inputs depend on the load (via mem-proj) */
	if (other != NULL &&
	    get_nodes_block(other) == block &&
	    heights_reachable_in_block(heights, other, load)) {

	if (prevents_AM(block, load, mem))
	/* Store should be attached to the load via mem */
	assert(heights_reachable_in_block(heights, mem, load));
/**
 * Try to build a binary operation with destination address mode
 * (op [mem], reg/imm) for Store(op(Load(ptr), other)).
 *
 * Returns the new node, or presumably NULL when no operand qualifies
 * (the early-out path is not visible in this excerpt).
 */
static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
                              ir_node *mem, ir_node *ptr, ir_mode *mode,
                              construct_binop_dest_func *func,
                              construct_binop_dest_func *func8bit,
                              match_flags_t flags)
	ir_node *src_block = get_nodes_block(node);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	memset(&am, 0, sizeof(am));

	assert(flags & match_immediate); /* there is no destam node without... */
	commutative = (flags & match_commutative) != 0;

	/* fold whichever operand is the matching load (commutativity permitting),
	 * the other one becomes the register/immediate operand */
	if (use_dest_am(src_block, op1, mem, ptr, op2)) {
		build_address(&am, op1, ia32_create_am_double_use);
		new_op = create_immediate_or_transform(op2, 0);
	} else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
		build_address(&am, op2, ia32_create_am_double_use);
		new_op = create_immediate_or_transform(op1, 0);

	if (addr->base == NULL)
		addr->base = noreg_GP;
	if (addr->index == NULL)
		addr->index = noreg_GP;
	if (addr->mem == NULL)

	dbgi = get_irn_dbg_info(node);
	block = be_transform_node(src_block);
	new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);

	if (get_mode_size_bits(mode) == 8) {
		new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
		new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
	set_address(new_node, addr);
	set_ia32_op_type(new_node, ia32_AddrModeD);
	set_ia32_ls_mode(new_node, mode);
	SET_IA32_ORIG_NODE(new_node, node);

	/* reroute the consumed load's users to the new destination-AM node */
	be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
	mem_proj = be_transform_node(am.mem_proj);
	be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Try to build a unary operation with destination address mode
 * (op [mem]) for Store(op(Load(ptr))).
 *
 * Returns the new node; bails out early when the load cannot be folded.
 */
static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
                             ir_node *ptr, ir_mode *mode,
                             construct_unop_dest_func *func)
	ir_node *src_block = get_nodes_block(node);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	if (!use_dest_am(src_block, op, mem, ptr, NULL))

	memset(&am, 0, sizeof(am));
	build_address(&am, op, ia32_create_am_double_use);

	dbgi = get_irn_dbg_info(node);
	block = be_transform_node(src_block);
	new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
	new_node = func(dbgi, block, addr->base, addr->index, new_mem);
	set_address(new_node, addr);
	set_ia32_op_type(new_node, ia32_AddrModeD);
	set_ia32_ls_mode(new_node, mode);
	SET_IA32_ORIG_NODE(new_node, node);

	/* reroute the consumed load's users to the new destination-AM node */
	be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
	mem_proj = be_transform_node(am.mem_proj);
	be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
2214 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2216 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2217 return get_negated_pnc(pnc, mode);
/**
 * Try to transform Store(Mux(cond, 0/1, 1/0)) into a SetccMem
 * (set<cc> byte [mem]). Only 8 bit stores of 0/1-Muxes over non-float
 * conditions qualify.
 */
static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
	ir_mode *mode = get_irn_mode(node);
	ir_node *mux_true = get_Mux_true(node);
	ir_node *mux_false = get_Mux_false(node);
	ia32_address_t addr;

	/* setcc only writes a single byte */
	if (get_mode_size_bits(mode) != 8)

	if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
	} else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {

	cond = get_Mux_sel(node);
	flags = get_flags_node(cond, &pnc);
	/* we can't handle the float special cases with SetM */
	if (pnc & ia32_pn_Cmp_float)

	/* NOTE(review): negation presumably applies only to the Mux(c,0,1)
	 * variant detected above — the guard is not visible in this excerpt */
	pnc = ia32_get_negated_pnc(pnc);

	build_address_ptr(&addr, ptr, mem);

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);
	new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
	                                addr.index, addr.mem, flags, pnc);
	set_address(new_node, &addr);
	set_ia32_op_type(new_node, ia32_AddrModeD);
	set_ia32_ls_mode(new_node, mode);
	SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to transform a Store(op(Load, x)) into a single ia32 instruction with
 * destination address mode (op [mem], x). Dispatches on the stored value's
 * opcode; returns the new node or NULL if no pattern matched.
 */
static ir_node *try_create_dest_am(ir_node *node)
	ir_node *val = get_Store_value(node);
	ir_node *mem = get_Store_mem(node);
	ir_node *ptr = get_Store_ptr(node);
	ir_mode *mode = get_irn_mode(val);
	unsigned bits = get_mode_size_bits(mode);

	/* handle only GP modes for now... */
	if (!ia32_mode_needs_gp_reg(mode))

	/* store must be the only user of the val node */
	if (get_irn_n_edges(val) > 1)
	/* skip pointless convs */
		ir_node *conv_op = get_Conv_op(val);
		ir_mode *pred_mode = get_irn_mode(conv_op);
		if (!ia32_mode_needs_gp_reg(pred_mode))
		if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {

	/* value must be in the same block */
	if (get_nodes_block(node) != get_nodes_block(val))

	switch (get_irn_opcode(val)) {
		/* Add: x+1 / x-1 become inc/dec when enabled, otherwise add [mem] */
		op1 = get_Add_left(val);
		op2 = get_Add_right(val);
		if (ia32_cg_config.use_incdec) {
			if (is_Const_1(op2)) {
				new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
			} else if (is_Const_Minus_1(op2)) {
				new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
		                         new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
		                         match_commutative | match_immediate);
		/* Sub */
		op1 = get_Sub_left(val);
		op2 = get_Sub_right(val);
		if (is_Const(op2)) {
			ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
		                         new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
		/* And */
		op1 = get_And_left(val);
		op2 = get_And_right(val);
		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
		                         new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
		                         match_commutative | match_immediate);
		/* Or */
		op1 = get_Or_left(val);
		op2 = get_Or_right(val);
		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
		                         new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
		                         match_commutative | match_immediate);
		/* Eor */
		op1 = get_Eor_left(val);
		op2 = get_Eor_right(val);
		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
		                         new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
		                         match_commutative | match_immediate);
		/* Shl (shifts have no separate 8 bit constructor) */
		op1 = get_Shl_left(val);
		op2 = get_Shl_right(val);
		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
		                         new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
		/* Shr */
		op1 = get_Shr_left(val);
		op2 = get_Shr_right(val);
		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
		                         new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
		/* Shrs */
		op1 = get_Shrs_left(val);
		op2 = get_Shrs_right(val);
		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
		                         new_bd_ia32_SarMem, new_bd_ia32_SarMem,
		/* Rotl */
		op1 = get_Rotl_left(val);
		op2 = get_Rotl_right(val);
		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
		                         new_bd_ia32_RolMem, new_bd_ia32_RolMem,
		/* TODO: match ROR patterns... */
		/* Mux: may become a set<cc> [mem] */
		new_node = try_create_SetMem(val, ptr, mem);

		/* Minus */
		op1 = get_Minus_op(val);
		new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
		/* Not */
		/* should be lowered already */
		assert(mode != mode_b);
		op1 = get_Not_op(val);
		new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);

	/* keep the store's pinned state on the fused node */
	if (new_node != NULL) {
		if (get_irn_pinned(new_node) != op_pin_state_pinned &&
		    get_irn_pinned(node) == op_pin_state_pinned) {
			set_irn_pinned(new_node, op_pin_state_pinned);
2411 static bool possible_int_mode_for_fp(ir_mode *mode)
2415 if (!mode_is_signed(mode))
2417 size = get_mode_size_bits(mode);
2418 if (size != 16 && size != 32)
2423 static int is_float_to_int_conv(const ir_node *node)
2425 ir_mode *mode = get_irn_mode(node);
2429 if (!possible_int_mode_for_fp(mode))
2434 conv_op = get_Conv_op(node);
2435 conv_mode = get_irn_mode(conv_op);
2437 if (!mode_is_float(conv_mode))
/**
 * Transform a Store(floatConst) into a sequence of
 * integer stores.
 *
 * The constant is split into 32 bit words which are stored with plain
 * integer Stores; the partial stores are joined by a Sync.
 *
 * @return the created ia32 Store node
 */
static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
	ir_mode *mode = get_irn_mode(cns);
	unsigned size = get_mode_size_bytes(mode);
	tarval *tv = get_Const_tarval(cns);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *ptr = get_Store_ptr(node);
	ir_node *mem = get_Store_mem(node);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ia32_address_t addr;

	/* only constants whose size is a multiple of 4 bytes can be split
	 * into 32 bit stores */
	assert(size % 4 == 0);

	build_address_ptr(&addr, ptr, mem);

		/* assemble the next 32 bit word of the constant, little endian */
		get_tarval_sub_bits(tv, ofs) |
		(get_tarval_sub_bits(tv, ofs + 1) << 8) |
		(get_tarval_sub_bits(tv, ofs + 2) << 16) |
		(get_tarval_sub_bits(tv, ofs + 3) << 24);
		ir_node *imm = ia32_create_Immediate(NULL, 0, val);

		ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
		                                      addr.index, addr.mem, imm);

		set_irn_pinned(new_node, get_irn_pinned(node));
		set_ia32_op_type(new_node, ia32_AddrModeD);
		set_ia32_ls_mode(new_node, mode_Iu);
		set_address(new_node, &addr);
		SET_IA32_ORIG_NODE(new_node, node);

		ins[i++] = new_node;

	} while (size != 0);

	/* join the partial stores' memories */
	return new_rd_Sync(dbgi, new_block, i, ins);
/**
 * Generate a vfist or vfisttp instruction.
 *
 * @param fist  out-parameter, presumably receives the created fist/fisttp
 *              node itself (the assignment is not visible in this excerpt)
 */
static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
                          ir_node *mem, ir_node *val, ir_node **fist)
	if (ia32_cg_config.use_fisttp) {
		/* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
		if other users exists */
		ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
		ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
		be_new_Keep(block, 1, &value);

		new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
		/* plain fist rounds with the current FPU control word, so the
		 * control word must be switched to truncation around it */
		ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);

		new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
/**
 * Transforms a general (no special case) Store.
 *
 * Tries destination address mode first, then dispatches on the stored
 * value's mode: float stores (xStore/vfst), float-to-int stores (vfist)
 * and plain integer stores (Store/Store8Bit).
 *
 * @return the created ia32 Store node
 */
static ir_node *gen_general_Store(ir_node *node)
	ir_node *val = get_Store_value(node);
	ir_mode *mode = get_irn_mode(val);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *ptr = get_Store_ptr(node);
	ir_node *mem = get_Store_mem(node);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *new_val, *new_node, *store;
	ia32_address_t addr;

	/* check for destination address mode */
	new_node = try_create_dest_am(node);
	if (new_node != NULL)

	/* construct store address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, 0);

	if (addr.base == NULL) {
		addr.base = noreg_GP;
		addr.base = be_transform_node(addr.base);

	if (addr.index == NULL) {
		addr.index = noreg_GP;
		addr.index = be_transform_node(addr.index);
	addr.mem = be_transform_node(mem);

	if (mode_is_float(mode)) {
		/* Convs (and strict-Convs) before stores are unnecessary if the mode
		   is the same */
		while (is_Conv(val) && mode == get_irn_mode(val)) {
			ir_node *op = get_Conv_op(val);
			if (!mode_is_float(get_irn_mode(op)))
		new_val = be_transform_node(val);
		if (ia32_cg_config.use_sse2) {
			new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
			                              addr.index, addr.mem, new_val);
			new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
			                            addr.index, addr.mem, new_val, mode);
	/* float->int Conv feeding the store: a single fist does both */
	} else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
		val = get_Conv_op(val);

		/* TODO: is this optimisation still necessary at all (middleend)? */
		/* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
		while (is_Conv(val)) {
			ir_node *op = get_Conv_op(val);
			if (!mode_is_float(get_irn_mode(op)))
			if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
		new_val = be_transform_node(val);
		new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
		/* plain integer store; 8 bit needs its own constructor because of
		 * the byte-addressable register constraint */
		new_val = create_immediate_or_transform(val, 0);
		assert(mode != mode_b);

		if (get_mode_size_bits(mode) == 8) {
			new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
			                                 addr.index, addr.mem, new_val);
			new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
			                             addr.index, addr.mem, new_val);

	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode);

	set_address(store, &addr);
	SET_IA32_ORIG_NODE(store, node);
2624 * Transforms a Store.
2626 * @return the created ia32 Store node
/*
 * Dispatcher: float constant stores get a special lowering into integer
 * stores, everything else goes through gen_general_Store().
 */
2628 static ir_node *gen_Store(ir_node *node)
2630 ir_node *val = get_Store_value(node);
2631 ir_mode *mode = get_irn_mode(val);
2633 if (mode_is_float(mode) && is_Const(val)) {
2634 /* We can transform every floating const store
2635 into a sequence of integer stores.
2636 If the constant is already in a register,
2637 it would be better to use it, but we don't
2638 have this information here. */
2639 return gen_float_const_Store(node, val);
2641 return gen_general_Store(node);
2645 * Transforms a Switch.
2647 * @return the created ia32 SwitchJmp node
2649 static ir_node *create_Switch(ir_node *node)
2651 dbg_info *dbgi = get_irn_dbg_info(node);
2652 ir_node *block = be_transform_node(get_nodes_block(node));
2653 ir_node *sel = get_Cond_selector(node);
2654 ir_node *new_sel = be_transform_node(sel);
2655 long switch_min = LONG_MAX;
2656 long switch_max = LONG_MIN;
2657 long default_pn = get_Cond_default_proj(node);
2659 const ir_edge_t *edge;
2661 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2663 /* determine the smallest switch case value */
/* Scan all case Projs (the default is skipped) for the min/max case value. */
2664 foreach_out_edge(node, edge) {
2665 ir_node *proj = get_edge_src_irn(edge);
2666 long pn = get_Proj_proj(proj);
2667 if (pn == default_pn)
2670 if (pn < switch_min)
2672 if (pn > switch_max)
/* Reject huge jump tables; 128000 entries is the arbitrary cutoff. */
2676 if ((unsigned long) (switch_max - switch_min) > 128000) {
2677 panic("Size of switch %+F bigger than 128000", node);
2680 if (switch_min != 0) {
2681 /* if smallest switch case is not 0 we need an additional sub */
/* Bias the selector down to 0 using a Lea with a negative offset. */
2682 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2683 add_ia32_am_offs_int(new_sel, -switch_min);
2684 set_ia32_op_type(new_sel, ia32_AddrModeS);
2686 SET_IA32_ORIG_NODE(new_sel, node);
2689 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2690 SET_IA32_ORIG_NODE(new_node, node);
2696 * Transform a Cond node.
/*
 * A boolean Cond becomes a Jcc consuming flags from the transformed Cmp;
 * anything else (an integer selector) is a switch.
 */
2698 static ir_node *gen_Cond(ir_node *node)
2700 ir_node *block = get_nodes_block(node);
2701 ir_node *new_block = be_transform_node(block);
2702 dbg_info *dbgi = get_irn_dbg_info(node);
2703 ir_node *sel = get_Cond_selector(node);
2704 ir_mode *sel_mode = get_irn_mode(sel);
2705 ir_node *flags = NULL;
2709 if (sel_mode != mode_b) {
2710 return create_Switch(node);
2713 /* we get flags from a Cmp */
2714 flags = get_flags_node(sel, &pnc);
2716 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2717 SET_IA32_ORIG_NODE(new_node, node);
2723 * Transform a be_Copy.
/*
 * Duplicates the Copy; GP-register values are normalized to mode_Iu so all
 * integer copies look alike to the backend.
 */
2725 static ir_node *gen_be_Copy(ir_node *node)
2727 ir_node *new_node = be_duplicate_node(node);
2728 ir_mode *mode = get_irn_mode(new_node);
2730 if (ia32_mode_needs_gp_reg(mode)) {
2731 set_irn_mode(new_node, mode_Iu);
/*
 * Create an x87 float compare. With fucomi the flags are produced directly;
 * otherwise a Fnstsw-style compare plus a Sahf is needed to move the x87
 * status word into the CPU flags.
 */
2737 static ir_node *create_Fucom(ir_node *node)
2739 dbg_info *dbgi = get_irn_dbg_info(node);
2740 ir_node *block = get_nodes_block(node);
2741 ir_node *new_block = be_transform_node(block);
2742 ir_node *left = get_Cmp_left(node);
2743 ir_node *new_left = be_transform_node(left);
2744 ir_node *right = get_Cmp_right(node);
2748 if (ia32_cg_config.use_fucomi) {
2749 new_right = be_transform_node(right);
2750 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2752 set_ia32_commutative(new_node);
2753 SET_IA32_ORIG_NODE(new_node, node);
/* Non-fucomi path: comparing against 0 can use the cheaper ftst. */
2755 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2756 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2758 new_right = be_transform_node(right);
2759 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2762 set_ia32_commutative(new_node);
2764 SET_IA32_ORIG_NODE(new_node, node);
/* Sahf copies AH (the fnstsw result) into the EFLAGS condition bits. */
2766 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2767 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Create an SSE Ucomi float compare. Uses address-mode matching so one
 * operand may come straight from memory.
 */
2773 static ir_node *create_Ucomi(ir_node *node)
2775 dbg_info *dbgi = get_irn_dbg_info(node);
2776 ir_node *src_block = get_nodes_block(node);
2777 ir_node *new_block = be_transform_node(src_block);
2778 ir_node *left = get_Cmp_left(node);
2779 ir_node *right = get_Cmp_right(node);
2781 ia32_address_mode_t am;
2782 ia32_address_t *addr = &am.addr;
2784 match_arguments(&am, src_block, left, right, NULL,
2785 match_commutative | match_am);
2787 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2788 addr->mem, am.new_op1, am.new_op2,
2790 set_am_attributes(new_node, &am);
2792 SET_IA32_ORIG_NODE(new_node, node);
/* If a memory operand was folded, reroute its memory Proj. */
2794 new_node = fix_mem_proj(new_node, &am);
2800 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2801 * to fold an and into a test node
2803 static bool can_fold_test_and(ir_node *node)
2805 const ir_edge_t *edge;
2807 /** we can only have eq and lg projs */
/* Any Proj with a relation other than Eq/Lg blocks the And->Test fold,
 * because Test only computes equality-style conditions correctly. */
2808 foreach_out_edge(node, edge) {
2809 ir_node *proj = get_edge_src_irn(edge);
2810 pn_Cmp pnc = get_Proj_proj(proj);
2811 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2819 * returns true if it is assured, that the upper bits of a node are "clean"
2820 * which means for a 16 or 8 bit value, that the upper bits in the register
2821 * are 0 for unsigned and a copy of the last significant bit for signed
2824 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2826 assert(ia32_mode_needs_gp_reg(mode));
/* 32-bit (or wider) values have no "upper" bits to worry about. */
2827 if (get_mode_size_bits(mode) >= 32)
/* Look through Projs to the producing node. */
2830 if (is_Proj(transformed_node))
2831 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2833 switch (get_ia32_irn_opcode(transformed_node)) {
/* A conversion cleans the upper bits iff it sign/zero-extends in a way
 * compatible with the queried mode. */
2834 case iro_ia32_Conv_I2I:
2835 case iro_ia32_Conv_I2I8Bit: {
2836 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2837 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2839 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* Shr: a large enough constant shift amount zeroes the upper bits. */
2846 if (mode_is_signed(mode)) {
2847 return false; /* TODO handle signed modes */
2849 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2850 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2851 const ia32_immediate_attr_t *attr
2852 = get_ia32_immediate_attr_const(right);
2853 if (attr->symconst == 0 &&
2854 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2858 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2862 /* TODO too conservative if shift amount is constant */
2863 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned it suffices if either operand is clean (0 & x == 0). */
2866 if (!mode_is_signed(mode)) {
2868 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2869 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2871 /* TODO if one is known to be zero extended, then || is sufficient */
/* Generic binary case: both operands must be clean. */
2876 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2877 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
/* Constants: inspect the value directly against the mode's bit width. */
2879 case iro_ia32_Const:
2880 case iro_ia32_Immediate: {
2881 const ia32_immediate_attr_t *attr =
2882 get_ia32_immediate_attr_const(transformed_node);
2883 if (mode_is_signed(mode)) {
2884 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2885 return shifted == 0 || shifted == -1;
2887 unsigned long shifted = (unsigned long)attr->offset;
2888 shifted >>= get_mode_size_bits(mode);
2889 return shifted == 0;
2899 * Generate code for a Cmp.
2901 static ir_node *gen_Cmp(ir_node *node)
2903 dbg_info *dbgi = get_irn_dbg_info(node);
2904 ir_node *block = get_nodes_block(node);
2905 ir_node *new_block = be_transform_node(block);
2906 ir_node *left = get_Cmp_left(node);
2907 ir_node *right = get_Cmp_right(node);
2908 ir_mode *cmp_mode = get_irn_mode(left);
2910 ia32_address_mode_t am;
2911 ia32_address_t *addr = &am.addr;
/* Float compares are handled by the SSE or x87 specific helpers. */
2914 if (mode_is_float(cmp_mode)) {
2915 if (ia32_cg_config.use_sse2) {
2916 return create_Ucomi(node);
2918 return create_Fucom(node);
2922 assert(ia32_mode_needs_gp_reg(cmp_mode));
2924 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2925 cmp_unsigned = !mode_is_signed(cmp_mode);
/* The And must have exactly one user (this Cmp) and all consumers must be
 * Eq/Lg for the fold to be legal (see can_fold_test_and). */
2926 if (is_Const_0(right) &&
2928 get_irn_n_edges(left) == 1 &&
2929 can_fold_test_and(node)) {
2930 /* Test(and_left, and_right) */
2931 ir_node *and_left = get_And_left(left);
2932 ir_node *and_right = get_And_right(left);
2934 /* matze: code here used mode instead of cmd_mode, I think it is always
2935 * the same as cmp_mode, but I leave this here to see if this is really
2938 assert(get_irn_mode(and_left) == cmp_mode);
2940 match_arguments(&am, block, and_left, and_right, NULL,
2942 match_am | match_8bit_am | match_16bit_am |
2943 match_am_and_immediates | match_immediate);
2945 /* use 32bit compare mode if possible since the opcode is smaller */
2946 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2947 upper_bits_clean(am.new_op2, cmp_mode)) {
2948 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2951 if (get_mode_size_bits(cmp_mode) == 8) {
2952 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2953 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2956 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2957 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2960 /* Cmp(left, right) */
2961 match_arguments(&am, block, left, right, NULL,
2962 match_commutative | match_am | match_8bit_am |
2963 match_16bit_am | match_am_and_immediates |
2965 /* use 32bit compare mode if possible since the opcode is smaller */
2966 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2967 upper_bits_clean(am.new_op2, cmp_mode)) {
2968 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2971 if (get_mode_size_bits(cmp_mode) == 8) {
2972 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2973 addr->index, addr->mem, am.new_op1,
2974 am.new_op2, am.ins_permuted,
2977 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2978 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
/* Shared epilogue: apply AM attributes and fix the mem Proj if needed. */
2981 set_am_attributes(new_node, &am);
2982 set_ia32_ls_mode(new_node, cmp_mode);
2984 SET_IA32_ORIG_NODE(new_node, node);
2986 new_node = fix_mem_proj(new_node, &am);
/*
 * Build a CMovcc selecting between the Mux's false/true values based on the
 * given flags and condition. Requires cmov support (asserted below).
 */
2991 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2994 dbg_info *dbgi = get_irn_dbg_info(node);
2995 ir_node *block = get_nodes_block(node);
2996 ir_node *new_block = be_transform_node(block);
2997 ir_node *val_true = get_Mux_true(node);
2998 ir_node *val_false = get_Mux_false(node);
3000 ia32_address_mode_t am;
3001 ia32_address_t *addr;
3003 assert(ia32_cg_config.use_cmov);
3004 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3008 match_arguments(&am, block, val_false, val_true, flags,
3009 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* If matching swapped the operands, the condition must be negated too. */
3011 if (am.ins_permuted)
3012 pnc = ia32_get_negated_pnc(pnc);
3014 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3015 addr->mem, am.new_op1, am.new_op2, new_flags,
3017 set_am_attributes(new_node, &am);
3019 SET_IA32_ORIG_NODE(new_node, node);
3021 new_node = fix_mem_proj(new_node, &am);
3027 * Creates a ia32 Setcc instruction.
/*
 * Setcc only writes an 8-bit register, so the result is zero-extended with a
 * Conv when the caller's mode is wider than 8 bits.
 */
3029 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3030 ir_node *flags, pn_Cmp pnc,
3033 ir_mode *mode = get_irn_mode(orig_node);
3036 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3037 SET_IA32_ORIG_NODE(new_node, orig_node);
3039 /* we might need to conv the result up */
3040 if (get_mode_size_bits(mode) > 8) {
3041 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3042 nomem, new_node, mode_Bu);
3043 SET_IA32_ORIG_NODE(new_node, orig_node);
3050 * Create instruction for an unsigned Difference or Zero.
/*
 * Computes max(a - b, 0) branchlessly: Sub sets the carry flag on underflow,
 * Sbb0 materializes 0/-1 from it, Not inverts to a mask, And applies it.
 */
3052 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3054 ir_mode *mode = get_irn_mode(psi);
3064 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3065 match_mode_neutral | match_am | match_immediate | match_two_users);
3067 block = get_nodes_block(new_node);
/* gen_binop may hand back a Proj; dig out the Sub to tap its flags. */
3069 if (is_Proj(new_node)) {
3070 sub = get_Proj_pred(new_node);
3071 assert(is_ia32_Sub(sub));
3074 set_irn_mode(sub, mode_T);
3075 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3077 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3079 dbgi = get_irn_dbg_info(psi);
3080 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3081 not = new_bd_ia32_Not(dbgi, block, sbb);
3083 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3084 set_ia32_commutative(new_node);
3089 * Create an const array of two float consts.
3091 * @param c0 the first constant
3092 * @param c1 the second constant
3093 * @param new_mode IN/OUT for the mode of the constants, if NULL
3094 * smallest possible mode will be used
3096 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3099 ir_mode *mode = *new_mode;
3101 ir_initializer_t *initializer;
3102 tarval *tv0 = get_Const_tarval(c0);
3103 tarval *tv1 = get_Const_tarval(c1);
3106 /* detect the best mode for the constants */
3107 mode = get_tarval_mode(tv0);
/* Try to narrow both constants losslessly to float, then to double. */
3109 if (mode != mode_F) {
3110 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3111 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3113 tv0 = tarval_convert_to(tv0, mode);
3114 tv1 = tarval_convert_to(tv1, mode);
3115 } else if (mode != mode_D) {
3116 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3117 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3119 tv0 = tarval_convert_to(tv0, mode);
3120 tv1 = tarval_convert_to(tv1, mode);
/* Emit a private, constant, uniquely-named global array entity. */
3127 tp = ia32_create_float_type(mode, 4);
3128 tp = ia32_create_float_array(tp);
3130 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3132 set_entity_ld_ident(ent, get_entity_ident(ent));
3133 set_entity_visibility(ent, ir_visibility_private);
3134 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3136 initializer = create_initializer_compound(2);
3138 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3139 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3141 set_entity_initializer(ent, initializer);
3148 * Possible transformations for creating a Setcc.
/* Enumerators elided in this view (SET/NEG/ADD/LEA/SHL/AND/... variants
 * are referenced below as SETCC_TR_*) — TODO confirm against full source. */
3150 enum setcc_transform_insn {
/* Describes a sequence of instruction steps turning a Setcc 0/1 result into
 * arbitrary true/false constant values. */
3163 typedef struct setcc_transform {
3167 enum setcc_transform_insn transform;
3171 } setcc_transform_t;
3174 * Setcc can only handle 0 and 1 result.
3175 * Find a transformation that creates 0 and 1 from
/*
 * Plans (in res->steps) how to reach the constants t/f from a Setcc 0/1:
 * first normalize so f==0 and t>f, then pick NEG/ADD/LEA/SHL/AND steps
 * depending on the shape of t. Steps are emitted in reverse execution order.
 */
3178 static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f,
3179 setcc_transform_t *res)
/* Normalization: ensure t is the non-zero / larger value, negating the
 * condition whenever t and f are swapped. */
3185 if (tarval_is_null(t)) {
3189 pnc = ia32_get_negated_pnc(pnc);
3190 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3191 // now, t is the bigger one
3195 pnc = ia32_get_negated_pnc(pnc);
/* If f != 0, subtract f from both and plan a final ADD of f. */
3199 if (! tarval_is_null(f)) {
3200 tarval *t_sub = tarval_sub(t, f, NULL);
3203 res->steps[step].transform = SETCC_TR_ADD;
3205 if (t == tarval_bad)
3206 panic("constant subtract failed");
3207 if (! tarval_is_long(f))
3208 panic("tarval is not long");
3210 res->steps[step].val = get_tarval_long(f);
3212 f = tarval_sub(f, f, NULL);
3213 assert(tarval_is_null(f));
/* t == 1: a plain Setcc suffices. */
3216 if (tarval_is_one(t)) {
3217 res->steps[step].transform = SETCC_TR_SET;
3218 res->num_steps = ++step;
/* t == -1: negate the 0/1 Setcc result. */
3222 if (tarval_is_minus_one(t)) {
3223 res->steps[step].transform = SETCC_TR_NEG;
3225 res->steps[step].transform = SETCC_TR_SET;
3226 res->num_steps = ++step;
/* Small multiplier values map to LEA scale tricks: 9/8/5/4/3/2 etc.
 * (switch cases elided in this view — TODO confirm against full source). */
3229 if (tarval_is_long(t)) {
3230 long v = get_tarval_long(t);
3232 res->steps[step].val = 0;
3235 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3237 res->steps[step].transform = SETCC_TR_LEAxx;
3238 res->steps[step].scale = 3; /* (a << 3) + a */
3241 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3243 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3244 res->steps[step].scale = 3; /* (a << 3) */
3247 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3249 res->steps[step].transform = SETCC_TR_LEAxx;
3250 res->steps[step].scale = 2; /* (a << 2) + a */
3253 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3255 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3256 res->steps[step].scale = 2; /* (a << 2) */
3259 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3261 res->steps[step].transform = SETCC_TR_LEAxx;
3262 res->steps[step].scale = 1; /* (a << 1) + a */
3265 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3267 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3268 res->steps[step].scale = 1; /* (a << 1) */
3271 res->num_steps = step;
/* General case: either mask with AND (non-power-of-two after NEG) or shift
 * by the lowest set bit for a single-bit constant. */
3274 if (! tarval_is_single_bit(t)) {
3275 res->steps[step].transform = SETCC_TR_AND;
3276 res->steps[step].val = v;
3278 res->steps[step].transform = SETCC_TR_NEG;
3280 int v = get_tarval_lowest_bit(t);
3283 res->steps[step].transform = SETCC_TR_SHL;
3284 res->steps[step].scale = v;
3288 res->steps[step].transform = SETCC_TR_SET;
3289 res->num_steps = ++step;
3292 panic("tarval is not long");
3296 * Transforms a Mux node into some code sequence.
3298 * @return The transformed node.
3300 static ir_node *gen_Mux(ir_node *node)
3302 dbg_info *dbgi = get_irn_dbg_info(node);
3303 ir_node *block = get_nodes_block(node);
3304 ir_node *new_block = be_transform_node(block);
3305 ir_node *mux_true = get_Mux_true(node);
3306 ir_node *mux_false = get_Mux_false(node);
3307 ir_node *cond = get_Mux_sel(node);
3308 ir_mode *mode = get_irn_mode(node);
3314 assert(get_irn_mode(cond) == mode_b);
/* Mux patterns recognised as abs(x) get a dedicated lowering. */
3316 is_abs = be_mux_is_abs(cond, mux_true, mux_false);
3318 return create_abs(dbgi, block, be_get_abs_op(cond), is_abs < 0, node);
3321 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3322 if (mode_is_float(mode)) {
3323 ir_node *cmp = get_Proj_pred(cond);
3324 ir_node *cmp_left = get_Cmp_left(cmp);
3325 ir_node *cmp_right = get_Cmp_right(cmp);
3326 pn_Cmp pnc = get_Proj_proj(cond);
/* SSE: min/max patterns map directly onto xMin/xMax. */
3328 if (ia32_cg_config.use_sse2) {
3329 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3330 if (cmp_left == mux_true && cmp_right == mux_false) {
3331 /* Mux(a <= b, a, b) => MIN */
3332 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3333 match_commutative | match_am | match_two_users);
3334 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3335 /* Mux(a <= b, b, a) => MAX */
3336 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3337 match_commutative | match_am | match_two_users);
3339 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3340 if (cmp_left == mux_true && cmp_right == mux_false) {
3341 /* Mux(a >= b, a, b) => MAX */
3342 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3343 match_commutative | match_am | match_two_users);
3344 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3345 /* Mux(a >= b, b, a) => MIN */
3346 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3347 match_commutative | match_am | match_two_users);
/* Float Mux of two constants: index into a 2-element constant array with
 * the Setcc result and load the selected element. */
3352 if (is_Const(mux_true) && is_Const(mux_false)) {
3353 ia32_address_mode_t am;
3358 flags = get_flags_node(cond, &pnc);
3359 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3361 if (ia32_cg_config.use_sse2) {
3362 /* cannot load from different mode on SSE */
3365 /* x87 can load any mode */
3369 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* Scale the 0/1 index by the element size; >8 bytes needs extra Leas/Adds
 * since AM scale maxes out at 8 (intermediate cases elided in this view). */
3371 switch (get_mode_size_bytes(new_mode)) {
3381 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3382 set_ia32_am_scale(new_node, 2);
3387 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3388 set_ia32_am_scale(new_node, 1);
3391 /* arg, shift 16 NOT supported */
3393 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3396 panic("Unsupported constant size");
/* Assemble the source address mode for the constant-array load. */
3399 am.ls_mode = new_mode;
3400 am.addr.base = get_symconst_base();
3401 am.addr.index = new_node;
3402 am.addr.mem = nomem;
3404 am.addr.scale = scale;
3405 am.addr.use_frame = 0;
3406 am.addr.frame_entity = NULL;
3407 am.addr.symconst_sign = 0;
3408 am.mem_proj = am.addr.mem;
3409 am.op_type = ia32_AddrModeS;
3412 am.pinned = op_pin_state_floats;
3414 am.ins_permuted = 0;
3416 if (ia32_cg_config.use_sse2)
3417 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3419 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3420 set_am_attributes(load, &am);
3422 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3424 panic("cannot transform floating point Mux");
/* Integer Mux from here on. */
3427 assert(ia32_mode_needs_gp_reg(mode));
3429 if (is_Proj(cond)) {
3430 ir_node *cmp = get_Proj_pred(cond);
3432 ir_node *cmp_left = get_Cmp_left(cmp);
3433 ir_node *cmp_right = get_Cmp_right(cmp);
3434 ir_node *val_true = mux_true;
3435 ir_node *val_false = mux_false;
3436 pn_Cmp pnc = get_Proj_proj(cond);
/* Canonicalize so the zero constant (if any) is on the false side. */
3438 if (is_Const(val_true) && is_Const_null(val_true)) {
3439 ir_node *tmp = val_false;
3440 val_false = val_true;
3442 pnc = ia32_get_negated_pnc(pnc);
/* Recognize "difference or zero": Mux(a >= b, a - b, 0) and its mirror. */
3444 if (is_Const_0(val_false) && is_Sub(val_true)) {
3445 if ((pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge)
3446 && get_Sub_left(val_true) == cmp_left
3447 && get_Sub_right(val_true) == cmp_right) {
3448 return create_doz(node, cmp_left, cmp_right);
3450 if ((pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le)
3451 && get_Sub_left(val_true) == cmp_right
3452 && get_Sub_right(val_true) == cmp_left) {
3453 return create_doz(node, cmp_right, cmp_left);
3459 flags = get_flags_node(cond, &pnc);
3461 if (is_Const(mux_true) && is_Const(mux_false)) {
3462 /* both are const, good */
3463 tarval *tv_true = get_Const_tarval(mux_true);
3464 tarval *tv_false = get_Const_tarval(mux_false);
3465 setcc_transform_t res;
3468 find_const_transform(pnc, tv_true, tv_false, &res);
/* Emit the planned steps back-to-front (steps were recorded in reverse). */
3470 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3473 switch (res.steps[step].transform) {
3475 imm = ia32_immediate_from_long(res.steps[step].val);
3476 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3478 case SETCC_TR_ADDxx:
3479 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3482 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3483 set_ia32_am_scale(new_node, res.steps[step].scale);
3484 set_ia32_am_offs_int(new_node, res.steps[step].val);
3486 case SETCC_TR_LEAxx:
3487 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3488 set_ia32_am_scale(new_node, res.steps[step].scale);
3489 set_ia32_am_offs_int(new_node, res.steps[step].val);
3492 imm = ia32_immediate_from_long(res.steps[step].scale);
3493 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3496 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3499 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3502 imm = ia32_immediate_from_long(res.steps[step].val);
3503 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3506 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, node);
3509 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3512 panic("unknown setcc transform");
/* Non-constant operands: fall back to a conditional move. */
3516 new_node = create_CMov(node, cond, flags, pnc);
3524 * Create a conversion from x87 state register to general purpose.
/*
 * Lowers float->int via a fist store to the frame followed by an integer
 * load. Unsigned 32-bit values are stored as 64-bit signed and only the low
 * word is loaded back.
 */
3526 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3528 ir_node *block = be_transform_node(get_nodes_block(node));
3529 ir_node *op = get_Conv_op(node);
3530 ir_node *new_op = be_transform_node(op);
3531 ir_graph *irg = current_ir_graph;
3532 dbg_info *dbgi = get_irn_dbg_info(node);
3533 ir_mode *mode = get_irn_mode(node);
3534 ir_node *fist, *load, *mem;
3536 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3537 set_irn_pinned(fist, op_pin_state_floats);
3538 set_ia32_use_frame(fist);
3539 set_ia32_op_type(fist, ia32_AddrModeD);
3541 assert(get_mode_size_bits(mode) <= 32);
3542 /* exception we can only store signed 32 bit integers, so for unsigned
3543 we store a 64bit (signed) integer and load the lower bits */
3544 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3545 set_ia32_ls_mode(fist, mode_Ls);
3547 set_ia32_ls_mode(fist, mode_Is);
3549 SET_IA32_ORIG_NODE(fist, node);
/* Load the (low) 32 bits back from the spill slot. */
3552 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3554 set_irn_pinned(load, op_pin_state_floats);
3555 set_ia32_use_frame(load);
3556 set_ia32_op_type(load, ia32_AddrModeS);
3557 set_ia32_ls_mode(load, mode_Is);
/* Tell frame-entity assignment how big the shared spill slot must be. */
3558 if (get_ia32_ls_mode(fist) == mode_Ls) {
3559 ia32_attr_t *attr = get_ia32_attr(load);
3560 attr->data.need_64bit_stackent = 1;
3562 ia32_attr_t *attr = get_ia32_attr(load);
3563 attr->data.need_32bit_stackent = 1;
3565 SET_IA32_ORIG_NODE(load, node);
3567 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3571 * Creates a x87 strict Conv by placing a Store and a Load
/*
 * Forces rounding to tgt_mode precision: the x87 keeps 80-bit extended
 * precision in registers, so a round-trip through memory performs the
 * required narrowing.
 */
3573 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3575 ir_node *block = get_nodes_block(node);
3576 ir_graph *irg = get_Block_irg(block);
3577 dbg_info *dbgi = get_irn_dbg_info(node);
3578 ir_node *frame = get_irg_frame(irg);
3579 ir_node *store, *load;
3582 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3583 set_ia32_use_frame(store);
3584 set_ia32_op_type(store, ia32_AddrModeD);
3585 SET_IA32_ORIG_NODE(store, node);
3587 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3588 set_ia32_use_frame(load);
3589 set_ia32_op_type(load, ia32_AddrModeS);
3590 SET_IA32_ORIG_NODE(load, node);
3592 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/*
 * Build an integer-to-integer conversion node, choosing the 8-bit variant
 * when the target mode is 8 bits wide.
 */
3596 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3597 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3599 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3601 func = get_mode_size_bits(mode) == 8 ?
3602 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3603 return func(dbgi, block, base, index, mem, val, mode);
3607 * Create a conversion from general purpose to x87 register
/*
 * Lowers int->float via fild. If possible the integer operand is read
 * directly from memory (source AM); otherwise it is spilled to the frame
 * first. Unsigned 32-bit values get a zero high word and a 64-bit fild.
 */
3609 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3611 ir_node *src_block = get_nodes_block(node);
3612 ir_node *block = be_transform_node(src_block);
3613 ir_graph *irg = get_Block_irg(block);
3614 dbg_info *dbgi = get_irn_dbg_info(node);
3615 ir_node *op = get_Conv_op(node);
3616 ir_node *new_op = NULL;
3618 ir_mode *store_mode;
3623 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3624 if (possible_int_mode_for_fp(src_mode)) {
3625 ia32_address_mode_t am;
3627 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3628 if (am.op_type == ia32_AddrModeS) {
3629 ia32_address_t *addr = &am.addr;
3631 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3632 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3634 set_am_attributes(fild, &am);
3635 SET_IA32_ORIG_NODE(fild, node);
3637 fix_mem_proj(fild, &am);
/* No source AM possible: go through a frame spill slot. */
3642 if (new_op == NULL) {
3643 new_op = be_transform_node(op);
3646 mode = get_irn_mode(op);
3648 /* first convert to 32 bit signed if necessary */
3649 if (get_mode_size_bits(src_mode) < 32) {
3650 if (!upper_bits_clean(new_op, src_mode)) {
3651 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3652 SET_IA32_ORIG_NODE(new_op, node);
3657 assert(get_mode_size_bits(mode) == 32);
3660 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3662 set_ia32_use_frame(store);
3663 set_ia32_op_type(store, ia32_AddrModeD);
3664 set_ia32_ls_mode(store, mode_Iu);
3666 /* exception for 32bit unsigned, do a 64bit spill+load */
3667 if (!mode_is_signed(mode)) {
/* Store a zero high word at offset 4 so fild reads a valid 64-bit int. */
3670 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3672 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3673 noreg_GP, nomem, zero_const);
3675 set_ia32_use_frame(zero_store);
3676 set_ia32_op_type(zero_store, ia32_AddrModeD);
3677 add_ia32_am_offs_int(zero_store, 4);
3678 set_ia32_ls_mode(zero_store, mode_Iu);
/* Both stores must complete before the fild: merge with a Sync. */
3683 store = new_rd_Sync(dbgi, block, 2, in);
3684 store_mode = mode_Ls;
3686 store_mode = mode_Is;
3690 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3692 set_ia32_use_frame(fild);
3693 set_ia32_op_type(fild, ia32_AddrModeS);
3694 set_ia32_ls_mode(fild, store_mode);
3696 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3702 * Create a conversion from one integer mode into another one
3704 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3705 dbg_info *dbgi, ir_node *block, ir_node *op,
3708 ir_node *new_block = be_transform_node(block);
3710 ir_mode *smaller_mode;
3711 ia32_address_mode_t am;
3712 ia32_address_t *addr = &am.addr;
/* Only the smaller of the two modes matters for the extension. */
3715 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3716 smaller_mode = src_mode;
3718 smaller_mode = tgt_mode;
3721 #ifdef DEBUG_libfirm
3723 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3728 match_arguments(&am, block, NULL, op, NULL,
3729 match_am | match_8bit_am | match_16bit_am);
3731 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3732 /* unnecessary conv. in theory it shouldn't have been AM */
3733 assert(is_ia32_NoReg_GP(addr->base));
3734 assert(is_ia32_NoReg_GP(addr->index));
3735 assert(is_NoMem(addr->mem));
3736 assert(am.addr.offset == 0);
3737 assert(am.addr.symconst_ent == NULL);
3741 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3742 addr->mem, am.new_op2, smaller_mode);
3743 set_am_attributes(new_node, &am);
3744 /* match_arguments assume that out-mode = in-mode, this isn't true here
3746 set_ia32_ls_mode(new_node, smaller_mode);
3747 SET_IA32_ORIG_NODE(new_node, node);
3748 new_node = fix_mem_proj(new_node, &am);
3753 * Transforms a Conv node.
3755 * @return The created ia32 Conv node
/**
 * Transforms a firm Conv node into the matching ia32 conversion.
 *
 * Dispatches on the (src_mode, tgt_mode) pair: no-op conversions are
 * eliminated, float<->float and float<->int go through SSE nodes or the
 * x87 helpers depending on ia32_cg_config.use_sse2, and int<->int falls
 * through to create_I2I_Conv.
 *
 * @return the transformed (ia32) node
 */
3757 static ir_node *gen_Conv(ir_node *node)
3759 ir_node *block = get_nodes_block(node);
3760 ir_node *new_block = be_transform_node(block);
3761 ir_node *op = get_Conv_op(node);
3762 ir_node *new_op = NULL;
3763 dbg_info *dbgi = get_irn_dbg_info(node);
3764 ir_mode *src_mode = get_irn_mode(op);
3765 ir_mode *tgt_mode = get_irn_mode(node);
3766 int src_bits = get_mode_size_bits(src_mode);
3767 int tgt_bits = get_mode_size_bits(tgt_mode);
3768 ir_node *res = NULL;
/* 64bit integer modes must have been lowered before this backend runs */
3770 assert(!mode_is_int(src_mode) || src_bits <= 32);
3771 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3773 /* modeB -> X should already be lowered by the lower_mode_b pass */
3774 if (src_mode == mode_b) {
3775 panic("ConvB not lowered %+F", node);
/* same-mode Conv: only strict x87 conversions may carry meaning */
3778 if (src_mode == tgt_mode) {
3779 if (get_Conv_strict(node)) {
3780 if (ia32_cg_config.use_sse2) {
3781 /* when we are in SSE mode, we can kill all strict no-op conversion */
3782 return be_transform_node(op);
3785 /* this should be optimized already, but who knows... */
3786 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3787 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3788 return be_transform_node(op);
3792 if (mode_is_float(src_mode)) {
3793 new_op = be_transform_node(op);
3794 /* we convert from float ... */
3795 if (mode_is_float(tgt_mode)) {
3797 if (ia32_cg_config.use_sse2) {
3798 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3799 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3801 set_ia32_ls_mode(res, tgt_mode);
3803 if (get_Conv_strict(node)) {
3804 /* if fp_no_float_fold is not set then we assume that we
3805 * don't have any float operations in a non
3806 * mode_float_arithmetic mode and can skip strict upconvs */
3807 if (src_bits < tgt_bits) {
3808 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3811 res = gen_x87_strict_conv(tgt_mode, new_op);
3812 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3816 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3821 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3822 if (ia32_cg_config.use_sse2) {
3823 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3825 set_ia32_ls_mode(res, src_mode);
/* x87 path: float->int needs a store/load round-trip, done in helper */
3827 return gen_x87_fp_to_gp(node);
3831 /* we convert from int ... */
3832 if (mode_is_float(tgt_mode)) {
3834 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3835 if (ia32_cg_config.use_sse2) {
3836 new_op = be_transform_node(op);
3837 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3839 set_ia32_ls_mode(res, tgt_mode);
/* x87 path: an explicit strict conversion is required when the integer
 * cannot be represented exactly in the target float's mantissa */
3841 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3842 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3843 res = gen_x87_gp_to_fp(node, src_mode);
3845 /* we need a strict-Conv, if the int mode has more bits than the
3847 if (float_mantissa < int_mantissa) {
3848 res = gen_x87_strict_conv(tgt_mode, res);
3849 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3853 } else if (tgt_mode == mode_b) {
3854 /* mode_b lowering already took care that we only have 0/1 values */
3855 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3856 src_mode, tgt_mode));
3857 return be_transform_node(op);
/* int -> int of equal width is a no-op on ia32 */
3860 if (src_bits == tgt_bits) {
3861 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3862 src_mode, tgt_mode));
3863 return be_transform_node(op);
3866 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Tries to encode @p node as an ia32 Immediate (honouring the given
 * inline-asm style constraint character); if that is not possible the
 * node is transformed normally.
 */
3874 static ir_node *create_immediate_or_transform(ir_node *node,
3875 char immediate_constraint_type)
3877 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3878 if (new_node == NULL) {
3879 new_node = be_transform_node(node);
3885 * Transforms a FrameAddr into an ia32 Add.
/**
 * Transforms a be_FrameAddr into an ia32 Lea that addresses the frame
 * entity; the concrete offset is filled in later once the entity is
 * assigned (hence set_ia32_use_frame).
 */
3887 static ir_node *gen_be_FrameAddr(ir_node *node)
3889 ir_node *block = be_transform_node(get_nodes_block(node));
3890 ir_node *op = be_get_FrameAddr_frame(node);
3891 ir_node *new_op = be_transform_node(op);
3892 dbg_info *dbgi = get_irn_dbg_info(node);
3895 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3896 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3897 set_ia32_use_frame(new_node);
3899 SET_IA32_ORIG_NODE(new_node, node);
3905 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/**
 * Transforms a be_Return. In the SSE2 case a float return value lives in
 * xmm0 but the ABI returns floats on the x87 stack, so the value is
 * spilled to the frame with an xStoreSimple and reloaded with a vfld;
 * the Barrier in front of the Return is rebuilt with the reloaded value
 * and memory. All other returns are just duplicated.
 */
3907 static ir_node *gen_be_Return(ir_node *node)
3909 ir_graph *irg = current_ir_graph;
3910 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3911 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3912 ir_entity *ent = get_irg_entity(irg);
3913 ir_type *tp = get_entity_type(ent);
3918 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3919 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3921 int pn_ret_val, pn_ret_mem, arity, i;
/* only the SSE2 float-return case needs the special handling below */
3923 assert(ret_val != NULL);
3924 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3925 return be_duplicate_node(node);
3928 res_type = get_method_res_type(tp, 0);
3930 if (! is_Primitive_type(res_type)) {
3931 return be_duplicate_node(node);
3934 mode = get_type_mode(res_type);
3935 if (! mode_is_float(mode)) {
3936 return be_duplicate_node(node);
3939 assert(get_method_n_ress(tp) == 1);
3941 pn_ret_val = get_Proj_proj(ret_val);
3942 pn_ret_mem = get_Proj_proj(ret_mem);
3944 /* get the Barrier */
3945 barrier = get_Proj_pred(ret_val);
3947 /* get result input of the Barrier */
3948 ret_val = get_irn_n(barrier, pn_ret_val);
3949 new_ret_val = be_transform_node(ret_val);
3951 /* get memory input of the Barrier */
3952 ret_mem = get_irn_n(barrier, pn_ret_mem);
3953 new_ret_mem = be_transform_node(ret_mem);
3955 frame = get_irg_frame(irg);
3957 dbgi = get_irn_dbg_info(barrier);
3958 block = be_transform_node(get_nodes_block(barrier));
3960 /* store xmm0 onto stack */
3961 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3962 new_ret_mem, new_ret_val);
3963 set_ia32_ls_mode(sse_store, mode);
3964 set_ia32_op_type(sse_store, ia32_AddrModeD);
3965 set_ia32_use_frame(sse_store);
3967 /* load into x87 register */
3968 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3969 set_ia32_op_type(fld, ia32_AddrModeS);
3970 set_ia32_use_frame(fld);
3972 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3973 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3975 /* create a new barrier */
3976 arity = get_irn_arity(barrier);
3977 in = ALLOCAN(ir_node*, arity);
3978 for (i = 0; i < arity; ++i) {
/* substitute the reloaded value / its memory proj at the old positions */
3981 if (i == pn_ret_val) {
3983 } else if (i == pn_ret_mem) {
3986 ir_node *in = get_irn_n(barrier, i);
3987 new_in = be_transform_node(in);
3992 new_barrier = new_ir_node(dbgi, irg, block,
3993 get_irn_op(barrier), get_irn_mode(barrier),
3995 copy_node_attr(irg, barrier, new_barrier);
3996 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the Return transformation below picks it up */
3997 be_set_transformed_node(barrier, new_barrier);
3999 /* transform normally */
4000 return be_duplicate_node(node);
4004 * Transform a be_AddSP into an ia32_SubSP.
/**
 * Transforms a be_AddSP into an ia32 SubSP (note the inversion: growing
 * the reserved stack area means decreasing esp on ia32).
 */
4006 static ir_node *gen_be_AddSP(ir_node *node)
4008 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4009 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4011 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4012 match_am | match_immediate);
4016 * Transform a be_SubSP into an ia32_AddSP
/**
 * Transforms a be_SubSP into an ia32 AddSP (inverse of gen_be_AddSP:
 * shrinking the reserved stack area means increasing esp).
 */
4018 static ir_node *gen_be_SubSP(ir_node *node)
4020 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4021 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4023 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4024 match_am | match_immediate);
4028 * Change some phi modes
/**
 * Transforms a Phi: the node itself is kept, but an output register
 * requirement matching its mode (gp / xmm / vfp) is attached. Because
 * Phis may sit on loop backedges, the old (untransformed) arguments are
 * kept for now and fixed up later; predecessors are enqueued instead.
 */
4030 static ir_node *gen_Phi(ir_node *node)
4032 const arch_register_req_t *req;
4033 ir_node *block = be_transform_node(get_nodes_block(node));
4034 ir_graph *irg = current_ir_graph;
4035 dbg_info *dbgi = get_irn_dbg_info(node);
4036 ir_mode *mode = get_irn_mode(node);
4039 if (ia32_mode_needs_gp_reg(mode)) {
4040 /* we shouldn't have any 64bit stuff around anymore */
4041 assert(get_mode_size_bits(mode) <= 32);
4042 /* all integer operations are on 32bit registers now */
4044 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4045 } else if (mode_is_float(mode)) {
4046 if (ia32_cg_config.use_sse2) {
4048 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4051 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* e.g. memory Phis need no register at all */
4054 req = arch_no_register_req;
4057 /* phi nodes allow loops, so we use the old arguments for now
4058 * and fix this later */
4059 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4060 get_irn_in(node) + 1);
4061 copy_node_attr(irg, node, phi);
4062 be_duplicate_deps(node, phi);
4064 arch_set_out_register_req(phi, 0, req);
4066 be_enqueue_preds(node);
/**
 * Transforms a Jmp into an ia32 Jmp in the transformed block.
 */
4071 static ir_node *gen_Jmp(ir_node *node)
4073 ir_node *block = get_nodes_block(node);
4074 ir_node *new_block = be_transform_node(block);
4075 dbg_info *dbgi = get_irn_dbg_info(node);
4078 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4079 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transforms an IJmp (indirect jump) into an ia32 IJmp. The jump target
 * may be folded into an address mode or immediate via match_arguments.
 */
4089 static ir_node *gen_IJmp(ir_node *node)
4091 ir_node *block = get_nodes_block(node);
4092 ir_node *new_block = be_transform_node(block);
4093 dbg_info *dbgi = get_irn_dbg_info(node);
4092 ir_node *op = get_IJmp_target(node);
4094 ia32_address_mode_t am;
4095 ia32_address_t *addr = &am.addr;
4097 assert(get_irn_mode(op) == mode_P);
4099 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4101 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4102 addr->mem, am.new_op2);
4103 set_am_attributes(new_node, &am);
4104 SET_IA32_ORIG_NODE(new_node, node);
4106 new_node = fix_mem_proj(new_node, &am);
4112 * Transform a Bound node.
/**
 * Transforms a Bound node. Only the common case of a zero lower bound
 * (typical for Java array checks) is supported: index and upper bound
 * are compared with an unsigned Sub/Cmp and a Jcc on "below" covers both
 * the lower and the upper check in one branch. The generic case panics.
 */
4114 static ir_node *gen_Bound(ir_node *node)
4117 ir_node *lower = get_Bound_lower(node);
4118 dbg_info *dbgi = get_irn_dbg_info(node);
4120 if (is_Const_0(lower)) {
4121 /* typical case for Java */
4122 ir_node *sub, *res, *flags, *block;
4124 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4126 match_mode_neutral | match_am | match_immediate);
4128 block = get_nodes_block(res);
4129 if (! is_Proj(res)) {
4131 set_irn_mode(sub, mode_T);
4132 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4134 sub = get_Proj_pred(res);
/* unsigned "index < upper" also rejects negative indices */
4136 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
4137 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4138 SET_IA32_ORIG_NODE(new_node, node);
4140 panic("generic Bound not supported in ia32 Backend");
/**
 * Transforms an ia32_l_ShlDep (Shl with an extra scheduling dependency)
 * into a real ia32 Shl.
 */
4146 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4148 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4149 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4151 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4152 match_immediate | match_mode_neutral);
/**
 * Transforms an ia32_l_ShrDep into a real ia32 Shr.
 */
4155 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4157 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4158 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4159 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/**
 * Transforms an ia32_l_SarDep into a real ia32 Sar.
 */
4163 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4165 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4166 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4167 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transforms an ia32_l_Add (lowered 64bit add, low word) into an ia32
 * Add. The result is forced to mode_T so the carry flag can be consumed
 * by the matching Adc of the high word.
 */
4171 static ir_node *gen_ia32_l_Add(ir_node *node)
4173 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4174 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4175 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4176 match_commutative | match_am | match_immediate |
4177 match_mode_neutral);
/* gen_binop may hand back a Proj; normalize to the Add node itself */
4179 if (is_Proj(lowered)) {
4180 lowered = get_Proj_pred(lowered);
4182 assert(is_ia32_Add(lowered));
4183 set_irn_mode(lowered, mode_T);
/**
 * Transforms an ia32_l_Adc (lowered 64bit add, high word with carry)
 * into an ia32 Adc consuming the flags of the low-word Add.
 */
4191 static ir_node *gen_ia32_l_Adc(ir_node *node)
4193 return gen_binop_flags(node, new_bd_ia32_Adc,
4192 match_commutative | match_am | match_immediate |
4193 match_mode_neutral);
4197 * Transforms a l_MulS into a "real" MulS node.
4199 * @return the created ia32 Mul node
/**
 * Transforms an ia32_l_Mul into a "real" ia32 Mul (unsigned widening
 * multiply producing low and high result).
 *
 * @return the created ia32 Mul node
 */
4201 static ir_node *gen_ia32_l_Mul(ir_node *node)
4203 ir_node *left = get_binop_left(node);
4204 ir_node *right = get_binop_right(node);
4206 return gen_binop(node, left, right, new_bd_ia32_Mul,
4207 match_commutative | match_am | match_mode_neutral);
4211 * Transforms a l_IMulS into a "real" IMul1OPS node.
4213 * @return the created ia32 IMul1OP node
/**
 * Transforms an ia32_l_IMul into a "real" ia32 IMul1OP (signed widening
 * multiply, one-operand form).
 *
 * @return the created ia32 IMul1OP node
 */
4215 static ir_node *gen_ia32_l_IMul(ir_node *node)
4217 ir_node *left = get_binop_left(node);
4218 ir_node *right = get_binop_right(node);
4220 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4221 match_commutative | match_am | match_mode_neutral);
/**
 * Transforms an ia32_l_Sub (lowered 64bit sub, low word) into an ia32
 * Sub. The result is forced to mode_T so the borrow flag can be consumed
 * by the matching Sbb of the high word.
 */
4224 static ir_node *gen_ia32_l_Sub(ir_node *node)
4226 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4227 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4228 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4229 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a Proj; normalize to the Sub node itself */
4231 if (is_Proj(lowered)) {
4232 lowered = get_Proj_pred(lowered);
4234 assert(is_ia32_Sub(lowered));
4235 set_irn_mode(lowered, mode_T);
/**
 * Transforms an ia32_l_Sbb (lowered 64bit sub, high word with borrow)
 * into an ia32 Sbb consuming the flags of the low-word Sub.
 */
4241 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4243 return gen_binop_flags(node, new_bd_ia32_Sbb,
4244 match_am | match_immediate | match_mode_neutral);
4248 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4249 * op1 - target to be shifted
4250 * op2 - contains bits to be shifted into target
4252 * Only op3 can be an immediate.
/**
 * Common transformation for ia32_l_ShlD/l_ShrD (lowered 64bit double
 * shifts): builds an ia32 ShlD or ShrD from the high word, low word and
 * shift count. Only the count may become an immediate.
 */
4254 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4255 ir_node *low, ir_node *count)
4257 ir_node *block = get_nodes_block(node);
4258 ir_node *new_block = be_transform_node(block);
4259 dbg_info *dbgi = get_irn_dbg_info(node);
4260 ir_node *new_high = be_transform_node(high);
4261 ir_node *new_low = be_transform_node(low);
4265 /* the shift amount can be any mode that is bigger than 5 bits, since all
4266 * other bits are ignored anyway */
4267 while (is_Conv(count) &&
4268 get_irn_n_edges(count) == 1 &&
4269 mode_is_int(get_irn_mode(count))) {
4270 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4271 count = get_Conv_op(count);
4273 new_count = create_immediate_or_transform(count, 0);
4275 if (is_ia32_l_ShlD(node)) {
4276 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4279 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4282 SET_IA32_ORIG_NODE(new_node, node);
4287 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4289 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4290 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4291 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4292 return gen_lowered_64bit_shifts(node, high, low, count);
4295 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4297 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4298 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4299 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4300 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transforms an ia32_l_LLtoFloat (64bit int -> float) for the x87 unit:
 * the two 32bit halves are stored to the frame, a 64bit fild loads them
 * as one signed integer, and for unsigned sources a bias constant
 * (ULLBIAS) is conditionally added to correct values with the sign bit
 * set. Not implemented for SSE2.
 */
4303 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4305 ir_node *src_block = get_nodes_block(node);
4306 ir_node *block = be_transform_node(src_block);
4307 ir_graph *irg = current_ir_graph;
4308 dbg_info *dbgi = get_irn_dbg_info(node);
4309 ir_node *frame = get_irg_frame(irg);
4310 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4311 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4312 ir_node *new_val_low = be_transform_node(val_low);
4313 ir_node *new_val_high = be_transform_node(val_high);
4315 ir_node *sync, *fild, *res;
4316 ir_node *store_low, *store_high;
4318 if (ia32_cg_config.use_sse2) {
4319 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both halves to a 64bit frame slot (high half at offset 4) */
4323 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4325 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4327 SET_IA32_ORIG_NODE(store_low, node);
4328 SET_IA32_ORIG_NODE(store_high, node);
4330 set_ia32_use_frame(store_low);
4331 set_ia32_use_frame(store_high);
4332 set_ia32_op_type(store_low, ia32_AddrModeD);
4333 set_ia32_op_type(store_high, ia32_AddrModeD);
4334 set_ia32_ls_mode(store_low, mode_Iu);
4335 set_ia32_ls_mode(store_high, mode_Is);
4336 add_ia32_am_offs_int(store_high, 4);
4340 sync = new_rd_Sync(dbgi, block, 2, in);
/* load the 64bit value into an x87 register */
4343 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4345 set_ia32_use_frame(fild);
4346 set_ia32_op_type(fild, ia32_AddrModeS);
4347 set_ia32_ls_mode(fild, mode_Ls);
4349 SET_IA32_ORIG_NODE(fild, node);
4351 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4353 if (! mode_is_signed(get_irn_mode(val_high))) {
4354 ia32_address_mode_t am;
/* unsigned fixup: fild interpreted the value as signed, so when the
 * sign bit was set add 2^64 (ULLBIAS). The bias table is indexed by
 * high_word >> 31, selecting 0.0 or the bias constant. */
4356 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4359 am.addr.base = get_symconst_base();
4360 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4361 am.addr.mem = nomem;
4364 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4365 am.addr.use_frame = 0;
4366 am.addr.frame_entity = NULL;
4367 am.addr.symconst_sign = 0;
4368 am.ls_mode = mode_F;
4369 am.mem_proj = nomem;
4370 am.op_type = ia32_AddrModeS;
4372 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4373 am.pinned = op_pin_state_floats;
4375 am.ins_permuted = 0;
4377 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4378 am.new_op1, am.new_op2, get_fpcw());
4379 set_am_attributes(fadd, &am);
4381 set_irn_mode(fadd, mode_T);
4382 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
4387 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4389 ir_node *src_block = get_nodes_block(node);
4390 ir_node *block = be_transform_node(src_block);
4391 ir_graph *irg = get_Block_irg(block);
4392 dbg_info *dbgi = get_irn_dbg_info(node);
4393 ir_node *frame = get_irg_frame(irg);
4394 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4395 ir_node *new_val = be_transform_node(val);
4396 ir_node *fist, *mem;
4398 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4399 SET_IA32_ORIG_NODE(fist, node);
4400 set_ia32_use_frame(fist);
4401 set_ia32_op_type(fist, ia32_AddrModeD);
4402 set_ia32_ls_mode(fist, mode_Ls);
4407 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4409 ir_node *block = be_transform_node(get_nodes_block(node));
4410 ir_graph *irg = get_Block_irg(block);
4411 ir_node *pred = get_Proj_pred(node);
4412 ir_node *new_pred = be_transform_node(pred);
4413 ir_node *frame = get_irg_frame(irg);
4414 dbg_info *dbgi = get_irn_dbg_info(node);
4415 long pn = get_Proj_proj(node);
4420 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4421 SET_IA32_ORIG_NODE(load, node);
4422 set_ia32_use_frame(load);
4423 set_ia32_op_type(load, ia32_AddrModeS);
4424 set_ia32_ls_mode(load, mode_Iu);
4425 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4426 * 32 bit from it with this particular load */
4427 attr = get_ia32_attr(load);
4428 attr->data.need_64bit_stackent = 1;
4430 if (pn == pn_ia32_l_FloattoLL_res_high) {
4431 add_ia32_am_offs_int(load, 4);
4433 assert(pn == pn_ia32_l_FloattoLL_res_low);
4436 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4442 * Transform the Projs of an AddSP.
/**
 * Transforms/renumbers the Projs of a be_AddSP onto the ia32 SubSP it
 * became (stack pointer, address result, memory).
 */
4444 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4446 ir_node *pred = get_Proj_pred(node);
4447 ir_node *new_pred = be_transform_node(pred);
4448 dbg_info *dbgi = get_irn_dbg_info(node);
4449 long proj = get_Proj_proj(node);
4451 if (proj == pn_be_AddSP_sp) {
4452 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4453 pn_ia32_SubSP_stack);
/* the stack pointer result is pinned to esp */
4454 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4456 } else if (proj == pn_be_AddSP_res) {
4457 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4458 pn_ia32_SubSP_addr);
4459 } else if (proj == pn_be_AddSP_M) {
4460 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4463 panic("No idea how to transform proj->AddSP");
4467 * Transform the Projs of a SubSP.
/**
 * Transforms/renumbers the Projs of a be_SubSP onto the ia32 AddSP it
 * became (stack pointer, memory).
 */
4469 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4471 ir_node *pred = get_Proj_pred(node);
4472 ir_node *new_pred = be_transform_node(pred);
4473 dbg_info *dbgi = get_irn_dbg_info(node);
4474 long proj = get_Proj_proj(node);
4476 if (proj == pn_be_SubSP_sp) {
4477 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4478 pn_ia32_AddSP_stack);
/* the stack pointer result is pinned to esp */
4479 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4481 } else if (proj == pn_be_SubSP_M) {
4482 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4485 panic("No idea how to transform proj->SubSP");
4489 * Transform and renumber the Projs from a Load.
/**
 * Transforms and renumbers the Projs of a Load, depending on what the
 * Load was turned into: an ia32 Load, a Conv with a folded load, an SSE
 * xLoad or an x87 vfld. Multi-user ProjMs are deferred because the Load
 * may still be folded into another node's address mode.
 */
4491 static ir_node *gen_Proj_Load(ir_node *node)
4494 ir_node *block = be_transform_node(get_nodes_block(node));
4495 ir_node *pred = get_Proj_pred(node);
4496 dbg_info *dbgi = get_irn_dbg_info(node);
4497 long proj = get_Proj_proj(node);
4499 /* loads might be part of source address mode matches, so we don't
4500 * transform the ProjMs yet (with the exception of loads whose result is
4503 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4506 /* this is needed, because sometimes we have loops that are only
4507 reachable through the ProjM */
4508 be_enqueue_preds(node);
4509 /* do it in 2 steps, to silence firm verifier */
4510 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4511 set_Proj_proj(res, pn_ia32_mem);
4515 /* renumber the proj */
4516 new_pred = be_transform_node(pred);
4517 if (is_ia32_Load(new_pred)) {
4520 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4522 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4523 case pn_Load_X_regular:
4524 return new_rd_Jmp(dbgi, block);
4525 case pn_Load_X_except:
4526 /* This Load might raise an exception. Mark it. */
4527 set_ia32_exc_label(new_pred, 1);
4528 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the load was folded into a Conv: route Projs to the Conv's outputs */
4532 } else if (is_ia32_Conv_I2I(new_pred) ||
4533 is_ia32_Conv_I2I8Bit(new_pred)) {
4534 set_irn_mode(new_pred, mode_T);
4535 if (proj == pn_Load_res) {
4536 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4537 } else if (proj == pn_Load_M) {
4538 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4540 } else if (is_ia32_xLoad(new_pred)) {
4543 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4545 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4546 case pn_Load_X_regular:
4547 return new_rd_Jmp(dbgi, block);
4548 case pn_Load_X_except:
4549 /* This Load might raise an exception. Mark it. */
4550 set_ia32_exc_label(new_pred, 1);
4551 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4555 } else if (is_ia32_vfld(new_pred)) {
4558 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4560 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4561 case pn_Load_X_regular:
4562 return new_rd_Jmp(dbgi, block);
4563 case pn_Load_X_except:
4564 /* This Load might raise an exception. Mark it. */
4565 set_ia32_exc_label(new_pred, 1);
4566 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4571 /* can happen for ProJMs when source address mode happened for the
4574 /* however it should not be the result proj, as that would mean the
4575 load had multiple users and should not have been used for
4577 if (proj != pn_Load_M) {
4578 panic("internal error: transformed node not a Load");
4580 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4583 panic("No idea how to transform proj");
4587 * Transform and renumber the Projs from a DivMod like instruction.
/**
 * Transforms and renumbers the Projs of a Div/Mod/DivMod: both firm
 * opcodes map onto one ia32 Div (or IDiv) node that produces the
 * quotient and remainder simultaneously.
 */
4589 static ir_node *gen_Proj_DivMod(ir_node *node)
4591 ir_node *block = be_transform_node(get_nodes_block(node));
4592 ir_node *pred = get_Proj_pred(node);
4593 ir_node *new_pred = be_transform_node(pred);
4594 dbg_info *dbgi = get_irn_dbg_info(node);
4595 long proj = get_Proj_proj(node);
4597 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* the original opcode decides which firm proj numbers are expected */
4599 switch (get_irn_opcode(pred)) {
4603 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4605 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4606 case pn_Div_X_regular:
4607 return new_rd_Jmp(dbgi, block);
4608 case pn_Div_X_except:
4609 set_ia32_exc_label(new_pred, 1);
4610 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4618 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4620 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4621 case pn_Mod_X_except:
4622 set_ia32_exc_label(new_pred, 1);
4623 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4631 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4632 case pn_DivMod_res_div:
4633 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4634 case pn_DivMod_res_mod:
4635 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4636 case pn_DivMod_X_regular:
4637 return new_rd_Jmp(dbgi, block);
4638 case pn_DivMod_X_except:
4639 set_ia32_exc_label(new_pred, 1);
4640 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4649 panic("No idea how to transform proj->DivMod");
4653 * Transform and renumber the Projs from a CopyB.
/**
 * Transforms and renumbers the Projs of a CopyB onto the ia32 CopyB or
 * CopyB_i (immediate-size variant) it became.
 */
4655 static ir_node *gen_Proj_CopyB(ir_node *node)
4657 ir_node *pred = get_Proj_pred(node);
4658 ir_node *new_pred = be_transform_node(pred);
4659 dbg_info *dbgi = get_irn_dbg_info(node);
4660 long proj = get_Proj_proj(node);
4664 if (is_ia32_CopyB_i(new_pred)) {
4665 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4666 } else if (is_ia32_CopyB(new_pred)) {
4667 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4674 panic("No idea how to transform proj->CopyB");
4678 * Transform and renumber the Projs from a Quot.
/**
 * Transforms and renumbers the Projs of a Quot (float division) onto the
 * SSE xDiv or x87 vfdiv node it became.
 */
4680 static ir_node *gen_Proj_Quot(ir_node *node)
4682 ir_node *pred = get_Proj_pred(node);
4683 ir_node *new_pred = be_transform_node(pred);
4684 dbg_info *dbgi = get_irn_dbg_info(node);
4685 long proj = get_Proj_proj(node);
4689 if (is_ia32_xDiv(new_pred)) {
4690 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4691 } else if (is_ia32_vfdiv(new_pred)) {
4692 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4696 if (is_ia32_xDiv(new_pred)) {
4697 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4698 } else if (is_ia32_vfdiv(new_pred)) {
4699 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4702 case pn_Quot_X_regular:
4703 case pn_Quot_X_except:
4708 panic("No idea how to transform proj->Quot");
/**
 * Transforms a be_Call into an ia32 Call. The call target may be folded
 * into an address mode or immediate; register parameters are routed to
 * their fixed eax/ecx/edx slots; SSE calls with float results are
 * collected for post-processing and flag the graph for the x87
 * simulator.
 */
4711 static ir_node *gen_be_Call(ir_node *node)
4713 dbg_info *const dbgi = get_irn_dbg_info(node);
4714 ir_node *const src_block = get_nodes_block(node);
4715 ir_node *const block = be_transform_node(src_block);
4716 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4717 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4718 ir_node *const sp = be_transform_node(src_sp);
4719 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4720 ia32_address_mode_t am;
4721 ia32_address_t *const addr = &am.addr;
4726 ir_node * eax = noreg_GP;
4727 ir_node * ecx = noreg_GP;
4728 ir_node * edx = noreg_GP;
4729 unsigned const pop = be_Call_get_pop(node);
4730 ir_type *const call_tp = be_Call_get_type(node);
4731 int old_no_pic_adjust;
4733 /* Run the x87 simulator if the call returns a float value */
4734 if (get_method_n_ress(call_tp) > 0) {
4735 ir_type *const res_type = get_method_res_type(call_tp, 0);
4736 ir_mode *const res_mode = get_type_mode(res_type);
4738 if (res_mode != NULL && mode_is_float(res_mode)) {
4739 ir_graph *irg = current_ir_graph;
4740 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4741 irg_data->do_x87_sim = 1;
4745 /* We do not want be_Call direct calls */
4746 assert(be_Call_get_entity(node) == NULL);
4748 /* special case for PIC trampoline calls */
4749 old_no_pic_adjust = no_pic_adjust;
4750 no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4752 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4753 match_am | match_immediate);
4755 no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw; walk remaining register parameters backwards */
4757 i = get_irn_arity(node) - 1;
4758 fpcw = be_transform_node(get_irn_n(node, i--));
4759 for (; i >= be_pos_Call_first_arg; --i) {
4760 arch_register_req_t const *const req = arch_get_register_req(node, i);
4761 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4763 assert(req->type == arch_register_req_type_limited);
4764 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4766 switch (*req->limited) {
4767 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4768 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4769 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4770 default: panic("Invalid GP register for register parameter");
4774 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4775 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4776 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4777 set_am_attributes(call, &am);
4778 call = fix_mem_proj(call, &am);
4780 if (get_irn_pinned(node) == op_pin_state_pinned)
4781 set_irn_pinned(call, op_pin_state_pinned);
4783 SET_IA32_ORIG_NODE(call, node);
4785 if (ia32_cg_config.use_sse2) {
4786 /* remember this call for post-processing */
4787 ARR_APP1(ir_node *, call_list, call);
4788 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4795 * Transform Builtin trap
/**
 * Transforms a Builtin trap into an ia32 UD2 instruction.
 */
4797 static ir_node *gen_trap(ir_node *node)
4799 dbg_info *dbgi = get_irn_dbg_info(node);
4800 ir_node *block = be_transform_node(get_nodes_block(node));
4801 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4803 return new_bd_ia32_UD2(dbgi, block, mem);
4807 * Transform Builtin debugbreak
/**
 * Transforms a Builtin debugbreak into an ia32 Breakpoint (int3).
 */
4809 static ir_node *gen_debugbreak(ir_node *node)
4811 dbg_info *dbgi = get_irn_dbg_info(node);
4812 ir_node *block = be_transform_node(get_nodes_block(node));
4813 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4815 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4819 * Transform Builtin return_address
/**
 * Transforms a Builtin return_address: for a non-zero frame level the
 * frame chain is walked with ClimbFrame, then the return address is
 * loaded from the resulting frame at the return-address entity.
 */
4821 static ir_node *gen_return_address(ir_node *node)
4823 ir_node *param = get_Builtin_param(node, 0);
4824 ir_node *frame = get_Builtin_param(node, 1);
4825 dbg_info *dbgi = get_irn_dbg_info(node);
/* param 0 must be a constant frame level */
4826 tarval *tv = get_Const_tarval(param);
4827 unsigned long value = get_tarval_long(tv);
4829 ir_node *block = be_transform_node(get_nodes_block(node));
4830 ir_node *ptr = be_transform_node(frame);
4834 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4835 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4836 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4839 /* load the return address from this frame */
4840 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4842 set_irn_pinned(load, get_irn_pinned(node));
4843 set_ia32_op_type(load, ia32_AddrModeS);
4844 set_ia32_ls_mode(load, mode_Iu);
4846 set_ia32_am_offs_int(load, 0);
4847 set_ia32_use_frame(load);
4848 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4850 if (get_irn_pinned(node) == op_pin_state_floats) {
/* rematerialization relies on all load variants sharing one res proj */
4851 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4852 && pn_ia32_vfld_res == pn_ia32_Load_res
4853 && pn_ia32_Load_res == pn_ia32_res);
4854 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4857 SET_IA32_ORIG_NODE(load, node);
4858 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4862 * Transform Builtin frame_address
/**
 * Transforms a Builtin frame_address: analogous to gen_return_address
 * but loads from the frame-address entity of the (possibly climbed)
 * frame.
 */
4864 static ir_node *gen_frame_address(ir_node *node)
4866 ir_node *param = get_Builtin_param(node, 0);
4867 ir_node *frame = get_Builtin_param(node, 1);
4868 dbg_info *dbgi = get_irn_dbg_info(node);
/* param 0 must be a constant frame level */
4869 tarval *tv = get_Const_tarval(param);
4870 unsigned long value = get_tarval_long(tv);
4872 ir_node *block = be_transform_node(get_nodes_block(node));
4873 ir_node *ptr = be_transform_node(frame);
4878 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4879 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4880 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4883 /* load the frame address from this frame */
4884 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4886 set_irn_pinned(load, get_irn_pinned(node));
4887 set_ia32_op_type(load, ia32_AddrModeS);
4888 set_ia32_ls_mode(load, mode_Iu);
4890 ent = ia32_get_frame_address_entity();
4892 set_ia32_am_offs_int(load, 0);
4893 set_ia32_use_frame(load);
4894 set_ia32_frame_ent(load, ent);
4896 /* will fail anyway, but gcc does this: */
4897 set_ia32_am_offs_int(load, 0);
4900 if (get_irn_pinned(node) == op_pin_state_floats) {
/* rematerialization relies on all load variants sharing one res proj */
4901 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4902 && pn_ia32_vfld_res == pn_ia32_Load_res
4903 && pn_ia32_Load_res == pn_ia32_res);
4904 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4907 SET_IA32_ORIG_NODE(load, node);
4908 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4912 * Transform Builtin frame_address
/**
 * Transforms a Builtin prefetch. Selects between the SSE prefetch
 * variants (by locality hint, param 2), 3DNow! PrefetchW for writes
 * (rw == 1, param 1), and the plain 3DNow! Prefetch; when neither
 * instruction set is available the builtin degrades to a no-op and only
 * memory is routed through.
 */
4914 static ir_node *gen_prefetch(ir_node *node)
4917 ir_node *ptr, *block, *mem, *base, *index;
4918 ir_node *param, *new_node;
4921 ia32_address_t addr;
4923 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4924 /* no prefetch at all, route memory */
4925 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the constant read/write flag */
4928 param = get_Builtin_param(node, 1);
4929 tv = get_Const_tarval(param);
4930 rw = get_tarval_long(tv);
4932 /* construct load address */
4933 memset(&addr, 0, sizeof(addr));
4934 ptr = get_Builtin_param(node, 0);
4935 ia32_create_address_mode(&addr, ptr, 0);
4942 base = be_transform_node(base);
4945 if (index == NULL) {
4948 index = be_transform_node(index);
4951 dbgi = get_irn_dbg_info(node);
4952 block = be_transform_node(get_nodes_block(node));
4953 mem = be_transform_node(get_Builtin_mem(node));
4955 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4956 /* we have 3DNow!, this was already checked above */
4957 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4958 } else if (ia32_cg_config.use_sse_prefetch) {
4959 /* note: rw == 1 is IGNORED in that case */
4960 param = get_Builtin_param(node, 2);
4961 tv = get_Const_tarval(param);
4962 locality = get_tarval_long(tv);
4964 /* SSE style prefetch */
4967 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4970 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4973 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4976 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4980 assert(ia32_cg_config.use_3dnow_prefetch);
4981 /* 3DNow! style prefetch */
4982 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4985 set_irn_pinned(new_node, get_irn_pinned(node));
4986 set_ia32_op_type(new_node, ia32_AddrModeS);
4987 set_ia32_ls_mode(new_node, mode_Bu);
4988 set_address(new_node, &addr);
4990 SET_IA32_ORIG_NODE(new_node, node);
4992 be_dep_on_frame(new_node);
4993 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4997 * Transform bsf like node
/**
 * Transform a bsf-like builtin (single operand, address-mode capable):
 * matches the operand against address-mode rules, builds the node via the
 * given constructor and fixes up a possible memory Proj.
 *
 * @param node  the Builtin node (param 0 is the operand)
 * @param func  constructor for the ia32 node (e.g. new_bd_ia32_Bsf)
 * @return the new ia32 node (or its result Proj after fix_mem_proj)
 *
 * NOTE(review): the declaration of cnt is not visible in this excerpt.
 */
4999 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5001 ir_node *param = get_Builtin_param(node, 0);
5002 dbg_info *dbgi = get_irn_dbg_info(node);
5004 ir_node *block = get_nodes_block(node);
5005 ir_node *new_block = be_transform_node(block);
5007 ia32_address_mode_t am;
5008 ia32_address_t *addr = &am.addr;
5011 match_arguments(&am, block, NULL, param, NULL, match_am);
5013 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5014 set_am_attributes(cnt, &am);
5015 set_ia32_ls_mode(cnt, get_irn_mode(param));
5017 SET_IA32_ORIG_NODE(cnt, node);
5018 return fix_mem_proj(cnt, &am);
5022 * Transform builtin ffs.
/**
 * Transform builtin ffs (find-first-set, 1-based; 0 for a zero input).
 *
 * Computed as (bsf(x) | -(x == 0)) + 1: Bsf sets ZF for x==0, Setcc/Conv/Neg
 * turn that flag into the mask 0 or 0xFFFFFFFF, so the Or forces the result
 * to -1 for a zero input before the final +1.
 */
5024 static ir_node *gen_ffs(ir_node *node)
5026 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5027 ir_node *real = skip_Proj(bsf);
5028 dbg_info *dbgi = get_irn_dbg_info(real);
5029 ir_node *block = get_nodes_block(real);
5030 ir_node *flag, *set, *conv, *neg, *or;
/* ensure the Bsf is in mode_T so we can grab its flags output as well */
5033 if (get_irn_mode(real) != mode_T) {
5034 set_irn_mode(real, mode_T);
5035 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5038 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (x == 0) as an 8-bit value */
5041 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
5042 SET_IA32_ORIG_NODE(set, node);
/* widen the Setcc byte to 32 bit */
5045 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5046 SET_IA32_ORIG_NODE(conv, node);
/* neg = -(x == 0): all-ones mask iff x was zero */
5049 neg = new_bd_ia32_Neg(dbgi, block, conv);
5052 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5053 set_ia32_commutative(or);
/* +1 turns the 0-based bit index (or -1 for zero input) into ffs semantics */
5056 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
5060 * Transform builtin clz.
/**
 * Transform builtin clz (count leading zeros).
 *
 * Bsr yields the index of the highest set bit; XOR with 31 converts that
 * into the leading-zero count for a 32-bit value (31 - bsr(x), since the
 * index is in [0,31]). Undefined for a zero input, like the hardware Bsr.
 */
5062 static ir_node *gen_clz(ir_node *node)
5064 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5065 ir_node *real = skip_Proj(bsr);
5066 dbg_info *dbgi = get_irn_dbg_info(real);
5067 ir_node *block = get_nodes_block(real);
5068 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5070 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5074 * Transform builtin ctz.
/**
 * Transform builtin ctz (count trailing zeros).
 *
 * Maps directly onto Bsf: the index of the lowest set bit equals the number
 * of trailing zeros. Undefined for a zero input, like the hardware Bsf.
 */
5076 static ir_node *gen_ctz(ir_node *node)
5078 return gen_unop_AM(node, new_bd_ia32_Bsf);
5082 * Transform builtin parity.
/**
 * Transform builtin parity.
 *
 * Compares the operand against 0 just to obtain the hardware parity flag,
 * then materializes that flag with Setcc(ia32_pn_Cmp_parity) and widens the
 * byte result to 32 bit.
 *
 * NOTE(review): sampled excerpt -- the final "return new_node;" and some
 * intermediate lines are not visible here; confirm against the full file.
 */
5084 static ir_node *gen_parity(ir_node *node)
5086 ir_node *param = get_Builtin_param(node, 0);
5087 dbg_info *dbgi = get_irn_dbg_info(node);
5089 ir_node *block = get_nodes_block(node);
5091 ir_node *new_block = be_transform_node(block);
5092 ir_node *imm, *cmp, *new_node;
5094 ia32_address_mode_t am;
5095 ia32_address_t *addr = &am.addr;
/* cmp param, 0 -- only executed for its parity flag */
5099 match_arguments(&am, block, NULL, param, NULL, match_am);
5100 imm = ia32_create_Immediate(NULL, 0, 0);
5101 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
5102 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
5103 set_am_attributes(cmp, &am);
5104 set_ia32_ls_mode(cmp, mode_Iu);
5106 SET_IA32_ORIG_NODE(cmp, node);
5108 cmp = fix_mem_proj(cmp, &am);
/* setp: materialize the parity flag as a byte */
5111 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
5112 SET_IA32_ORIG_NODE(new_node, node);
/* widen the byte to 32 bit */
5115 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5116 nomem, new_node, mode_Bu);
5117 SET_IA32_ORIG_NODE(new_node, node);
5122 * Transform builtin popcount
/**
 * Transform builtin popcount.
 *
 * With SSE4.2/SSE4a support this emits a single Popcnt instruction
 * (address-mode capable). Otherwise it expands the classic parallel
 * bit-count: pairwise sums with mask 0x55555555, then 2-bit groups
 * (0x33333333), nibbles (0x0F0F0F0F), bytes (0x00FF00FF) and finally the
 * two half-words. Lea is used for the additions.
 *
 * FIX(review): all five shift steps used new_bd_ia32_Shl; the algorithm
 * (and the surviving comment "s5 = m12 >> 16") requires right shifts, so
 * they are new_bd_ia32_Shr now.
 *
 * NOTE(review): sampled excerpt -- the declarations of cnt and new_param
 * and several braces are not visible here; confirm against the full file.
 */
5124 static ir_node *gen_popcount(ir_node *node)
5126 ir_node *param = get_Builtin_param(node, 0);
5127 dbg_info *dbgi = get_irn_dbg_info(node);
5129 ir_node *block = get_nodes_block(node);
5130 ir_node *new_block = be_transform_node(block);
5133 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5135 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5136 if (ia32_cg_config.use_popcnt) {
5137 ia32_address_mode_t am;
5138 ia32_address_t *addr = &am.addr;
5141 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5143 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5144 set_am_attributes(cnt, &am);
5145 set_ia32_ls_mode(cnt, get_irn_mode(param));
5147 SET_IA32_ORIG_NODE(cnt, node);
5148 return fix_mem_proj(cnt, &am);
5151 new_param = be_transform_node(param);
5153 /* do the standard popcount algo */
5155 /* m1 = x & 0x55555555 */
5156 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5157 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5160 simm = ia32_create_Immediate(NULL, 0, 1);
/* s1 = x >> 1 */
5161 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5163 /* m2 = s1 & 0x55555555 */
5164 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2: per-2-bit population counts */
5167 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5169 /* m4 = m3 & 0x33333333 */
5170 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5171 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5174 simm = ia32_create_Immediate(NULL, 0, 2);
/* s2 = m3 >> 2 */
5175 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5177 /* m5 = s2 & 0x33333333 */
5178 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5: per-nibble population counts */
5181 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5183 /* m7 = m6 & 0x0F0F0F0F */
5184 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5185 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5188 simm = ia32_create_Immediate(NULL, 0, 4);
/* s3 = m6 >> 4 */
5189 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5191 /* m8 = s3 & 0x0F0F0F0F */
5192 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8: per-byte population counts */
5195 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5197 /* m10 = m9 & 0x00FF00FF */
5198 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5199 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5202 simm = ia32_create_Immediate(NULL, 0, 8);
/* s4 = m9 >> 8 */
5203 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5205 /* m11 = s4 & 0x00FF00FF */
5206 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5208 /* m12 = m10 + m11 */
5209 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5211 /* m13 = m12 & 0x0000FFFF */
5212 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5213 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5215 /* s5 = m12 >> 16 */
5216 simm = ia32_create_Immediate(NULL, 0, 16);
5217 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5219 /* res = m13 + s5 */
5220 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5224 * Transform builtin byte swap.
/**
 * Transform builtin byte swap.
 *
 * For 32-bit values, uses the Bswap instruction on i486+; otherwise expands
 * to the classic shift/mask sequence
 *   (x << 24) | ((x << 8) & 0xFF0000) | ((x >> 8) & 0xFF00) | (x >> 24)
 * using Lea for the additions (the masked terms are disjoint, so add == or).
 * 16-bit values always use Bswap16; other sizes panic.
 *
 * FIX(review): the two byte masks were swapped ((x<<8) was masked with
 * 0xFF00 and (x>>8) with 0xFF0000); the left-shifted term must keep byte 2
 * (0xFF0000) and the right-shifted term byte 1 (0xFF00).
 *
 * NOTE(review): sampled excerpt -- the switch(size) labels and several
 * braces are not visible here; confirm against the full file.
 */
5226 static ir_node *gen_bswap(ir_node *node)
5228 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5229 dbg_info *dbgi = get_irn_dbg_info(node);
5231 ir_node *block = get_nodes_block(node);
5232 ir_node *new_block = be_transform_node(block);
5233 ir_mode *mode = get_irn_mode(param);
5234 unsigned size = get_mode_size_bits(mode);
5235 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5239 if (ia32_cg_config.use_i486) {
5240 /* swap available */
5241 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* s1 = x << 24: low byte into byte 3 */
5243 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
/* m1 = (x << 8) & 0xFF0000: byte 1 into byte 2 */
5244 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5246 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF0000));
5247 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
/* m3 = (x >> 8) & 0xFF00: byte 2 into byte 1 */
5249 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5251 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF00));
5252 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
/* s4 = x >> 24: top byte into byte 0 */
5254 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5255 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5258 /* swap16 always available */
5259 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5262 panic("Invalid bswap size (%d)", size);
5267 * Transform builtin outport.
/**
 * Transform builtin outport: emit an ia32 Outport (out instruction) that
 * writes <value> (param 1) to I/O port <port> (param 0, immediate if
 * possible). The store width is taken from the value's mode.
 *
 * NOTE(review): sampled excerpt -- the trailing lines after
 * set_ia32_ls_mode (presumably the return of res) are not visible here.
 */
5269 static ir_node *gen_outport(ir_node *node)
5271 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5272 ir_node *oldv = get_Builtin_param(node, 1);
5273 ir_mode *mode = get_irn_mode(oldv);
5274 ir_node *value = be_transform_node(oldv);
5275 ir_node *block = be_transform_node(get_nodes_block(node));
5276 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5277 dbg_info *dbgi = get_irn_dbg_info(node);
5279 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5280 set_ia32_ls_mode(res, mode);
5285 * Transform builtin inport.
/**
 * Transform builtin inport: emit an ia32 Inport (in instruction) reading
 * from I/O port param 0. The load width comes from the builtin's declared
 * result type.
 *
 * NOTE(review): sampled excerpt -- the "missing Result Proj" handling and
 * the return statement are not visible here.
 */
5287 static ir_node *gen_inport(ir_node *node)
5289 ir_type *tp = get_Builtin_type(node);
5290 ir_type *rstp = get_method_res_type(tp, 0);
5291 ir_mode *mode = get_type_mode(rstp);
5292 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5293 ir_node *block = be_transform_node(get_nodes_block(node));
5294 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5295 dbg_info *dbgi = get_irn_dbg_info(node);
5297 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5298 set_ia32_ls_mode(res, mode);
5300 /* check for missing Result Proj */
5305 * Transform a builtin inner trampoline
/**
 * Transform a builtin inner trampoline: materialize the 10-byte trampoline
 *   B9 <env>      mov ecx, <env>
 *   E9 <rel>      jmp rel32 <callee>
 * at *ptr via a chain of byte/word Stores, where <rel> is callee minus the
 * address just past the trampoline (hence the -10 bias). Returns a Tuple of
 * (mem, trampoline address) consumed by gen_Proj_Builtin.
 *
 * NOTE(review): sampled excerpt -- the address-offset increments between
 * the stores, the mem chaining (addr.mem updates) and the in[] setup for
 * the Tuple are not visible here; confirm against the full file.
 */
5307 static ir_node *gen_inner_trampoline(ir_node *node)
5309 ir_node *ptr = get_Builtin_param(node, 0);
5310 ir_node *callee = get_Builtin_param(node, 1);
5311 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5312 ir_node *mem = get_Builtin_mem(node);
5313 ir_node *block = get_nodes_block(node);
5314 ir_node *new_block = be_transform_node(block);
5318 ir_node *trampoline;
5320 dbg_info *dbgi = get_irn_dbg_info(node);
5321 ia32_address_t addr;
5323 /* construct store address */
5324 memset(&addr, 0, sizeof(addr));
5325 ia32_create_address_mode(&addr, ptr, 0);
5327 if (addr.base == NULL) {
5328 addr.base = noreg_GP;
5330 addr.base = be_transform_node(addr.base);
5333 if (addr.index == NULL) {
5334 addr.index = noreg_GP;
5336 addr.index = be_transform_node(addr.index);
5338 addr.mem = be_transform_node(mem);
5340 /* mov ecx, <env> */
/* 0xB9 is the opcode of "mov ecx, imm32" */
5341 val = ia32_create_Immediate(NULL, 0, 0xB9);
5342 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5343 addr.index, addr.mem, val);
5344 set_irn_pinned(store, get_irn_pinned(node));
5345 set_ia32_op_type(store, ia32_AddrModeD);
5346 set_ia32_ls_mode(store, mode_Bu);
5347 set_address(store, &addr);
/* store the 32-bit environment pointer (the mov's immediate operand) */
5351 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5352 addr.index, addr.mem, env);
5353 set_irn_pinned(store, get_irn_pinned(node));
5354 set_ia32_op_type(store, ia32_AddrModeD);
5355 set_ia32_ls_mode(store, mode_Iu);
5356 set_address(store, &addr);
5360 /* jmp rel <callee> */
/* 0xE9 is the opcode of "jmp rel32" */
5361 val = ia32_create_Immediate(NULL, 0, 0xE9);
5362 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5363 addr.index, addr.mem, val);
5364 set_irn_pinned(store, get_irn_pinned(node));
5365 set_ia32_op_type(store, ia32_AddrModeD);
5366 set_ia32_ls_mode(store, mode_Bu);
5367 set_address(store, &addr);
5371 trampoline = be_transform_node(ptr);
5373 /* the callee is typically an immediate */
/* rel = callee - 10 ... minus the trampoline base (Sub below); -10 is the
 * trampoline size, making rel relative to the end of the jmp instruction */
5374 if (is_SymConst(callee)) {
5375 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5377 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
5379 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5381 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5382 addr.index, addr.mem, rel);
5383 set_irn_pinned(store, get_irn_pinned(node));
5384 set_ia32_op_type(store, ia32_AddrModeD);
5385 set_ia32_ls_mode(store, mode_Iu);
5386 set_address(store, &addr);
/* in[0] = mem, in[1] = trampoline address (see gen_Proj_Builtin) */
5391 return new_r_Tuple(new_block, 2, in);
5395 * Transform Builtin node.
/**
 * Transform a Builtin node: dispatch on the builtin kind to the specific
 * gen_* transformer; panics for kinds the IA32 backend does not implement.
 *
 * NOTE(review): sampled excerpt -- the switch keyword and several case
 * labels (trap, ffs, clz, ctz, parity, bswap, outport, inport) are not
 * visible here; the return statements imply them.
 */
5397 static ir_node *gen_Builtin(ir_node *node)
5399 ir_builtin_kind kind = get_Builtin_kind(node);
5403 return gen_trap(node);
5404 case ir_bk_debugbreak:
5405 return gen_debugbreak(node);
5406 case ir_bk_return_address:
5407 return gen_return_address(node);
5408 case ir_bk_frame_address:
5409 return gen_frame_address(node);
5410 case ir_bk_prefetch:
5411 return gen_prefetch(node);
5413 return gen_ffs(node);
5415 return gen_clz(node);
5417 return gen_ctz(node);
5419 return gen_parity(node);
5420 case ir_bk_popcount:
5421 return gen_popcount(node);
5423 return gen_bswap(node);
5425 return gen_outport(node);
5427 return gen_inport(node);
5428 case ir_bk_inner_trampoline:
5429 return gen_inner_trampoline(node);
5431 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5435 * Transform Proj(Builtin) node.
/**
 * Transform a Proj from a Builtin node: route each builtin kind's result/
 * memory Proj to the matching output of the transformed node. For builtins
 * whose transformation returns the result directly, the result Proj is the
 * transformed node itself; inner trampolines return a Tuple (0 = mem,
 * 1 = address); Inport gets real Projs onto its res/M outputs.
 *
 * NOTE(review): sampled excerpt -- the switch keyword, several case labels
 * and the "return new_node" statements between the asserts are not visible
 * here.
 */
5437 static ir_node *gen_Proj_Builtin(ir_node *proj)
5439 ir_node *node = get_Proj_pred(proj);
5440 ir_node *new_node = be_transform_node(node);
5441 ir_builtin_kind kind = get_Builtin_kind(node);
5444 case ir_bk_return_address:
5445 case ir_bk_frame_address:
5450 case ir_bk_popcount:
/* these transformers return the result value itself */
5452 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5455 case ir_bk_debugbreak:
5456 case ir_bk_prefetch:
/* these transformers return the memory value */
5458 assert(get_Proj_proj(proj) == pn_Builtin_M);
5461 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5462 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5464 assert(get_Proj_proj(proj) == pn_Builtin_M);
5465 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5467 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned a Tuple: pred 1 = address, pred 0 = mem */
5468 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5469 return get_Tuple_pred(new_node, 1);
5471 assert(get_Proj_proj(proj) == pn_Builtin_M);
5472 return get_Tuple_pred(new_node, 0);
5475 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform a be_IncSP: duplicate it and mark it as flag-modifying, since
 * the stack adjustment is emitted as an add/sub on esp which clobbers the
 * condition codes.
 *
 * NOTE(review): the "return res;" is not visible in this sampled excerpt.
 */
5478 static ir_node *gen_be_IncSP(ir_node *node)
5480 ir_node *res = be_duplicate_node(node);
5481 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5487 * Transform the Projs from a be_Call.
/**
 * Transform the Projs from a be_Call: map be_Call proj numbers onto the
 * corresponding ia32_Call outputs. For data results the matching output is
 * found by comparing the (limited) register requirements of the old Proj
 * against the new call's outputs; the stack and fpcw Projs additionally get
 * their fixed registers assigned.
 *
 * NOTE(review): sampled excerpt -- declarations (res), several braces, the
 * loop body that assigns proj = i, and the final return are not visible
 * here; confirm against the full file.
 */
5489 static ir_node *gen_Proj_be_Call(ir_node *node)
5491 ir_node *call = get_Proj_pred(node);
5492 ir_node *new_call = be_transform_node(call);
5493 dbg_info *dbgi = get_irn_dbg_info(node);
5494 long proj = get_Proj_proj(node);
5495 ir_mode *mode = get_irn_mode(node);
/* NOTE(review): this early-returns a Proj with an *input* number
 * (n_ia32_Call_mem) as proj number -- looks suspicious, verify against the
 * full file before changing */
5498 if (proj == pn_be_Call_M_regular) {
5499 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5501 /* transform call modes */
5502 if (mode_is_data(mode)) {
5503 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5507 /* Map from be_Call to ia32_Call proj number */
5508 if (proj == pn_be_Call_sp) {
5509 proj = pn_ia32_Call_stack;
5510 } else if (proj == pn_be_Call_M_regular) {
5511 proj = pn_ia32_Call_M;
/* otherwise: search the new call's outputs for the one whose limited
 * register requirement matches this Proj's */
5513 arch_register_req_t const *const req = arch_get_register_req_out(node);
5514 int const n_outs = arch_irn_get_n_outs(new_call);
5517 assert(proj >= pn_be_Call_first_res);
5518 assert(req->type & arch_register_req_type_limited);
5520 for (i = 0; i < n_outs; ++i) {
5521 arch_register_req_t const *const new_req
5522 = arch_get_out_register_req(new_call, i);
5524 if (!(new_req->type & arch_register_req_type_limited) ||
5525 new_req->cls != req->cls ||
5526 *new_req->limited != *req->limited)
5535 res = new_rd_Proj(dbgi, new_call, mode, proj);
5537 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5539 case pn_ia32_Call_stack:
5540 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5543 case pn_ia32_Call_fpcw:
5544 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5552 * Transform the Projs from a Cmp.
/**
 * Transform the Projs from a Cmp.
 *
 * Reaching this means a mode_b Proj(Cmp) survived until the backend; the
 * lower_mode_b pass should have eliminated all of them, so this is a hard
 * error rather than something to transform.
 */
5554 static ir_node *gen_Proj_Cmp(ir_node *node)
5556 /* this probably means not all mode_b nodes were lowered... */
5557 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5562 * Transform the Projs from a Bound.
/**
 * Transform the Projs from a Bound: the memory and index Projs are routed
 * through to the Bound's own inputs (the check itself has no memory effect
 * and passes the index through), while the control-flow Projs map onto the
 * true/false exits of the Jcc the Bound was lowered to.
 *
 * NOTE(review): sampled excerpt -- the declaration line of new_node and the
 * pn_Bound_M / pn_Bound_res case labels are not visible here.
 */
5564 static ir_node *gen_Proj_Bound(ir_node *node)
5567 ir_node *pred = get_Proj_pred(node);
5569 switch (get_Proj_proj(node)) {
5571 return be_transform_node(get_Bound_mem(pred));
5572 case pn_Bound_X_regular:
5573 new_node = be_transform_node(pred);
5574 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
5575 case pn_Bound_X_except:
5576 new_node = be_transform_node(pred);
5577 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
5579 return be_transform_node(get_Bound_index(pred));
5581 panic("unsupported Proj from Bound");
/**
 * Transform a Proj from an ASM node: the memory Proj is renumbered to the
 * transformed ASM's last output; integer/reference and float Projs keep a
 * position computed elsewhere (the assignments are not visible in this
 * sampled excerpt); any other mode is an error.
 */
5585 static ir_node *gen_Proj_ASM(ir_node *node)
5587 ir_mode *mode = get_irn_mode(node);
5588 ir_node *pred = get_Proj_pred(node);
5589 ir_node *new_pred = be_transform_node(pred);
5590 long pos = get_Proj_proj(node);
5592 if (mode == mode_M) {
/* the memory output is by convention the last output of the new ASM */
5593 pos = arch_irn_get_n_outs(new_pred)-1;
5594 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
/* NOTE(review): the mode/pos adjustments for the int and float branches
 * are missing in this excerpt */
5596 } else if (mode_is_float(mode)) {
5599 panic("unexpected proj mode at ASM");
5602 return new_r_Proj(new_pred, mode, pos);
5606 * Transform and potentially renumber Proj nodes.
/**
 * Transform and potentially renumber Proj nodes: dispatch on the opcode of
 * the Proj's predecessor to the specialized Proj transformers; Start Projs
 * get special handling (initial-exec becomes a Jmp, TLS has its own
 * routine); everything else is duplicated, with GP-register modes forced to
 * mode_Iu.
 *
 * NOTE(review): sampled excerpt -- most case labels (op_Store, op_Load,
 * op_ASM, ...), the returns after the Start handling and several braces are
 * not visible here.
 */
5608 static ir_node *gen_Proj(ir_node *node)
5610 ir_node *pred = get_Proj_pred(node);
5613 switch (get_irn_opcode(pred)) {
5615 proj = get_Proj_proj(node);
5616 if (proj == pn_Store_M) {
/* a Store has only a memory result, so the Proj maps to the Store itself */
5617 return be_transform_node(pred);
5619 panic("No idea how to transform proj->Store");
5622 return gen_Proj_Load(node);
5624 return gen_Proj_ASM(node);
5626 return gen_Proj_Builtin(node);
5630 return gen_Proj_DivMod(node);
5632 return gen_Proj_CopyB(node);
5634 return gen_Proj_Quot(node);
5636 return gen_Proj_be_SubSP(node);
5638 return gen_Proj_be_AddSP(node);
5640 return gen_Proj_be_Call(node);
5642 return gen_Proj_Cmp(node);
5644 return gen_Proj_Bound(node);
5646 proj = get_Proj_proj(node);
5648 case pn_Start_X_initial_exec: {
5649 ir_node *block = get_nodes_block(pred);
5650 ir_node *new_block = be_transform_node(block);
5651 dbg_info *dbgi = get_irn_dbg_info(node);
5652 /* we exchange the ProjX with a jump */
5653 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5658 case pn_Start_P_tls:
5659 return gen_Proj_tls(node);
5664 if (is_ia32_l_FloattoLL(pred)) {
5665 return gen_Proj_l_FloattoLL(node);
5667 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5671 ir_mode *mode = get_irn_mode(node);
5672 if (ia32_mode_needs_gp_reg(mode)) {
/* GP-register values always live in 32-bit registers -> mode_Iu Proj */
5673 ir_node *new_pred = be_transform_node(pred);
5674 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5675 get_Proj_proj(node));
/* keep the original node number for debugging purposes */
5676 new_proj->node_nr = node->node_nr;
5681 return be_duplicate_node(node);
5685 * Enters all transform functions into the generic pointer
/**
 * Enters all transform functions into the generic function pointers of the
 * opcodes: clears the setup, then registers one gen_* (or be_duplicate_node
 * for ia32 nodes that survive unchanged) per firm/backend opcode. Must run
 * before be_transform_graph().
 */
5687 static void register_transformers(void)
5689 /* first clear the generic function pointer for all ops */
5690 be_start_transform_setup();
5692 be_set_transform_function(op_Add, gen_Add);
5693 be_set_transform_function(op_And, gen_And);
5694 be_set_transform_function(op_ASM, gen_ASM);
5695 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5696 be_set_transform_function(op_be_Call, gen_be_Call);
5697 be_set_transform_function(op_be_Copy, gen_be_Copy);
5698 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5699 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5700 be_set_transform_function(op_be_Return, gen_be_Return);
5701 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5702 be_set_transform_function(op_Bound, gen_Bound);
5703 be_set_transform_function(op_Builtin, gen_Builtin);
5704 be_set_transform_function(op_Cmp, gen_Cmp);
5705 be_set_transform_function(op_Cond, gen_Cond);
5706 be_set_transform_function(op_Const, gen_Const);
5707 be_set_transform_function(op_Conv, gen_Conv);
5708 be_set_transform_function(op_CopyB, gen_CopyB);
5709 be_set_transform_function(op_Div, gen_Div);
5710 be_set_transform_function(op_DivMod, gen_DivMod);
5711 be_set_transform_function(op_Eor, gen_Eor);
5712 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5713 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5714 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5715 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5716 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5717 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5718 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5719 be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
5720 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5721 be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
5722 be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
5723 be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
5724 be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
5725 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5726 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5727 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5728 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5729 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5730 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5731 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5732 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5733 be_set_transform_function(op_IJmp, gen_IJmp);
5734 be_set_transform_function(op_Jmp, gen_Jmp);
5735 be_set_transform_function(op_Load, gen_Load);
5736 be_set_transform_function(op_Minus, gen_Minus);
5737 be_set_transform_function(op_Mod, gen_Mod);
5738 be_set_transform_function(op_Mul, gen_Mul);
5739 be_set_transform_function(op_Mulh, gen_Mulh);
5740 be_set_transform_function(op_Mux, gen_Mux);
5741 be_set_transform_function(op_Not, gen_Not);
5742 be_set_transform_function(op_Or, gen_Or);
5743 be_set_transform_function(op_Phi, gen_Phi);
5744 be_set_transform_function(op_Proj, gen_Proj);
5745 be_set_transform_function(op_Quot, gen_Quot);
5746 be_set_transform_function(op_Rotl, gen_Rotl);
5747 be_set_transform_function(op_Shl, gen_Shl);
5748 be_set_transform_function(op_Shr, gen_Shr);
5749 be_set_transform_function(op_Shrs, gen_Shrs);
5750 be_set_transform_function(op_Store, gen_Store);
5751 be_set_transform_function(op_Sub, gen_Sub);
5752 be_set_transform_function(op_SymConst, gen_SymConst);
5753 be_set_transform_function(op_Unknown, gen_Unknown);
5757 * Pre-transform all unknown and noreg nodes.
/**
 * Pre-transform all unknown and noreg nodes so they exist in the new graph
 * before the main transformation walks it, and cache the per-graph nomem /
 * noreg_GP nodes in the file-static variables used by the transformers.
 */
5759 static void ia32_pretransform_node(void)
5761 ir_graph *irg = current_ir_graph;
5762 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5764 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5765 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5766 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5768 nomem = get_irg_no_mem(irg);
5769 noreg_GP = ia32_new_NoReg_gp(irg);
5775 * Post-process all calls if we are in SSE mode.
5776 * The ABI requires that the results are in st0, copy them
5777 * to a xmm register.
/**
 * Post-process all recorded calls when compiling in SSE mode: the x87 ABI
 * returns floats in st(0), so for each float result either patch consuming
 * xStores into vfst (direct x87 store), or spill st(0) to the frame with
 * vfst and reload it into an SSE register with xLoad, rerouting all other
 * users to the reloaded value.
 *
 * NOTE(review): sampled excerpt -- loop-variable declarations, "continue"
 * statements after the early-out comments, the exchange of the patched
 * xStore and several braces are not visible here.
 */
5779 static void postprocess_fp_call_results(void)
5783 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5784 ir_node *call = call_list[i];
5785 ir_type *mtp = call_types[i];
5788 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5789 ir_type *res_tp = get_method_res_type(mtp, j);
5790 ir_node *res, *new_res;
5791 const ir_edge_t *edge, *next;
5794 if (! is_atomic_type(res_tp)) {
5795 /* no floating point return */
5798 mode = get_type_mode(res_tp);
5799 if (! mode_is_float(mode)) {
5800 /* no floating point return */
5804 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5807 /* now patch the users */
5808 foreach_out_edge_safe(res, edge, next) {
5809 ir_node *succ = get_edge_src_irn(edge);
/* Keeps just keep the x87 value alive -- nothing to patch */
5812 if (be_is_Keep(succ))
5815 if (is_ia32_xStore(succ)) {
5816 /* an xStore can be patched into an vfst */
5817 dbg_info *db = get_irn_dbg_info(succ);
5818 ir_node *block = get_nodes_block(succ);
5819 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5820 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5821 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5822 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5823 ir_mode *mode = get_ia32_ls_mode(succ);
5825 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5826 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5827 if (is_ia32_use_frame(succ))
5828 set_ia32_use_frame(st);
5829 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5830 set_irn_pinned(st, get_irn_pinned(succ));
5831 set_ia32_op_type(st, ia32_AddrModeD);
/* build the st(0) -> SSE transfer lazily, only once per result */
5835 if (new_res == NULL) {
5836 dbg_info *db = get_irn_dbg_info(call);
5837 ir_node *block = get_nodes_block(call);
5838 ir_node *frame = get_irg_frame(current_ir_graph);
5839 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5840 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5841 ir_node *vfst, *xld, *new_mem;
5843 /* store st(0) on stack */
5844 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5845 set_ia32_op_type(vfst, ia32_AddrModeD);
5846 set_ia32_use_frame(vfst);
5848 /* load into SSE register */
5849 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5850 set_ia32_op_type(xld, ia32_AddrModeS);
5851 set_ia32_use_frame(xld);
5853 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5854 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* hang the store/load sequence into the memory chain of the call */
5856 if (old_mem != NULL) {
5857 edges_reroute(old_mem, new_mem, current_ir_graph);
5861 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5868 /* do the transformation */
/**
 * Do the transformation of a whole graph into the ia32 backend
 * representation: register the transformers, compute the heights needed for
 * address-mode matching, temporarily disable CSE, run the generic transform
 * walker and finally fix up SSE float call results.
 *
 * NOTE(review): sampled excerpt -- the local declarations and the explicit
 * set_opt_cse(0) between reading and restoring cse_last are not visible
 * here.
 */
5869 void ia32_transform_graph(ir_graph *irg)
5873 register_transformers();
5874 initial_fpcw = NULL;
5877 be_timer_push(T_HEIGHTS);
5878 heights = heights_new(irg);
5879 be_timer_pop(T_HEIGHTS);
5880 ia32_calculate_non_address_mode_nodes(irg);
5882 /* the transform phase is not safe for CSE (yet) because several nodes get
5883 * attributes set after their creation */
5884 cse_last = get_opt_cse();
/* call_list/call_types record float-returning calls for the SSE fixup */
5887 call_list = NEW_ARR_F(ir_node *, 0);
5888 call_types = NEW_ARR_F(ir_type *, 0);
5889 be_transform_graph(irg, ia32_pretransform_node);
5891 if (ia32_cg_config.use_sse2)
5892 postprocess_fp_call_results();
5893 DEL_ARR_F(call_types);
5894 DEL_ARR_F(call_list);
5896 set_opt_cse(cse_last);
5898 ia32_free_non_address_mode_nodes();
5899 heights_free(heights);
5903 void ia32_init_transform(void)
5905 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");