2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
50 #include "../benode.h"
51 #include "../besched.h"
53 #include "../beutil.h"
55 #include "../betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_util.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
/* define this to construct SSE constants instead of load them */
#undef CONSTRUCT_SSE_CONST

/* bit patterns (as strings) for the sign bit and absolute-value masks of
 * single/double precision floats, plus conversion-helper constants */
#define SFP_SIGN   "0x80000000"
#define DFP_SIGN   "0x8000000000000000"
#define SFP_ABS    "0x7FFFFFFF"
#define DFP_ABS    "0x7FFFFFFFFFFFFFFF"
#define DFP_INTMAX "9223372036854775807"
#define ULL_BIAS   "18446744073709551616"

/* linker-level names of the constant entities generated for the masks above */
#define ENT_SFP_SIGN "C_ia32_sfp_sign"
#define ENT_DFP_SIGN "C_ia32_dfp_sign"
#define ENT_SFP_ABS  "C_ia32_sfp_abs"
#define ENT_DFP_ABS  "C_ia32_dfp_abs"
#define ENT_ULL_BIAS "C_ia32_ull_bias"

/* shorthand for the modes of the vfp (x87) and xmm (SSE) register classes */
#define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
#define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* cached (already transformed) x87 control word node, see get_fpcw() */
static ir_node *initial_fpcw = NULL;
int ia32_no_pic_adjust;

/* constructor-function types for the generic gen_binop/gen_unop helpers.
 * NOTE(review): some parameter lists below were truncated by the extraction
 * (trailing parameters/closing parens elided); tokens kept unchanged. */
typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,

typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
		ir_node *op1, ir_node *op2);

typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op);

typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem);

typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);

static ir_node *create_immediate_or_transform(ir_node *node,
		char immediate_constraint_type);

static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
		dbg_info *dbgi, ir_node *block,
		ir_node *op, ir_node *orig_node);

/* it's enough to have those once */
static ir_node *nomem, *noreg_GP;

/** a list to postprocess all calls */
static ir_node **call_list;
static ir_type **call_types;
133 /** Return non-zero is a node represents the 0 constant. */
134 static bool is_Const_0(ir_node *node)
136 return is_Const(node) && is_Const_null(node);
139 /** Return non-zero is a node represents the 1 constant. */
140 static bool is_Const_1(ir_node *node)
142 return is_Const(node) && is_Const_one(node);
145 /** Return non-zero is a node represents the -1 constant. */
146 static bool is_Const_Minus_1(ir_node *node)
148 return is_Const(node) && is_Const_all_one(node);
152 * returns true if constant can be created with a simple float command
154 static bool is_simple_x87_Const(ir_node *node)
156 ir_tarval *tv = get_Const_tarval(node);
157 if (tarval_is_null(tv) || tarval_is_one(tv))
160 /* TODO: match all the other float constants */
/**
 * returns true if constant can be created with a simple float command
 * (SSE path; without CONSTRUCT_SSE_CONST only the null constant counts).
 *
 * NOTE(review): several lines (condition continuations, returns, closing
 * braces) are elided in this extract; code tokens are kept unchanged.
 */
static bool is_simple_sse_Const(ir_node *node)
	ir_tarval *tv   = get_Const_tarval(node);
	ir_mode   *mode = get_tarval_mode(tv);

	/* condition continuation elided — presumably also matches 1.0 when
	 * CONSTRUCT_SSE_CONST is defined; TODO confirm against upstream */
	if (tarval_is_null(tv)
#ifdef CONSTRUCT_SSE_CONST
#ifdef CONSTRUCT_SSE_CONST
	if (mode == mode_D) {
		/* assemble the low 32 bits of the double from its byte view */
		unsigned val = get_tarval_sub_bits(tv, 0) |
		               (get_tarval_sub_bits(tv, 1) << 8) |
		               (get_tarval_sub_bits(tv, 2) << 16) |
		               (get_tarval_sub_bits(tv, 3) << 24);
		/* lower 32bit are zero, really a 32bit constant */
#endif /* CONSTRUCT_SSE_CONST */
	/* TODO: match all the other float constants */
197 * return NoREG or pic_base in case of PIC.
198 * This is necessary as base address for newly created symbols
200 static ir_node *get_symconst_base(void)
202 ir_graph *irg = current_ir_graph;
204 if (be_get_irg_options(irg)->pic) {
205 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
206 return arch_env->impl->get_pic_base(irg);
/**
 * Transforms a Const node: float constants become SSE loads/constructions
 * or x87 fldz/fld1/constant-pool loads; integer constants become ia32_Const.
 *
 * NOTE(review): this extract is missing many lines (local declarations,
 * else-branches, closing braces, returns); code tokens below are kept
 * unchanged — do not treat the block as self-contained.
 */
static ir_node *gen_Const(ir_node *node)
	ir_node  *old_block = get_nodes_block(node);
	ir_node  *block     = be_transform_node(old_block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_mode  *mode      = get_irn_mode(node);

	assert(is_Const(node));

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2) {
			ir_tarval *tv = get_Const_tarval(node);
			if (tarval_is_null(tv)) {
				/* 0.0: cheap all-zero xmm register */
				load = new_bd_ia32_xZero(dbgi, block);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			} else if (tarval_is_one(tv)) {
				/* 1.0: shift an all-ones pattern so only the exponent
				 * bits of 1.0 remain (26/55 depends on float/double) */
				int cnst = mode == mode_F ? 26 : 55;
				ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
				ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
				ir_node *pslld, *psrld;

				load = new_bd_ia32_xAllOnes(dbgi, block);
				set_ia32_ls_mode(load, mode);
				pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
				set_ia32_ls_mode(pslld, mode);
				psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
				set_ia32_ls_mode(psrld, mode);
#endif /* CONSTRUCT_SSE_CONST */
			} else if (mode == mode_F) {
				/* we can place any 32bit constant by using a movd gp, sse */
				unsigned val = get_tarval_sub_bits(tv, 0) |
				               (get_tarval_sub_bits(tv, 1) << 8) |
				               (get_tarval_sub_bits(tv, 2) << 16) |
				               (get_tarval_sub_bits(tv, 3) << 24);
				ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			if (mode == mode_D) {
				unsigned val = get_tarval_sub_bits(tv, 0) |
				               (get_tarval_sub_bits(tv, 1) << 8) |
				               (get_tarval_sub_bits(tv, 2) << 16) |
				               (get_tarval_sub_bits(tv, 3) << 24);
				ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
				ir_node *cnst, *psllq;

				/* fine, lower 32bit are zero, produce 32bit value */
				val = get_tarval_sub_bits(tv, 4) |
				      (get_tarval_sub_bits(tv, 5) << 8) |
				      (get_tarval_sub_bits(tv, 6) << 16) |
				      (get_tarval_sub_bits(tv, 7) << 24);
				cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
				psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
				set_ia32_ls_mode(psllq, mode);
#endif /* CONSTRUCT_SSE_CONST */
			/* general case: load from a constant-pool entity */
			floatent = ia32_create_float_const_entity(node);

			base = get_symconst_base();
			load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			arch_irn_add_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
		/* x87 path */
		if (is_Const_null(node)) {
			load = new_bd_ia32_vfldz(dbgi, block);
			set_ia32_ls_mode(load, mode);
		} else if (is_Const_one(node)) {
			load = new_bd_ia32_vfld1(dbgi, block);
			set_ia32_ls_mode(load, mode);
			/* general case: constant-pool load */
			floatent = ia32_create_float_const_entity(node);
			/* create_float_const_ent is smart and sometimes creates
			 * a smaller entity than the node's mode — use its mode */
			ls_mode = get_type_mode(get_entity_type(floatent));
			base    = get_symconst_base();
			load    = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			arch_irn_add_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
#ifdef CONSTRUCT_SSE_CONST
#endif /* CONSTRUCT_SSE_CONST */
		SET_IA32_ORIG_NODE(load, node);

		be_dep_on_frame(load);
	} else { /* non-float mode */
		ir_tarval *tv = get_Const_tarval(node);
		/* normalize to 32 bit unsigned before extracting the value */
		tv = tarval_convert_to(tv, mode_Iu);

		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
			panic("couldn't convert constant tarval (%+F)", node);
		val = get_tarval_long(tv);

		cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
		SET_IA32_ORIG_NODE(cnst, node);

		be_dep_on_frame(cnst);
/**
 * Transforms a SymConst: float-mode SymConsts become loads of the entity,
 * address entities become an ia32_Const carrying the entity.
 *
 * NOTE(review): declarations, an else branch and closing braces are elided
 * in this extract; code tokens are kept unchanged.
 */
static ir_node *gen_SymConst(ir_node *node)
	ir_node  *old_block = get_nodes_block(node);
	ir_node  *block     = be_transform_node(old_block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_mode  *mode      = get_irn_mode(node);

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
		/* x87 variant (the `else` line is elided in this extract) */
			cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
		set_ia32_am_sc(cnst, get_SymConst_entity(node));
		set_ia32_use_frame(cnst);
	/* integer path: only plain address entities are supported */
	if (get_SymConst_kind(node) != symconst_addr_ent) {
		panic("backend only support symconst_addr_ent (at %+F)", node);
	entity = get_SymConst_entity(node);
	cnst   = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);

	SET_IA32_ORIG_NODE(cnst, node);

	be_dep_on_frame(cnst);
386 * Create a float type for the given mode and cache it.
388 * @param mode the mode for the float type (might be integer mode for SSE2 types)
389 * @param align alignment
391 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
397 if (mode == mode_Iu) {
398 static ir_type *int_Iu[16] = {NULL, };
400 if (int_Iu[align] == NULL) {
401 int_Iu[align] = tp = new_type_primitive(mode);
402 /* set the specified alignment */
403 set_type_alignment_bytes(tp, align);
405 return int_Iu[align];
406 } else if (mode == mode_Lu) {
407 static ir_type *int_Lu[16] = {NULL, };
409 if (int_Lu[align] == NULL) {
410 int_Lu[align] = tp = new_type_primitive(mode);
411 /* set the specified alignment */
412 set_type_alignment_bytes(tp, align);
414 return int_Lu[align];
415 } else if (mode == mode_F) {
416 static ir_type *float_F[16] = {NULL, };
418 if (float_F[align] == NULL) {
419 float_F[align] = tp = new_type_primitive(mode);
420 /* set the specified alignment */
421 set_type_alignment_bytes(tp, align);
423 return float_F[align];
424 } else if (mode == mode_D) {
425 static ir_type *float_D[16] = {NULL, };
427 if (float_D[align] == NULL) {
428 float_D[align] = tp = new_type_primitive(mode);
429 /* set the specified alignment */
430 set_type_alignment_bytes(tp, align);
432 return float_D[align];
434 static ir_type *float_E[16] = {NULL, };
436 if (float_E[align] == NULL) {
437 float_E[align] = tp = new_type_primitive(mode);
438 /* set the specified alignment */
439 set_type_alignment_bytes(tp, align);
441 return float_E[align];
446 * Create a float[2] array type for the given atomic type.
448 * @param tp the atomic type
450 static ir_type *ia32_create_float_array(ir_type *tp)
452 ir_mode *mode = get_type_mode(tp);
453 unsigned align = get_type_alignment_bytes(tp);
458 if (mode == mode_F) {
459 static ir_type *float_F[16] = {NULL, };
461 if (float_F[align] != NULL)
462 return float_F[align];
463 arr = float_F[align] = new_type_array(1, tp);
464 } else if (mode == mode_D) {
465 static ir_type *float_D[16] = {NULL, };
467 if (float_D[align] != NULL)
468 return float_D[align];
469 arr = float_D[align] = new_type_array(1, tp);
471 static ir_type *float_E[16] = {NULL, };
473 if (float_E[align] != NULL)
474 return float_E[align];
475 arr = float_E[align] = new_type_array(1, tp);
477 set_type_alignment_bytes(arr, align);
478 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
479 set_type_state(arr, layout_fixed);
/* Generates an entity for a known FP const (used for FP Neg + Abs) */
ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
	/* table of the known constants: entity name, constant value as string,
	 * a mode selector and an alignment.
	 * NOTE(review): the declarations of the last two struct members are
	 * elided in this extract (the initializers suggest a mode selector
	 * and an alignment field — confirm against upstream). */
	static const struct {
		const char *ent_name;
		const char *cnst_str;
	} names [ia32_known_const_max] = {
		{ ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
		{ ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
		{ ENT_SFP_ABS,  SFP_ABS,  0, 16 }, /* ia32_SABS */
		{ ENT_DFP_ABS,  DFP_ABS,  1, 16 }, /* ia32_DABS */
		{ ENT_ULL_BIAS, ULL_BIAS, 2, 4 }   /* ia32_ULLBIAS */
	/* entities are created lazily and cached here */
	static ir_entity *ent_cache[ia32_known_const_max];

	const char *ent_name, *cnst_str;

	ent_name = names[kct].ent_name;
	if (! ent_cache[kct]) {
		cnst_str = names[kct].cnst_str;

		/* mode selector: 0 = 32bit int, 1 = 64bit int, other = float */
		switch (names[kct].mode) {
		case 0:  mode = mode_Iu; break;
		case 1:  mode = mode_Lu; break;
		default: mode = mode_F;  break;
		tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
		tp = ia32_create_float_type(mode, names[kct].align);

		/* the ULL bias is a two-element array: { 0, bias } */
		if (kct == ia32_ULLBIAS)
			tp = ia32_create_float_array(tp);
		ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);

		set_entity_ld_ident(ent, get_entity_ident(ent));
		add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
		set_entity_visibility(ent, ir_visibility_private);

		if (kct == ia32_ULLBIAS) {
			ir_initializer_t *initializer = create_initializer_compound(2);

			set_initializer_compound_value(initializer, 0,
				create_initializer_tarval(get_mode_null(mode)));
			set_initializer_compound_value(initializer, 1,
				create_initializer_tarval(tv));

			set_entity_initializer(ent, initializer);
			/* (non-array case — the `else` line is elided in this extract) */
			set_entity_initializer(ent, create_initializer_tarval(tv));

		/* cache the entry */
		ent_cache[kct] = ent;

	return ent_cache[kct];
/**
 * return true if the node is a Proj(Load) and could be used in source address
 * mode for another node. Will return only true if the @p other node is not
 * dependent on the memory of the Load (for binary operations use the other
 * input here, for unary operations use NULL).
 *
 * NOTE(review): the early-return lines and closing braces are elided in
 * this extract — each `if` below is presumed to return 0 or 1; confirm
 * against upstream before relying on polarity. Code tokens kept unchanged.
 */
static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
		ir_node *other, ir_node *other2, match_flags_t flags)
	/* float constants are always available */
	if (is_Const(node)) {
		ir_mode *mode = get_irn_mode(node);
		if (mode_is_float(mode)) {
			if (ia32_cg_config.use_sse2) {
				if (is_simple_sse_Const(node))
				if (is_simple_x87_Const(node))
			if (get_irn_n_edges(node) > 1)

	load = get_Proj_pred(node);
	pn   = get_Proj_proj(node);
	if (!is_Load(load) || pn != pn_Load_res)
	if (get_nodes_block(load) != block)
	/* we only use address mode if we're the only user of the load */
	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
	/* in some edge cases with address mode we might reach the load normally
	 * and through some AM sequence, if it is already materialized then we
	 * can't create an AM node from it */
	if (be_is_transformed(node))

	/* don't do AM if other node inputs depend on the load (via mem-proj) */
	if (other != NULL && ia32_prevents_AM(block, load, other))
	if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/** Result of operand matching: address-mode data plus operand flags. */
typedef struct ia32_address_mode_t ia32_address_mode_t;
struct ia32_address_mode_t {
	/* NOTE(review): several members are elided in this extract (the code
	 * below accesses am->addr, am->ls_mode, am->pinned, am->mem_proj,
	 * am->new_op1 and am->new_op2 — confirm against upstream) */
	ia32_op_type_t op_type;       /* ia32_Normal or ia32_AddrModeS */
	unsigned       commutative  : 1; /* operands may be swapped */
	unsigned       ins_permuted : 1; /* operands were swapped during matching */
616 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
618 /* construct load address */
619 memset(addr, 0, sizeof(addr[0]));
620 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
622 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
623 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
624 addr->mem = be_transform_node(mem);
/**
 * Fill the address-mode struct for a node that is used as a memory operand:
 * either a float Const (turned into a constant-pool reference) or a
 * Proj(Load) whose address is matched.
 *
 * NOTE(review): local declarations, a return and closing braces are elided
 * in this extract; code tokens are kept unchanged.
 */
static void build_address(ia32_address_mode_t *am, ir_node *node,
		ia32_create_am_flags_t flags)
	ia32_address_t *addr = &am->addr;

	/* floating point immediates */
	if (is_Const(node)) {
		ir_entity *entity = ia32_create_float_const_entity(node);
		addr->base  = get_symconst_base();
		addr->index = noreg_GP;
		addr->symconst_ent = entity;
		am->ls_mode = get_type_mode(get_entity_type(entity));
		/* constant loads may float freely */
		am->pinned  = op_pin_state_floats;

	/* Proj(Load) case: take address/mode/pin state from the Load */
	load    = get_Proj_pred(node);
	ptr     = get_Load_ptr(load);
	mem     = get_Load_mem(load);
	new_mem = be_transform_node(mem);
	am->pinned   = get_irn_pinned(load);
	am->ls_mode  = get_Load_mode(load);
	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);

	/* construct load address */
	ia32_create_address_mode(addr, ptr, flags);

	addr->base  = addr->base ? be_transform_node(addr->base) : noreg_GP;
	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
666 static void set_address(ir_node *node, const ia32_address_t *addr)
668 set_ia32_am_scale(node, addr->scale);
669 set_ia32_am_sc(node, addr->symconst_ent);
670 set_ia32_am_offs_int(node, addr->offset);
671 if (addr->symconst_sign)
672 set_ia32_am_sc_sign(node);
674 set_ia32_use_frame(node);
675 set_ia32_frame_ent(node, addr->frame_entity);
679 * Apply attributes of a given address mode to a node.
681 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
683 set_address(node, &am->addr);
685 set_ia32_op_type(node, am->op_type);
686 set_ia32_ls_mode(node, am->ls_mode);
687 if (am->pinned == op_pin_state_pinned) {
688 /* beware: some nodes are already pinned and did not allow to change the state */
689 if (get_irn_pinned(node) != op_pin_state_pinned)
690 set_irn_pinned(node, op_pin_state_pinned);
693 set_ia32_commutative(node);
697 * Check, if a given node is a Down-Conv, ie. a integer Conv
698 * from a mode with a mode with more bits to a mode with lesser bits.
699 * Moreover, we return only true if the node has not more than 1 user.
701 * @param node the node
702 * @return non-zero if node is a Down-Conv
704 static int is_downconv(const ir_node *node)
712 /* we only want to skip the conv when we're the only user
713 * (because this test is used in the context of address-mode selection
714 * and we don't want to use address mode for multiple users) */
715 if (get_irn_n_edges(node) > 1)
718 src_mode = get_irn_mode(get_Conv_op(node));
719 dest_mode = get_irn_mode(node);
721 ia32_mode_needs_gp_reg(src_mode) &&
722 ia32_mode_needs_gp_reg(dest_mode) &&
723 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
726 /** Skip all Down-Conv's on a given node and return the resulting node. */
727 ir_node *ia32_skip_downconv(ir_node *node)
729 while (is_downconv(node))
730 node = get_Conv_op(node);
735 static bool is_sameconv(ir_node *node)
743 /* we only want to skip the conv when we're the only user
744 * (because this test is used in the context of address-mode selection
745 * and we don't want to use address mode for multiple users) */
746 if (get_irn_n_edges(node) > 1)
749 src_mode = get_irn_mode(get_Conv_op(node));
750 dest_mode = get_irn_mode(node);
752 ia32_mode_needs_gp_reg(src_mode) &&
753 ia32_mode_needs_gp_reg(dest_mode) &&
754 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
757 /** Skip all signedness convs */
758 static ir_node *ia32_skip_sameconv(ir_node *node)
760 while (is_sameconv(node))
761 node = get_Conv_op(node);
766 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
768 ir_mode *mode = get_irn_mode(node);
773 if (mode_is_signed(mode)) {
778 block = get_nodes_block(node);
779 dbgi = get_irn_dbg_info(node);
781 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * matches operands of a node into ia32 addressing/operand modes. This covers
 * usage of source address mode, immediates, operations with non 32-bit modes,
 * The resulting data is filled into the @p am struct. block is the block
 * of the node whose arguments are matched. op1, op2 are the first and second
 * input that are matched (op1 may be NULL). other_op is another unrelated
 * input that is not matched! but which is needed sometimes to check if AM
 * for op1/op2 is legal.
 * @p flags describes the supported modes of the operation in detail.
 *
 * NOTE(review): this extract is missing many lines (declarations of
 * use_am/use_immediate, flag clearing for small modes, else branches,
 * closing braces); code tokens are kept unchanged.
 */
static void match_arguments(ia32_address_mode_t *am, ir_node *block,
		ir_node *op1, ir_node *op2, ir_node *other_op,
	ia32_address_t *addr      = &am->addr;
	ir_mode        *mode      = get_irn_mode(op2);
	int             mode_bits = get_mode_size_bits(mode);
	ir_node        *new_op1, *new_op2;
	unsigned        commutative;
	int             use_am_and_immediates;

	memset(am, 0, sizeof(am[0]));

	commutative           = (flags & match_commutative) != 0;
	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
	use_am                = (flags & match_am) != 0;
	use_immediate         = (flags & match_immediate) != 0;
	assert(!use_am_and_immediates || use_immediate);

	assert(!commutative || op1 != NULL);
	assert(use_am || !(flags & match_8bit_am));
	assert(use_am || !(flags & match_16bit_am));

	/* small modes without explicit 8/16 bit AM support cannot use AM */
	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
	    (mode_bits == 16 && !(flags & match_16bit_am))) {

	/* we can simply skip downconvs for mode neutral nodes: the upper bits
	 * can be random for these operations */
	if (flags & match_mode_neutral) {
		op2 = ia32_skip_downconv(op2);
		op1 = ia32_skip_downconv(op1);
		op2 = ia32_skip_sameconv(op2);
		op1 = ia32_skip_sameconv(op1);

	/* match immediates. firm nodes are normalized: constants are always on the
	 * right side of a binary op */
	if (!(flags & match_try_am) && use_immediate) {
		new_op2 = ia32_try_create_Immediate(op2, 0);

	/* try source address mode for the right operand... */
	if (new_op2 == NULL &&
	    use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
		build_address(am, op2, ia32_create_am_normal);
		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
		if (mode_is_float(mode)) {
			new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
		am->op_type = ia32_AddrModeS;
	/* ...or for the left operand when the operation commutes */
	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
	    ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
		build_address(am, op1, ia32_create_am_normal);

		if (mode_is_float(mode)) {
			noreg = ia32_new_NoReg_vfp(current_ir_graph);
		if (new_op2 != NULL) {
			new_op1 = be_transform_node(op2);
		/* operand order was swapped, remember for code emission */
		am->ins_permuted = 1;
		am->op_type = ia32_AddrModeS;
	/* plain register/register form */
		am->op_type = ia32_Normal;

		if (flags & match_try_am) {
		mode = get_irn_mode(op2);
		if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
			new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
			new_op2 = create_upconv(op2, NULL);
			am->ls_mode = mode_Iu;
			new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
			new_op2 = be_transform_node(op2);
			am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;

	/* fill in default registers for unused address parts */
	if (addr->base == NULL)
		addr->base = noreg_GP;
	if (addr->index == NULL)
		addr->index = noreg_GP;
	if (addr->mem == NULL)

	am->new_op1     = new_op1;
	am->new_op2     = new_op2;
	am->commutative = commutative;
913 * "Fixes" a node that uses address mode by turning it into mode_T
914 * and returning a pn_ia32_res Proj.
916 * @param node the node
917 * @param am its address mode
919 * @return a Proj(pn_ia32_res) if a memory address mode is used,
922 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
927 if (am->mem_proj == NULL)
930 /* we have to create a mode_T so the old MemProj can attach to us */
931 mode = get_irn_mode(node);
932 load = get_Proj_pred(am->mem_proj);
934 be_set_transformed_node(load, node);
936 if (mode != mode_T) {
937 set_irn_mode(node, mode_T);
938 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
945 * Construct a standard binary operation, set AM and immediate if required.
947 * @param node The original node for which the binop is created
948 * @param op1 The first operand
949 * @param op2 The second operand
950 * @param func The node constructor function
951 * @return The constructed ia32 node.
953 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
954 construct_binop_func *func, match_flags_t flags)
957 ir_node *block, *new_block, *new_node;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
961 block = get_nodes_block(node);
962 match_arguments(&am, block, op1, op2, NULL, flags);
964 dbgi = get_irn_dbg_info(node);
965 new_block = be_transform_node(block);
966 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
981 * Generic names for the inputs of an ia32 binary op.
984 n_ia32_l_binop_left, /**< ia32 left input */
985 n_ia32_l_binop_right, /**< ia32 right input */
986 n_ia32_l_binop_eflags /**< ia32 eflags input */
988 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
989 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
990 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
991 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
992 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
993 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
996 * Construct a binary operation which also consumes the eflags.
998 * @param node The node to transform
999 * @param func The node constructor function
1000 * @param flags The match flags
1001 * @return The constructor ia32 node
1003 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1004 match_flags_t flags)
1006 ir_node *src_block = get_nodes_block(node);
1007 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1008 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1009 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1011 ir_node *block, *new_node, *new_eflags;
1012 ia32_address_mode_t am;
1013 ia32_address_t *addr = &am.addr;
1015 match_arguments(&am, src_block, op1, op2, eflags, flags);
1017 dbgi = get_irn_dbg_info(node);
1018 block = be_transform_node(src_block);
1019 new_eflags = be_transform_node(eflags);
1020 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1021 am.new_op1, am.new_op2, new_eflags);
1022 set_am_attributes(new_node, &am);
1023 /* we can't use source address mode anymore when using immediates */
1024 if (!(flags & match_am_and_immediates) &&
1025 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1026 set_ia32_am_support(new_node, ia32_am_none);
1027 SET_IA32_ORIG_NODE(new_node, node);
1029 new_node = fix_mem_proj(new_node, &am);
1034 static ir_node *get_fpcw(void)
1037 if (initial_fpcw != NULL)
1038 return initial_fpcw;
1040 fpcw = be_abi_get_ignore_irn(be_get_irg_abi(current_ir_graph),
1041 &ia32_registers[REG_FPCW]);
1042 initial_fpcw = be_transform_node(fpcw);
1044 return initial_fpcw;
1048 * Construct a standard binary operation, set AM and immediate if required.
1050 * @param op1 The first operand
1051 * @param op2 The second operand
1052 * @param func The node constructor function
1053 * @return The constructed ia32 node.
1055 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1056 construct_binop_float_func *func)
1058 ir_mode *mode = get_irn_mode(node);
1060 ir_node *block, *new_block, *new_node;
1061 ia32_address_mode_t am;
1062 ia32_address_t *addr = &am.addr;
1063 ia32_x87_attr_t *attr;
1064 /* All operations are considered commutative, because there are reverse
1066 match_flags_t flags = match_commutative;
1068 /* happens for div nodes... */
1069 if (mode == mode_T) {
1071 mode = get_Div_resmode(node);
1072 else if (is_Mod(node))
1073 mode = get_Mod_resmode(node);
1075 panic("can't determine mode");
1078 /* cannot use address mode with long double on x87 */
1079 if (get_mode_size_bits(mode) <= 64)
1082 block = get_nodes_block(node);
1083 match_arguments(&am, block, op1, op2, NULL, flags);
1085 dbgi = get_irn_dbg_info(node);
1086 new_block = be_transform_node(block);
1087 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1088 am.new_op1, am.new_op2, get_fpcw());
1089 set_am_attributes(new_node, &am);
1091 attr = get_ia32_x87_attr(new_node);
1092 attr->attr.data.ins_permuted = am.ins_permuted;
1094 SET_IA32_ORIG_NODE(new_node, node);
1096 new_node = fix_mem_proj(new_node, &am);
1102 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1104 * @param op1 The first operand
1105 * @param op2 The second operand
1106 * @param func The node constructor function
1107 * @return The constructed ia32 node.
1109 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1110 construct_shift_func *func,
1111 match_flags_t flags)
1114 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1116 assert(! mode_is_float(get_irn_mode(node)));
1117 assert(flags & match_immediate);
1118 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1120 if (flags & match_mode_neutral) {
1121 op1 = ia32_skip_downconv(op1);
1122 new_op1 = be_transform_node(op1);
1123 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1124 new_op1 = create_upconv(op1, node);
1126 new_op1 = be_transform_node(op1);
1129 /* the shift amount can be any mode that is bigger than 5 bits, since all
1130 * other bits are ignored anyway */
1131 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1132 ir_node *const op = get_Conv_op(op2);
1133 if (mode_is_float(get_irn_mode(op)))
1136 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1138 new_op2 = create_immediate_or_transform(op2, 0);
1140 dbgi = get_irn_dbg_info(node);
1141 block = get_nodes_block(node);
1142 new_block = be_transform_node(block);
1143 new_node = func(dbgi, new_block, new_op1, new_op2);
1144 SET_IA32_ORIG_NODE(new_node, node);
1146 /* lowered shift instruction may have a dependency operand, handle it here */
1147 if (get_irn_arity(node) == 3) {
1148 /* we have a dependency */
1149 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1150 add_irn_dep(new_node, new_dep);
1158 * Construct a standard unary operation, set AM and immediate if required.
1160 * @param op The operand
1161 * @param func The node constructor function
1162 * @return The constructed ia32 node.
1164 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1165 match_flags_t flags)
1168 ir_node *block, *new_block, *new_op, *new_node;
1170 assert(flags == 0 || flags == match_mode_neutral);
1171 if (flags & match_mode_neutral) {
1172 op = ia32_skip_downconv(op);
1175 new_op = be_transform_node(op);
1176 dbgi = get_irn_dbg_info(node);
1177 block = get_nodes_block(node);
1178 new_block = be_transform_node(block);
1179 new_node = func(dbgi, new_block, new_op);
1181 SET_IA32_ORIG_NODE(new_node, node);
1186 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1187 ia32_address_t *addr)
1189 ir_node *base, *index, *res;
1195 base = be_transform_node(base);
1198 index = addr->index;
1199 if (index == NULL) {
1202 index = be_transform_node(index);
1205 res = new_bd_ia32_Lea(dbgi, block, base, index);
1206 set_address(res, addr);
1212 * Returns non-zero if a given address mode has a symbolic or
1213 * numerical offset != 0.
1215 static int am_has_immediates(const ia32_address_t *addr)
1217 return addr->offset != 0 || addr->symconst_ent != NULL
1218 || addr->frame_entity || addr->use_frame;
1222 * Creates an ia32 Add.
1224 * @return the created ia32 Add node
1226 static ir_node *gen_Add(ir_node *node)
1228 ir_mode *mode = get_irn_mode(node);
1229 ir_node *op1 = get_Add_left(node);
1230 ir_node *op2 = get_Add_right(node);
1232 ir_node *block, *new_block, *new_node, *add_immediate_op;
1233 ia32_address_t addr;
1234 ia32_address_mode_t am;
1236 if (mode_is_float(mode)) {
/* float add: SSE2 xAdd if available, otherwise x87 vfadd */
1237 if (ia32_cg_config.use_sse2)
1238 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1239 match_commutative | match_am);
1241 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1244 ia32_mark_non_am(node);
/* Add is mode neutral: narrowing Convs on both operands can be dropped */
1246 op2 = ia32_skip_downconv(op2);
1247 op1 = ia32_skip_downconv(op1);
/* strategy (in order of preference):
1251 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1252 * 1. Add with immediate -> Lea
1253 * 2. Add with possible source address mode -> Add
1254 * 3. Otherwise -> Lea
 */
1256 memset(&addr, 0, sizeof(addr));
/* force construction of an address mode from the whole Add tree */
1257 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1258 add_immediate_op = NULL;
1260 dbgi = get_irn_dbg_info(node);
1261 block = get_nodes_block(node);
1262 new_block = be_transform_node(block);
/* case 0: the whole tree folded into pure immediates -> a single Const */
1265 if (addr.base == NULL && addr.index == NULL) {
1266 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1267 addr.symconst_sign, 0, addr.offset);
1268 be_dep_on_frame(new_node);
1269 SET_IA32_ORIG_NODE(new_node, node);
1272 /* add with immediate? */
1273 if (addr.index == NULL) {
1274 add_immediate_op = addr.base;
1275 } else if (addr.base == NULL && addr.scale == 0) {
1276 add_immediate_op = addr.index;
1279 if (add_immediate_op != NULL) {
/* no immediate parts at all: this was effectively Add(x, 0) */
1280 if (!am_has_immediates(&addr)) {
1281 #ifdef DEBUG_libfirm
1282 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1285 return be_transform_node(add_immediate_op);
/* case 1: register + immediate is expressed as a Lea */
1288 new_node = create_lea_from_address(dbgi, new_block, &addr);
1289 SET_IA32_ORIG_NODE(new_node, node);
1293 /* test if we can use source address mode */
1294 match_arguments(&am, block, op1, op2, NULL, match_commutative
1295 | match_mode_neutral | match_am | match_immediate | match_try_am);
1297 /* construct an Add with source address mode */
1298 if (am.op_type == ia32_AddrModeS) {
1299 ia32_address_t *am_addr = &am.addr;
1300 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1301 am_addr->index, am_addr->mem, am.new_op1,
1303 set_am_attributes(new_node, &am);
1304 SET_IA32_ORIG_NODE(new_node, node);
1306 new_node = fix_mem_proj(new_node, &am);
1311 /* otherwise construct a lea */
1312 new_node = create_lea_from_address(dbgi, new_block, &addr);
1313 SET_IA32_ORIG_NODE(new_node, node);
1318 * Creates an ia32 Mul.
1320 * @return the created ia32 Mul node
1322 static ir_node *gen_Mul(ir_node *node)
1324 ir_node *op1 = get_Mul_left(node);
1325 ir_node *op2 = get_Mul_right(node);
1326 ir_mode *mode = get_irn_mode(node);
1328 if (mode_is_float(mode)) {
/* float multiply: SSE2 xMul when enabled, otherwise x87 vfmul */
1329 if (ia32_cg_config.use_sse2)
1330 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1331 match_commutative | match_am);
1333 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply maps onto IMul; commutative, AM and immediates allowed */
1335 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1336 match_commutative | match_am | match_mode_neutral |
1337 match_immediate | match_am_and_immediates);
1341 * Creates an ia32 Mulh.
1342 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1343 * this result while Mul returns the lower 32 bit.
1345 * @return the created ia32 Mulh node
1347 static ir_node *gen_Mulh(ir_node *node)
1349 dbg_info *dbgi = get_irn_dbg_info(node);
1350 ir_node *op1 = get_Mulh_left(node);
1351 ir_node *op2 = get_Mulh_right(node);
1352 ir_mode *mode = get_irn_mode(node);
1354 ir_node *proj_res_high;
1356 if (get_mode_size_bits(mode) != 32) {
1357 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed uses one-operand IMul, unsigned uses Mul; in both cases project
 * out the high half of the 64 bit result */
1360 if (mode_is_signed(mode)) {
1361 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1362 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1364 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1365 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1367 return proj_res_high;
1371 * Creates an ia32 And.
1373 * @return The created ia32 And node
1375 static ir_node *gen_And(ir_node *node)
1377 ir_node *op1 = get_And_left(node);
1378 ir_node *op2 = get_And_right(node);
1379 assert(! mode_is_float(get_irn_mode(node)));
1381 /* is it a zero extension? */
1382 if (is_Const(op2)) {
1383 ir_tarval *tv = get_Const_tarval(op2);
1384 long v = get_tarval_long(tv);
/* And with 0xFF / 0xFFFF is a zero-extension from 8/16 bit; emit a
 * Conv (movzx) instead of a real And */
1386 if (v == 0xFF || v == 0xFFFF) {
1387 dbg_info *dbgi = get_irn_dbg_info(node);
1388 ir_node *block = get_nodes_block(node);
1395 assert(v == 0xFFFF);
1398 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1403 return gen_binop(node, op1, op2, new_bd_ia32_And,
1404 match_commutative | match_mode_neutral | match_am | match_immediate);
1410 * Creates an ia32 Or.
1412 * @return The created ia32 Or node
1414 static ir_node *gen_Or(ir_node *node)
1416 ir_node *op1 = get_Or_left(node);
1417 ir_node *op2 = get_Or_right(node);
/* float Or is not expected here */
1419 assert (! mode_is_float(get_irn_mode(node)));
1420 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1421 | match_mode_neutral | match_am | match_immediate);
1427 * Creates an ia32 Eor.
1429 * @return The created ia32 Eor node
1431 static ir_node *gen_Eor(ir_node *node)
1433 ir_node *op1 = get_Eor_left(node);
1434 ir_node *op2 = get_Eor_right(node);
/* firm Eor (exclusive or) becomes ia32 Xor */
1436 assert(! mode_is_float(get_irn_mode(node)));
1437 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1438 | match_mode_neutral | match_am | match_immediate);
1443 * Creates an ia32 Sub.
1445 * @return The created ia32 Sub node
1447 static ir_node *gen_Sub(ir_node *node)
1449 ir_node *op1 = get_Sub_left(node);
1450 ir_node *op2 = get_Sub_right(node);
1451 ir_mode *mode = get_irn_mode(node);
1453 if (mode_is_float(mode)) {
/* float subtract: SSE2 xSub when enabled, otherwise x87 vfsub;
 * not commutative, so no match_commutative flag */
1454 if (ia32_cg_config.use_sse2)
1455 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1457 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub(x, Const) should have been normalized to Add(x, -Const) earlier */
1460 if (is_Const(op2)) {
1461 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1465 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1466 | match_am | match_immediate);
/* Build the memory input for a node that consumed a load via address mode.
 * Combines the original memory predecessor (src_mem) with the memory of the
 * consumed address (am_mem) while avoiding self-referencing memory loops. */
1469 static ir_node *transform_AM_mem(ir_node *const block,
1470 ir_node *const src_val,
1471 ir_node *const src_mem,
1472 ir_node *const am_mem)
1474 if (is_NoMem(am_mem)) {
/* no memory consumed by the address mode: just use the original memory */
1475 return be_transform_node(src_mem);
1476 } else if (is_Proj(src_val) &&
1478 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1479 /* avoid memory loop */
1481 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1482 ir_node *const ptr_pred = get_Proj_pred(src_val);
1483 int const arity = get_Sync_n_preds(src_mem);
/* rebuild the Sync with the consumed load's memory Proj filtered out */
1488 NEW_ARR_A(ir_node*, ins, arity + 1);
1490 /* NOTE: This sometimes produces dead-code because the old sync in
1491 * src_mem might not be used anymore, we should detect this case
1492 * and kill the sync... */
1493 for (i = arity - 1; i >= 0; --i) {
1494 ir_node *const pred = get_Sync_pred(src_mem, i);
1496 /* avoid memory loop */
1497 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1500 ins[n++] = be_transform_node(pred);
1505 return new_r_Sync(block, n, ins);
/* fallback: sync the transformed original memory with am_mem */
1509 ins[0] = be_transform_node(src_mem);
1511 return new_r_Sync(block, 2, ins);
1516 * Create a 32bit to 64bit signed extension.
1518 * @param dbgi debug info
1519 * @param block the block where node nodes should be placed
1520 * @param val the value to extend
1521 * @param orig the original node
1523 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1524 ir_node *val, const ir_node *orig)
/* two strategies: the short cltd/cdq form (needs val in eax, ProduceVal
 * models the implicit edx output) or an explicit arithmetic shift by 31 */
1529 if (ia32_cg_config.use_short_sex_eax) {
1530 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1531 be_dep_on_frame(pval);
1532 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1534 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
/* sar val, 31 replicates the sign bit into all 32 bits */
1535 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1537 SET_IA32_ORIG_NODE(res, orig);
1542 * Generates an ia32 Div with additional infrastructure for the
1543 * register allocator if needed.
/* Shared transformation for firm Div and Mod nodes (both map to the ia32
 * divide instructions, which produce quotient and remainder at once). */
1545 static ir_node *create_Div(ir_node *node)
1547 dbg_info *dbgi = get_irn_dbg_info(node);
1548 ir_node *block = get_nodes_block(node);
1549 ir_node *new_block = be_transform_node(block);
1556 ir_node *sign_extension;
1557 ia32_address_mode_t am;
1558 ia32_address_t *addr = &am.addr;
1560 /* the upper bits have random contents for smaller modes */
1561 switch (get_irn_opcode(node)) {
1563 op1 = get_Div_left(node);
1564 op2 = get_Div_right(node);
1565 mem = get_Div_mem(node);
1566 mode = get_Div_resmode(node);
1569 op1 = get_Mod_left(node);
1570 op2 = get_Mod_right(node);
1571 mem = get_Mod_mem(node);
1572 mode = get_Mod_resmode(node);
1575 panic("invalid divmod node %+F", node);
/* require operands upconverted to 32 bit; address mode allowed for op2 */
1578 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1580 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1581 is the memory of the consumed address. We can have only the second op as address
1582 in Div nodes, so check only op2. */
1583 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1585 if (mode_is_signed(mode)) {
/* signed: idiv needs edx:eax, edx = sign extension of the dividend */
1586 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1587 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1588 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned: div needs edx = 0 */
1590 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1591 be_dep_on_frame(sign_extension);
1593 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1594 addr->index, new_mem, am.new_op2,
1595 am.new_op1, sign_extension);
/* keep the original pinned state (division may trap) */
1598 set_irn_pinned(new_node, get_irn_pinned(node));
1600 set_am_attributes(new_node, &am);
1601 SET_IA32_ORIG_NODE(new_node, node);
1603 new_node = fix_mem_proj(new_node, &am);
1609 * Generates an ia32 Mod.
1611 static ir_node *gen_Mod(ir_node *node)
/* Div and Mod share one instruction; create_Div handles both opcodes */
1613 return create_Div(node);
1617 * Generates an ia32 Div.
1619 static ir_node *gen_Div(ir_node *node)
/* delegates to the shared Div/Mod transformation */
1621 return create_Div(node);
1626 * Creates an ia32 floating Div.
1628 * @return The created ia32 xDiv node
1630 static ir_node *gen_Quot(ir_node *node)
1632 ir_node *op1 = get_Quot_left(node);
1633 ir_node *op2 = get_Quot_right(node);
/* float division: SSE2 xDiv when enabled, otherwise x87 vfdiv */
1635 if (ia32_cg_config.use_sse2) {
1636 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1638 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1644 * Creates an ia32 Shl.
1646 * @return The created ia32 Shl node
1648 static ir_node *gen_Shl(ir_node *node)
1650 ir_node *left = get_Shl_left(node);
1651 ir_node *right = get_Shl_right(node);
/* left shift is mode neutral (upper bits of the input don't matter) */
1653 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1654 match_mode_neutral | match_immediate);
1658 * Creates an ia32 Shr.
1660 * @return The created ia32 Shr node
1662 static ir_node *gen_Shr(ir_node *node)
1664 ir_node *left = get_Shr_left(node);
1665 ir_node *right = get_Shr_right(node);
/* logical right shift: NOT mode neutral, upper bits must be correct */
1667 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1673 * Creates an ia32 Sar.
1675 * @return The created ia32 Shrs node
1677 static ir_node *gen_Shrs(ir_node *node)
1679 ir_node *left = get_Shrs_left(node);
1680 ir_node *right = get_Shrs_right(node);
1682 if (is_Const(right)) {
1683 ir_tarval *tv = get_Const_tarval(right);
1684 long val = get_tarval_long(tv);
/* Shrs by (elided) 31 extracts just the sign bit */
1686 /* this is a sign extension */
1687 dbg_info *dbgi = get_irn_dbg_info(node);
1688 ir_node *block = be_transform_node(get_nodes_block(node));
1689 ir_node *new_op = be_transform_node(left);
1691 return create_sex_32_64(dbgi, block, new_op, node);
1695 /* 8 or 16 bit sign extension? */
1696 if (is_Const(right) && is_Shl(left)) {
1697 ir_node *shl_left = get_Shl_left(left);
1698 ir_node *shl_right = get_Shl_right(left);
1699 if (is_Const(shl_right)) {
1700 ir_tarval *tv1 = get_Const_tarval(right);
1701 ir_tarval *tv2 = get_Const_tarval(shl_right);
/* Shrs(Shl(x, c), c) with c == 16 or 24 is a sign extension from
 * 16 or 8 bit; emit a Conv (movsx) instead of the shift pair */
1702 if (tv1 == tv2 && tarval_is_long(tv1)) {
1703 long val = get_tarval_long(tv1);
1704 if (val == 16 || val == 24) {
1705 dbg_info *dbgi = get_irn_dbg_info(node);
1706 ir_node *block = get_nodes_block(node);
1716 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain Sar */
1725 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1731 * Creates an ia32 Rol.
1733 * @param op1 The first operator
1734 * @param op2 The second operator
1735 * @return The created ia32 RotL node
1737 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1739 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1745 * Creates an ia32 Ror.
1746 * NOTE: There is no RotR with immediate because this would always be a RotL
1747 * "imm-mode_size_bits" which can be pre-calculated.
1749 * @param op1 The first operator
1750 * @param op2 The second operator
1751 * @return The created ia32 RotR node
1753 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1755 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1761 * Creates an ia32 RotR or RotL (depending on the found pattern).
1763 * @return The created ia32 RotL or RotR node
1765 static ir_node *gen_Rotl(ir_node *node)
1767 ir_node *op1 = get_Rotl_left(node);
1768 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -y) is the same as Ror(x, y); match that pattern here */
1770 if (is_Minus(op2)) {
1771 return gen_Ror(node, op1, get_Minus_op(op2));
1774 return gen_Rol(node, op1, op2);
1780 * Transforms a Minus node.
1782 * @return The created ia32 Minus node
1784 static ir_node *gen_Minus(ir_node *node)
1786 ir_node *op = get_Minus_op(node);
1787 ir_node *block = be_transform_node(get_nodes_block(node));
1788 dbg_info *dbgi = get_irn_dbg_info(node);
1789 ir_mode *mode = get_irn_mode(node);
1794 if (mode_is_float(mode)) {
1795 ir_node *new_op = be_transform_node(op);
1796 if (ia32_cg_config.use_sse2) {
1797 /* TODO: non-optimal... if we have many xXors, then we should
1798 * rather create a load for the const and use that instead of
1799 * several AM nodes... */
/* SSE float negate: xor with a sign-bit constant loaded via address
 * mode from a known-constant entity (SSIGN/DSIGN) */
1800 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1802 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1803 noreg_GP, nomem, new_op, noreg_xmm);
1805 size = get_mode_size_bits(mode);
1806 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1808 set_ia32_am_sc(new_node, ent);
1809 set_ia32_op_type(new_node, ia32_AddrModeS);
1810 set_ia32_ls_mode(new_node, mode);
/* x87 float negate: fchs */
1812 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1815 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1818 SET_IA32_ORIG_NODE(new_node, node);
1824 * Transforms a Not node.
1826 * @return The created ia32 Not node
1828 static ir_node *gen_Not(ir_node *node)
1830 ir_node *op = get_Not_op(node);
1832 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1833 assert (! mode_is_float(get_irn_mode(node)));
1835 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Create an absolute value (optionally negated, i.e. -|op|).
 * Floats: SSE uses And with an abs-mask constant, x87 uses fabs (+fchs for
 * negate). Integers use the classic branch-free xor/sub with the sign mask. */
1838 static ir_node *create_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1839 bool negate, ir_node *node)
1841 ir_node *new_block = be_transform_node(block);
1842 ir_mode *mode = get_irn_mode(op);
1848 if (mode_is_float(mode)) {
1849 new_op = be_transform_node(op);
1851 if (ia32_cg_config.use_sse2) {
/* SSE abs: clear the sign bit by And-ing with SABS/DABS constant */
1852 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1853 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1854 noreg_GP, nomem, new_op, noreg_fp);
1856 size = get_mode_size_bits(mode);
1857 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1859 set_ia32_am_sc(new_node, ent);
1861 SET_IA32_ORIG_NODE(new_node, node);
1863 set_ia32_op_type(new_node, ia32_AddrModeS);
1864 set_ia32_ls_mode(new_node, mode);
1866 /* TODO, implement -Abs case */
1869 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1870 SET_IA32_ORIG_NODE(new_node, node);
/* negate after fabs to get -|op| */
1872 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1873 SET_IA32_ORIG_NODE(new_node, node);
1878 ir_node *sign_extension;
1880 if (get_mode_size_bits(mode) == 32) {
1881 new_op = be_transform_node(op);
/* smaller modes are first sign-extended to 32 bit */
1883 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
/* mask = val >> 31 (all ones if negative, zero otherwise) */
1886 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1888 xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1889 nomem, new_op, sign_extension);
1890 SET_IA32_ORIG_NODE(xorn, node);
/* |x| = (x ^ mask) - mask; the negated variant swaps the Sub operands */
1893 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1894 nomem, sign_extension, xorn);
1896 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1897 nomem, xorn, sign_extension);
1899 SET_IA32_ORIG_NODE(new_node, node);
1906 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1908 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1910 dbg_info *dbgi = get_irn_dbg_info(cmp);
1911 ir_node *block = get_nodes_block(cmp);
1912 ir_node *new_block = be_transform_node(block);
1913 ir_node *op1 = be_transform_node(x);
1914 ir_node *op2 = be_transform_node(n);
/* bt copies bit op2 of op1 into the carry flag */
1916 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1920 * Transform a node returning a "flag" result.
1922 * @param node the node to transform
1923 * @param pnc_out the compare mode to use
/* Produces an ia32 node whose result lives in the flags register and writes
 * the projection number (possibly annotated with ia32 compare flags) the
 * caller must test into *pnc_out. */
1925 static ir_node *get_flags_node(ir_node *node, int *pnc_out)
1932 /* we have a Cmp as input */
1933 if (is_Proj(node)) {
1934 ir_node *pred = get_Proj_pred(node);
1936 int pnc = get_Proj_pn_cmp(node);
/* try the bt pattern: Cmp((1 << n) & x, 0) or Cmp(x & (1 << n), x-part) */
1937 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1938 ir_node *l = get_Cmp_left(pred);
1939 ir_node *r = get_Cmp_right(pred);
1941 ir_node *la = get_And_left(l);
1942 ir_node *ra = get_And_right(l);
1944 ir_node *c = get_Shl_left(la);
1945 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1946 /* (1 << n) & ra) */
1947 ir_node *n = get_Shl_right(la);
1948 flags = gen_bt(pred, ra, n);
1949 /* we must generate a Jc/Jnc jump */
/* bt puts the tested bit into carry; Lt/Ge unsigned map to jc/jnc */
1950 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1953 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* symmetric case: shift on the right And operand */
1958 ir_node *c = get_Shl_left(ra);
1959 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1960 /* la & (1 << n)) */
1961 ir_node *n = get_Shl_right(ra);
1962 flags = gen_bt(pred, la, n);
1963 /* we must generate a Jc/Jnc jump */
1964 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1967 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1973 /* add ia32 compare flags */
1975 ir_node *l = get_Cmp_left(pred);
1976 ir_mode *mode = get_irn_mode(l);
1977 if (mode_is_float(mode))
1978 pnc |= ia32_pn_Cmp_float;
1979 else if (! mode_is_signed(mode))
1980 pnc |= ia32_pn_Cmp_unsigned;
1983 flags = be_transform_node(pred);
1988 /* a mode_b value, we have to compare it against 0 */
1989 dbgi = get_irn_dbg_info(node);
1990 new_block = be_transform_node(get_nodes_block(node));
1991 new_op = be_transform_node(node);
/* test x, x sets ZF according to x == 0 */
1992 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1993 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1994 *pnc_out = pn_Cmp_Lg;
1999 * Transforms a Load.
2001 * @return the created ia32 Load node
2003 static ir_node *gen_Load(ir_node *node)
2005 ir_node *old_block = get_nodes_block(node);
2006 ir_node *block = be_transform_node(old_block);
2007 ir_node *ptr = get_Load_ptr(node);
2008 ir_node *mem = get_Load_mem(node);
2009 ir_node *new_mem = be_transform_node(mem);
2012 dbg_info *dbgi = get_irn_dbg_info(node);
2013 ir_mode *mode = get_Load_mode(node);
2015 ia32_address_t addr;
2017 /* construct load address */
2018 memset(&addr, 0, sizeof(addr));
2019 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2026 base = be_transform_node(base);
2029 if (index == NULL) {
2032 index = be_transform_node(index);
2035 if (mode_is_float(mode)) {
/* float loads: SSE xLoad or x87 vfld */
2036 if (ia32_cg_config.use_sse2) {
2037 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2040 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2044 assert(mode != mode_b);
2046 /* create a conv node with address mode for smaller modes */
2047 if (get_mode_size_bits(mode) < 32) {
2048 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2049 new_mem, noreg_GP, mode);
2051 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2055 set_irn_pinned(new_node, get_irn_pinned(node));
2056 set_ia32_op_type(new_node, ia32_AddrModeS);
2057 set_ia32_ls_mode(new_node, mode);
2058 set_address(new_node, &addr);
2060 if (get_irn_pinned(node) == op_pin_state_floats) {
/* unpinned loads may be rematerialized; all load variants must agree
 * on the result proj number for this to be safe */
2061 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2062 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2063 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2064 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2067 SET_IA32_ORIG_NODE(new_node, node);
2069 be_dep_on_frame(new_node);
/* Decide whether a value (a Load result Proj) can be folded into a
 * destination-address-mode operation in @p block. @p other is the second
 * operand of the candidate op (may be NULL); it must not depend on the load. */
2073 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2074 ir_node *ptr, ir_node *other)
2081 /* we only use address mode if we're the only user of the load */
2082 if (get_irn_n_edges(node) > 1)
2085 load = get_Proj_pred(node);
/* load must live in the same block as the consuming store */
2088 if (get_nodes_block(load) != block)
2091 /* store should have the same pointer as the load */
2092 if (get_Load_ptr(load) != ptr)
2095 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2096 if (other != NULL &&
2097 get_nodes_block(other) == block &&
2098 heights_reachable_in_block(ia32_heights, other, load)) {
2102 if (ia32_prevents_AM(block, load, mem))
2104 /* Store should be attached to the load via mem */
2105 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Construct a destination-address-mode binary op (read-modify-write, e.g.
 * "add [mem], reg"). func8bit is used for 8 bit modes, func otherwise.
 * Returns NULL-equivalent path (elided) when dest-AM is not applicable. */
2110 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2111 ir_node *mem, ir_node *ptr, ir_mode *mode,
2112 construct_binop_dest_func *func,
2113 construct_binop_dest_func *func8bit,
2114 match_flags_t flags)
2116 ir_node *src_block = get_nodes_block(node);
2124 ia32_address_mode_t am;
2125 ia32_address_t *addr = &am.addr;
2126 memset(&am, 0, sizeof(am));
2128 assert(flags & match_immediate); /* there is no destam node without... */
2129 commutative = (flags & match_commutative) != 0;
/* try folding op1 as the memory operand; if the op is commutative also
 * try op2 */
2131 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2132 build_address(&am, op1, ia32_create_am_double_use);
2133 new_op = create_immediate_or_transform(op2, 0);
2134 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2135 build_address(&am, op2, ia32_create_am_double_use);
2136 new_op = create_immediate_or_transform(op1, 0);
2141 if (addr->base == NULL)
2142 addr->base = noreg_GP;
2143 if (addr->index == NULL)
2144 addr->index = noreg_GP;
2145 if (addr->mem == NULL)
2148 dbgi = get_irn_dbg_info(node);
2149 block = be_transform_node(src_block);
2150 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2152 if (get_mode_size_bits(mode) == 8) {
2153 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2155 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2157 set_address(new_node, addr);
2158 set_ia32_op_type(new_node, ia32_AddrModeD);
2159 set_ia32_ls_mode(new_node, mode);
2160 SET_IA32_ORIG_NODE(new_node, node);
/* reroute users of the consumed load's memory to the new node */
2162 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2163 mem_proj = be_transform_node(am.mem_proj);
2164 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Construct a destination-address-mode unary op (e.g. "not [mem]").
 * Returns early (elided) when the operand cannot be folded as dest-AM. */
2169 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2170 ir_node *ptr, ir_mode *mode,
2171 construct_unop_dest_func *func)
2173 ir_node *src_block = get_nodes_block(node);
2179 ia32_address_mode_t am;
2180 ia32_address_t *addr = &am.addr;
2182 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2185 memset(&am, 0, sizeof(am));
2186 build_address(&am, op, ia32_create_am_double_use);
2188 dbgi = get_irn_dbg_info(node);
2189 block = be_transform_node(src_block);
2190 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2191 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2192 set_address(new_node, addr);
2193 set_ia32_op_type(new_node, ia32_AddrModeD);
2194 set_ia32_ls_mode(new_node, mode);
2195 SET_IA32_ORIG_NODE(new_node, node);
/* reroute users of the consumed load's memory to the new node */
2197 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2198 mem_proj = be_transform_node(am.mem_proj);
2199 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Negate a projection number, honoring the ia32 float-compare flag
 * (float compares negate differently due to unordered results). */
2204 static int ia32_get_negated_pnc(int pnc)
2206 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2207 return get_negated_pnc(pnc, mode);
/* Try to turn Store(Mux(cond, 1, 0)) (or the inverted constant pair) into a
 * single SetccMem instruction. Only valid for 8 bit stores and non-float
 * compares; returns via the elided failure paths otherwise. */
2210 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2212 ir_mode *mode = get_irn_mode(node);
2213 ir_node *mux_true = get_Mux_true(node);
2214 ir_node *mux_false = get_Mux_false(node);
2223 ia32_address_t addr;
/* setcc writes a single byte */
2225 if (get_mode_size_bits(mode) != 8)
2228 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2230 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2236 cond = get_Mux_sel(node);
2237 flags = get_flags_node(cond, &pnc);
2238 /* we can't handle the float special cases with SetM */
2239 if (pnc & ia32_pn_Cmp_float)
/* inverted constant pair: store the negated condition */
2242 pnc = ia32_get_negated_pnc(pnc);
2244 build_address_ptr(&addr, ptr, mem);
2246 dbgi = get_irn_dbg_info(node);
2247 block = get_nodes_block(node);
2248 new_block = be_transform_node(block);
2249 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2250 addr.index, addr.mem, flags, pnc);
2251 set_address(new_node, &addr);
2252 set_ia32_op_type(new_node, ia32_AddrModeD);
2253 set_ia32_ls_mode(new_node, mode);
2254 SET_IA32_ORIG_NODE(new_node, node);
/* Try to express Store(op(Load(ptr), x)) as a single read-modify-write
 * instruction (destination address mode). Dispatches on the stored value's
 * opcode; failure paths (elided) fall back to a normal store. */
2259 static ir_node *try_create_dest_am(ir_node *node)
2261 ir_node *val = get_Store_value(node);
2262 ir_node *mem = get_Store_mem(node);
2263 ir_node *ptr = get_Store_ptr(node);
2264 ir_mode *mode = get_irn_mode(val);
2265 unsigned bits = get_mode_size_bits(mode);
2270 /* handle only GP modes for now... */
2271 if (!ia32_mode_needs_gp_reg(mode))
2275 /* store must be the only user of the val node */
2276 if (get_irn_n_edges(val) > 1)
2278 /* skip pointless convs */
2280 ir_node *conv_op = get_Conv_op(val);
2281 ir_mode *pred_mode = get_irn_mode(conv_op);
2282 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a Conv that doesn't widen below the stored width changes nothing here */
2284 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2292 /* value must be in the same block */
2293 if (get_nodes_block(node) != get_nodes_block(val))
2296 switch (get_irn_opcode(val)) {
2298 op1 = get_Add_left(val);
2299 op2 = get_Add_right(val);
/* Add +/-1 becomes IncMem/DecMem when inc/dec are profitable */
2300 if (ia32_cg_config.use_incdec) {
2301 if (is_Const_1(op2)) {
2302 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2304 } else if (is_Const_Minus_1(op2)) {
2305 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2309 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2310 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2311 match_commutative | match_immediate);
2314 op1 = get_Sub_left(val);
2315 op2 = get_Sub_right(val);
/* Sub with const should have been normalized to Add earlier */
2316 if (is_Const(op2)) {
2317 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2319 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2320 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2324 op1 = get_And_left(val);
2325 op2 = get_And_right(val);
2326 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2327 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2328 match_commutative | match_immediate);
2331 op1 = get_Or_left(val);
2332 op2 = get_Or_right(val);
2333 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2334 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2335 match_commutative | match_immediate);
2338 op1 = get_Eor_left(val);
2339 op2 = get_Eor_right(val);
2340 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2341 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2342 match_commutative | match_immediate);
2345 op1 = get_Shl_left(val);
2346 op2 = get_Shl_right(val);
/* shift-to-memory ops have no separate 8 bit constructor */
2347 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2348 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2352 op1 = get_Shr_left(val);
2353 op2 = get_Shr_right(val);
2354 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2355 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2359 op1 = get_Shrs_left(val);
2360 op2 = get_Shrs_right(val);
2361 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2362 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2366 op1 = get_Rotl_left(val);
2367 op2 = get_Rotl_right(val);
2368 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2369 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2372 /* TODO: match ROR patterns... */
2374 new_node = try_create_SetMem(val, ptr, mem);
2378 op1 = get_Minus_op(val);
2379 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2382 /* should be lowered already */
2383 assert(mode != mode_b);
2384 op1 = get_Not_op(val);
2385 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
2391 if (new_node != NULL) {
/* a pinned Store must stay pinned after the transformation */
2392 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2393 get_irn_pinned(node) == op_pin_state_pinned) {
2394 set_irn_pinned(new_node, op_pin_state_pinned);
/* True if an integer mode can be the target of an x87 fist store:
 * signed and 16 or 32 bits wide (per the checks visible here). */
2401 static bool possible_int_mode_for_fp(ir_mode *mode)
2405 if (!mode_is_signed(mode))
2407 size = get_mode_size_bits(mode);
2408 if (size != 16 && size != 32)
/* True if node is a Conv from a float mode to an integer mode that an
 * x87 fist instruction can store directly. */
2413 static int is_float_to_int_conv(const ir_node *node)
2415 ir_mode *mode = get_irn_mode(node);
2419 if (!possible_int_mode_for_fp(mode))
2424 conv_op = get_Conv_op(node);
2425 conv_mode = get_irn_mode(conv_op);
2427 if (!mode_is_float(conv_mode))
2434 * Transform a Store(floatConst) into a sequence of
2437 * @return the created ia32 Store node
/* The float constant is split into 32 bit chunks and stored as one or more
 * integer immediate stores; multiple stores are joined with a Sync. */
2439 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2441 ir_mode *mode = get_irn_mode(cns);
2442 unsigned size = get_mode_size_bytes(mode);
2443 ir_tarval *tv = get_Const_tarval(cns);
2444 ir_node *block = get_nodes_block(node);
2445 ir_node *new_block = be_transform_node(block);
2446 ir_node *ptr = get_Store_ptr(node);
2447 ir_node *mem = get_Store_mem(node);
2448 dbgi = get_irn_dbg_info(node);
2452 ia32_address_t addr;
/* only whole 32 bit words are stored */
2454 assert(size % 4 == 0);
2457 build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32 bit word from the tarval's bytes */
2461 get_tarval_sub_bits(tv, ofs) |
2462 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2463 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2464 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2465 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2467 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2468 addr.index, addr.mem, imm);
2470 set_irn_pinned(new_node, get_irn_pinned(node));
2471 set_ia32_op_type(new_node, ia32_AddrModeD);
2472 set_ia32_ls_mode(new_node, mode_Iu);
2473 set_address(new_node, &addr);
2474 SET_IA32_ORIG_NODE(new_node, node);
2477 ins[i++] = new_node;
2482 } while (size != 0);
/* join the partial stores' memory outputs */
2485 return new_rd_Sync(dbgi, new_block, i, ins);
2492 * Generate a vfist or vfisttp instruction.
/* Returns the memory result; *fist receives the created store node
 * (assignment elided here). */
2494 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2495 ir_node *mem, ir_node *val, ir_node **fist)
2499 if (ia32_cg_config.use_fisttp) {
2500 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2501 if other users exists */
2502 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2503 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* Keep forces the popped value to be materialized for other users */
2504 be_new_Keep(block, 1, &value);
2506 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* plain fist needs the FPU control word set to truncation rounding */
2509 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2512 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2518 * Transforms a general (no special case) Store.
2520 * @return the created ia32 Store node
2522 static ir_node *gen_general_Store(ir_node *node)
2524 ir_node *val = get_Store_value(node);
2525 ir_mode *mode = get_irn_mode(val);
2526 ir_node *block = get_nodes_block(node);
2527 ir_node *new_block = be_transform_node(block);
2528 ir_node *ptr = get_Store_ptr(node);
2529 ir_node *mem = get_Store_mem(node);
2530 dbg_info *dbgi = get_irn_dbg_info(node);
2531 ir_node *new_val, *new_node, *store;
2532 ia32_address_t addr;
2534 /* check for destination address mode */
2535 new_node = try_create_dest_am(node);
2536 if (new_node != NULL)
2539 /* construct store address */
2540 memset(&addr, 0, sizeof(addr));
2541 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2543 if (addr.base == NULL) {
2544 addr.base = noreg_GP;
2546 addr.base = be_transform_node(addr.base);
2549 if (addr.index == NULL) {
2550 addr.index = noreg_GP;
2552 addr.index = be_transform_node(addr.index);
2554 addr.mem = be_transform_node(mem);
2556 if (mode_is_float(mode)) {
2557 /* Convs (and strict-Convs) before stores are unnecessary if the mode
 * stays the same (condition continues on an elided line) */
2559 while (is_Conv(val) && mode == get_irn_mode(val)) {
2560 ir_node *op = get_Conv_op(val);
2561 if (!mode_is_float(get_irn_mode(op)))
2565 new_val = be_transform_node(val);
/* float store: SSE xStore or x87 vfst */
2566 if (ia32_cg_config.use_sse2) {
2567 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2568 addr.index, addr.mem, new_val);
2570 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2571 addr.index, addr.mem, new_val, mode);
/* float->int Conv feeding the store folds into a fist instruction */
2574 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2575 val = get_Conv_op(val);
2577 /* TODO: is this optimisation still necessary at all (middleend)? */
2578 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2579 while (is_Conv(val)) {
2580 ir_node *op = get_Conv_op(val);
2581 if (!mode_is_float(get_irn_mode(op)))
2583 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2587 new_val = be_transform_node(val);
2588 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store: value may become an immediate operand */
2590 new_val = create_immediate_or_transform(val, 0);
2591 assert(mode != mode_b);
2593 if (get_mode_size_bits(mode) == 8) {
2594 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2595 addr.index, addr.mem, new_val);
2597 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2598 addr.index, addr.mem, new_val);
2603 set_irn_pinned(store, get_irn_pinned(node));
2604 set_ia32_op_type(store, ia32_AddrModeD);
2605 set_ia32_ls_mode(store, mode);
2607 set_address(store, &addr);
2608 SET_IA32_ORIG_NODE(store, node);
/* Dispatcher: float constant stores get lowered to integer stores,
 * everything else goes through the general path. */
2614 * Transforms a Store.
2616 * @return the created ia32 Store node
2618 static ir_node *gen_Store(ir_node *node)
2620 ir_node *val = get_Store_value(node);
2621 ir_mode *mode = get_irn_mode(val);
2623 if (mode_is_float(mode) && is_Const(val)) {
2624 /* We can transform every floating const store
2625 into a sequence of integer stores.
2626 If the constant is already in a register,
2627 it would be better to use it, but we don't
2628 have this information here. */
2629 return gen_float_const_Store(node, val);
2631 return gen_general_Store(node);
/* NOTE(review): incomplete extract — continue/assignment lines in the edge
 * loop and the final return are missing.  Comments only. */
2635 * Transforms a Switch.
2637 * @return the created ia32 SwitchJmp node
2639 static ir_node *create_Switch(ir_node *node)
2641 dbg_info *dbgi = get_irn_dbg_info(node);
2642 ir_node *block = be_transform_node(get_nodes_block(node));
2643 ir_node *sel = get_Cond_selector(node);
2644 ir_node *new_sel = be_transform_node(sel);
2645 long switch_min = LONG_MAX;
2646 long switch_max = LONG_MIN;
2647 long default_pn = get_Cond_default_proj(node);
2649 const ir_edge_t *edge;
2651 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2653 /* determine the smallest switch case value */
2654 foreach_out_edge(node, edge) {
2655 ir_node *proj = get_edge_src_irn(edge);
2656 long pn = get_Proj_proj(proj);
2657 if (pn == default_pn)
2660 if (pn < switch_min)
2662 if (pn > switch_max)
/* Refuse absurdly large jump tables (range check on case spread). */
2666 if ((unsigned long) (switch_max - switch_min) > 128000) {
2667 panic("Size of switch %+F bigger than 128000", node);
2670 if (switch_min != 0) {
2671 /* if smallest switch case is not 0 we need an additional sub */
2672 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2673 add_ia32_am_offs_int(new_sel, -switch_min);
2674 set_ia32_op_type(new_sel, ia32_AddrModeS);
2676 SET_IA32_ORIG_NODE(new_sel, node);
2679 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2680 SET_IA32_ORIG_NODE(new_node, node);
/* Lowers a Cond: boolean selector -> conditional jump (Jcc) fed by the
 * flags of the compare; non-boolean selector -> switch jump. */
2686 * Transform a Cond node.
2688 static ir_node *gen_Cond(ir_node *node)
2690 ir_node *block = get_nodes_block(node);
2691 ir_node *new_block = be_transform_node(block);
2692 dbg_info *dbgi = get_irn_dbg_info(node);
2693 ir_node *sel = get_Cond_selector(node);
2694 ir_mode *sel_mode = get_irn_mode(sel);
2695 ir_node *flags = NULL;
2699 if (sel_mode != mode_b) {
2700 return create_Switch(node);
2703 /* we get flags from a Cmp */
2704 flags = get_flags_node(sel, &pnc);
2706 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2707 SET_IA32_ORIG_NODE(new_node, node);
/* Duplicates a be_Copy, normalising GP-register values to mode_Iu. */
2713 * Transform a be_Copy.
2715 static ir_node *gen_be_Copy(ir_node *node)
2717 ir_node *new_node = be_duplicate_node(node);
2718 ir_mode *mode = get_irn_mode(new_node);
2720 if (ia32_mode_needs_gp_reg(mode)) {
2721 set_irn_mode(new_node, mode_Iu);
/* x87 float compare.  Prefers fucomi (writes EFLAGS directly); otherwise
 * falls back to ftst/fucom + fnstsw + sahf to move FPU flags into EFLAGS.
 * NOTE(review): incomplete extract — else/brace lines are missing. */
2727 static ir_node *create_Fucom(ir_node *node)
2729 dbg_info *dbgi = get_irn_dbg_info(node);
2730 ir_node *block = get_nodes_block(node);
2731 ir_node *new_block = be_transform_node(block);
2732 ir_node *left = get_Cmp_left(node);
2733 ir_node *new_left = be_transform_node(left);
2734 ir_node *right = get_Cmp_right(node);
2738 if (ia32_cg_config.use_fucomi) {
2739 new_right = be_transform_node(right);
2740 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2742 set_ia32_commutative(new_node);
2743 SET_IA32_ORIG_NODE(new_node, node);
/* Compare against 0 can use the shorter ftst form. */
2745 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2746 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2748 new_right = be_transform_node(right);
2749 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2752 set_ia32_commutative(new_node);
2754 SET_IA32_ORIG_NODE(new_node, node);
/* sahf copies AH (holding the FPU status word) into EFLAGS. */
2756 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2757 SET_IA32_ORIG_NODE(new_node, node);
/* SSE2 float compare via ucomiss/ucomisd; one operand may come from
 * memory (address mode), so the mem-proj has to be fixed up afterwards. */
2763 static ir_node *create_Ucomi(ir_node *node)
2765 dbg_info *dbgi = get_irn_dbg_info(node);
2766 ir_node *src_block = get_nodes_block(node);
2767 ir_node *new_block = be_transform_node(src_block);
2768 ir_node *left = get_Cmp_left(node);
2769 ir_node *right = get_Cmp_right(node);
2771 ia32_address_mode_t am;
2772 ia32_address_t *addr = &am.addr;
2774 match_arguments(&am, src_block, left, right, NULL,
2775 match_commutative | match_am);
2777 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2778 addr->mem, am.new_op1, am.new_op2,
2780 set_am_attributes(new_node, &am);
2782 SET_IA32_ORIG_NODE(new_node, node);
2784 new_node = fix_mem_proj(new_node, &am);
/* True iff every Proj on the Cmp tests only Eq/Lg — the precondition for
 * folding an And into a Test instruction (Test only sets ZF usefully). */
2790 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2791 * to fold an and into a test node
2793 static bool can_fold_test_and(ir_node *node)
2795 const ir_edge_t *edge;
2797 /** we can only have eq and lg projs */
2798 foreach_out_edge(node, edge) {
2799 ir_node *proj = get_edge_src_irn(edge);
2800 pn_Cmp pnc = get_Proj_pn_cmp(proj);
2801 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/* NOTE(review): incomplete extract — several case labels, returns and
 * braces are missing between the numbered lines.  Comments only. */
2809 * returns true if it is assured, that the upper bits of a node are "clean"
2810 * which means for a 16 or 8 bit value, that the upper bits in the register
2811 * are 0 for unsigned and a copy of the last significant bit for signed
2814 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2816 assert(ia32_mode_needs_gp_reg(mode));
/* 32-bit (and wider) values fill the register; nothing to check. */
2817 if (get_mode_size_bits(mode) >= 32)
/* Look through Projs at their producer. */
2820 if (is_Proj(transformed_node))
2821 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2823 switch (get_ia32_irn_opcode(transformed_node)) {
2824 case iro_ia32_Conv_I2I:
2825 case iro_ia32_Conv_I2I8Bit: {
2826 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* A conversion cleans the upper bits only if signedness matches and it
 * does not convert from a wider mode. */
2827 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2829 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2836 if (mode_is_signed(mode)) {
2837 return false; /* TODO handle signed modes */
/* Shr by a large-enough constant guarantees zeroed upper bits. */
2839 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2840 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2841 const ia32_immediate_attr_t *attr
2842 = get_ia32_immediate_attr_const(right);
2843 if (attr->symconst == 0 &&
2844 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2848 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2852 /* TODO too conservative if shift amount is constant */
2853 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand suffices (0 & x == 0). */
2856 if (!mode_is_signed(mode)) {
2858 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2859 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2861 /* TODO if one is known to be zero extended, then || is sufficient */
/* Generic binary op: both operands must be clean. */
2866 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2867 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2869 case iro_ia32_Const:
2870 case iro_ia32_Immediate: {
2871 const ia32_immediate_attr_t *attr =
2872 get_ia32_immediate_attr_const(transformed_node);
/* Constants: shift the value out and check sign/zero extension. */
2873 if (mode_is_signed(mode)) {
2874 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2875 return shifted == 0 || shifted == -1;
2877 unsigned long shifted = (unsigned long)attr->offset;
2878 shifted >>= get_mode_size_bits(mode);
2879 return shifted == 0;
/* NOTE(review): incomplete extract — else lines, closing braces and the
 * final return are missing between the numbered lines.  Comments only. */
2889 * Generate code for a Cmp.
2891 static ir_node *gen_Cmp(ir_node *node)
2893 dbg_info *dbgi = get_irn_dbg_info(node);
2894 ir_node *block = get_nodes_block(node);
2895 ir_node *new_block = be_transform_node(block);
2896 ir_node *left = get_Cmp_left(node);
2897 ir_node *right = get_Cmp_right(node);
2898 ir_mode *cmp_mode = get_irn_mode(left);
2900 ia32_address_mode_t am;
2901 ia32_address_t *addr = &am.addr;
/* Float compares take a separate path (SSE2 ucomi vs. x87 fucom). */
2904 if (mode_is_float(cmp_mode)) {
2905 if (ia32_cg_config.use_sse2) {
2906 return create_Ucomi(node);
2908 return create_Fucom(node);
2912 assert(ia32_mode_needs_gp_reg(cmp_mode));
2914 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2915 cmp_unsigned = !mode_is_signed(cmp_mode);
2916 if (is_Const_0(right) &&
2918 get_irn_n_edges(left) == 1 &&
2919 can_fold_test_and(node)) {
2920 /* Test(and_left, and_right) */
2921 ir_node *and_left = get_And_left(left);
2922 ir_node *and_right = get_And_right(left);
2924 /* matze: code here used mode instead of cmd_mode, I think it is always
2925 * the same as cmp_mode, but I leave this here to see if this is really
2928 assert(get_irn_mode(and_left) == cmp_mode);
2930 match_arguments(&am, block, and_left, and_right, NULL,
2932 match_am | match_8bit_am | match_16bit_am |
2933 match_am_and_immediates | match_immediate);
2935 /* use 32bit compare mode if possible since the opcode is smaller */
2936 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2937 upper_bits_clean(am.new_op2, cmp_mode)) {
2938 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2941 if (get_mode_size_bits(cmp_mode) == 8) {
2942 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2943 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2946 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2947 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2950 /* Cmp(left, right) */
2951 match_arguments(&am, block, left, right, NULL,
2952 match_commutative | match_am | match_8bit_am |
2953 match_16bit_am | match_am_and_immediates |
2955 /* use 32bit compare mode if possible since the opcode is smaller */
2956 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2957 upper_bits_clean(am.new_op2, cmp_mode)) {
2958 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2961 if (get_mode_size_bits(cmp_mode) == 8) {
2962 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2963 addr->index, addr->mem, am.new_op1,
2964 am.new_op2, am.ins_permuted,
2967 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2968 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
/* Attach address-mode attributes and the load/store mode of the compare. */
2971 set_am_attributes(new_node, &am);
2972 set_ia32_ls_mode(new_node, cmp_mode);
2974 SET_IA32_ORIG_NODE(new_node, node);
2976 new_node = fix_mem_proj(new_node, &am);
/* Builds a CMovcc for a Mux whose values are GP-register values.  If
 * match_arguments swapped the operands, the condition is negated. */
2981 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2984 dbg_info *dbgi = get_irn_dbg_info(node);
2985 ir_node *block = get_nodes_block(node);
2986 ir_node *new_block = be_transform_node(block);
2987 ir_node *val_true = get_Mux_true(node);
2988 ir_node *val_false = get_Mux_false(node);
2990 ia32_address_mode_t am;
2991 ia32_address_t *addr;
2993 assert(ia32_cg_config.use_cmov);
2994 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2998 match_arguments(&am, block, val_false, val_true, flags,
2999 match_commutative | match_am | match_16bit_am | match_mode_neutral);
3001 if (am.ins_permuted)
3002 pnc = ia32_get_negated_pnc(pnc);
3004 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3005 addr->mem, am.new_op1, am.new_op2, new_flags,
3007 set_am_attributes(new_node, &am);
3009 SET_IA32_ORIG_NODE(new_node, node);
3011 new_node = fix_mem_proj(new_node, &am);
/* Setcc only writes an 8-bit register; widen with a zero-extending
 * Conv_I2I8Bit when the target mode is larger than 8 bits. */
3017 * Creates a ia32 Setcc instruction.
3019 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3020 ir_node *flags, int pnc,
3023 ir_mode *mode = get_irn_mode(orig_node);
3026 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3027 SET_IA32_ORIG_NODE(new_node, orig_node);
3029 /* we might need to conv the result up */
3030 if (get_mode_size_bits(mode) > 8) {
3031 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3032 nomem, new_node, mode_Bu);
3033 SET_IA32_ORIG_NODE(new_node, orig_node);
/* "Difference or zero": computes max(a-b, 0) unsigned via
 * sub + sbb(0) + not + and — branch-free.
 * NOTE(review): incomplete extract — some lines are missing. */
3040 * Create instruction for an unsigned Difference or Zero.
3042 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3044 ir_mode *mode = get_irn_mode(psi);
3054 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3055 match_mode_neutral | match_am | match_immediate | match_two_users);
3057 block = get_nodes_block(new_node);
3059 if (is_Proj(new_node)) {
3060 sub = get_Proj_pred(new_node);
3061 assert(is_ia32_Sub(sub));
/* Turn the Sub into a tuple so we can also project its flags. */
3064 set_irn_mode(sub, mode_T);
3065 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3067 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3069 dbgi = get_irn_dbg_info(psi);
/* sbb 0 yields -carry; not turns it into an all-ones/all-zeros mask. */
3070 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3071 notn = new_bd_ia32_Not(dbgi, block, sbb);
3073 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3074 set_ia32_commutative(new_node);
/* NOTE(review): incomplete extract — some brace/else lines and the final
 * return of the entity are missing.  Comments only. */
3079 * Create an const array of two float consts.
3081 * @param c0 the first constant
3082 * @param c1 the second constant
3083 * @param new_mode IN/OUT for the mode of the constants, if NULL
3084 * smallest possible mode will be used
3086 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3089 ir_mode *mode = *new_mode;
3091 ir_initializer_t *initializer;
3092 ir_tarval *tv0 = get_Const_tarval(c0);
3093 ir_tarval *tv1 = get_Const_tarval(c1);
3096 /* detect the best mode for the constants */
3097 mode = get_tarval_mode(tv0);
/* Try to shrink to mode_F, then mode_D, when both values convert
 * losslessly — smaller table entries, same values. */
3099 if (mode != mode_F) {
3100 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3101 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3103 tv0 = tarval_convert_to(tv0, mode);
3104 tv1 = tarval_convert_to(tv1, mode);
3105 } else if (mode != mode_D) {
3106 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3107 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3109 tv0 = tarval_convert_to(tv0, mode);
3110 tv1 = tarval_convert_to(tv1, mode);
3117 tp = ia32_create_float_type(mode, 4);
3118 tp = ia32_create_float_array(tp);
/* Emit a private, constant, two-element global initialized with tv0/tv1. */
3120 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3122 set_entity_ld_ident(ent, get_entity_ident(ent));
3123 set_entity_visibility(ent, ir_visibility_private);
3124 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3126 initializer = create_initializer_compound(2);
3128 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3129 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3131 set_entity_initializer(ent, initializer);
/* NOTE(review): incomplete extract — the enum constants (SETCC_TR_SET,
 * SETCC_TR_ADD, SETCC_TR_LEA, ... used below) and most struct members are
 * missing from this view.  Comments only. */
3138 * Possible transformations for creating a Setcc.
3140 enum setcc_transform_insn {
/* Plan of up-to-N rewrite steps to synthesize {t,f} constants from a
 * 0/1 Setcc result (see find_const_transform). */
3153 typedef struct setcc_transform {
3157 enum setcc_transform_insn transform;
3161 } setcc_transform_t;
/* NOTE(review): incomplete extract — swaps of t/f, ++step lines, case
 * labels of the switch on v, and closing braces are missing.  Comments
 * only. */
3164 * Setcc can only handle 0 and 1 result.
3165 * Find a transformation that creates 0 and 1 from
3168 static void find_const_transform(int pnc, ir_tarval *t, ir_tarval *f,
3169 setcc_transform_t *res)
/* Normalise so that t is non-zero and the larger of the two values;
 * compensating by negating the condition code. */
3175 if (tarval_is_null(t)) {
3179 pnc = ia32_get_negated_pnc(pnc);
3180 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3181 // now, t is the bigger one
3185 pnc = ia32_get_negated_pnc(pnc);
/* Non-zero f: produce (t-f)*setcc + f, recorded as a final ADD step. */
3189 if (! tarval_is_null(f)) {
3190 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3193 res->steps[step].transform = SETCC_TR_ADD;
3195 if (t == tarval_bad)
3196 panic("constant subtract failed");
3197 if (! tarval_is_long(f))
3198 panic("tarval is not long");
3200 res->steps[step].val = get_tarval_long(f);
3202 f = tarval_sub(f, f, NULL);
3203 assert(tarval_is_null(f));
/* t == 1: plain Setcc suffices. */
3206 if (tarval_is_one(t)) {
3207 res->steps[step].transform = SETCC_TR_SET;
3208 res->num_steps = ++step;
/* t == -1: Setcc then negate. */
3212 if (tarval_is_minus_one(t)) {
3213 res->steps[step].transform = SETCC_TR_NEG;
3215 res->steps[step].transform = SETCC_TR_SET;
3216 res->num_steps = ++step;
/* Small multipliers are synthesized with LEA/SHL combinations. */
3219 if (tarval_is_long(t)) {
3220 long v = get_tarval_long(t);
3222 res->steps[step].val = 0;
3225 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3227 res->steps[step].transform = SETCC_TR_LEAxx;
3228 res->steps[step].scale = 3; /* (a << 3) + a */
3231 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3233 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3234 res->steps[step].scale = 3; /* (a << 3) */
3237 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3239 res->steps[step].transform = SETCC_TR_LEAxx;
3240 res->steps[step].scale = 2; /* (a << 2) + a */
3243 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3245 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3246 res->steps[step].scale = 2; /* (a << 2) */
3249 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3251 res->steps[step].transform = SETCC_TR_LEAxx;
3252 res->steps[step].scale = 1; /* (a << 1) + a */
3255 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3257 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3258 res->steps[step].scale = 1; /* (a << 1) */
3261 res->num_steps = step;
/* Other values: mask via AND (or negate for single-bit), then shift. */
3264 if (! tarval_is_single_bit(t)) {
3265 res->steps[step].transform = SETCC_TR_AND;
3266 res->steps[step].val = v;
3268 res->steps[step].transform = SETCC_TR_NEG;
3270 int v = get_tarval_lowest_bit(t);
3273 res->steps[step].transform = SETCC_TR_SHL;
3274 res->steps[step].scale = v;
3278 res->steps[step].transform = SETCC_TR_SET;
3279 res->num_steps = ++step;
3282 panic("tarval is not long");
/* NOTE(review): incomplete extract — numerous case labels, else branches,
 * returns and braces are missing between the numbered lines.  Comments
 * only. */
3286 * Transforms a Mux node into some code sequence.
3288 * @return The transformed node.
3290 static ir_node *gen_Mux(ir_node *node)
3292 dbg_info *dbgi = get_irn_dbg_info(node);
3293 ir_node *block = get_nodes_block(node);
3294 ir_node *new_block = be_transform_node(block);
3295 ir_node *mux_true = get_Mux_true(node);
3296 ir_node *mux_false = get_Mux_false(node);
3297 ir_node *cond = get_Mux_sel(node);
3298 ir_mode *mode = get_irn_mode(node);
3304 assert(get_irn_mode(cond) == mode_b);
/* abs(x)/-abs(x) patterns get a dedicated lowering. */
3306 is_abs = be_mux_is_abs(cond, mux_true, mux_false);
3308 return create_abs(dbgi, block, be_get_abs_op(cond), is_abs < 0, node);
3311 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3312 if (mode_is_float(mode)) {
3313 ir_node *cmp = get_Proj_pred(cond);
3314 ir_node *cmp_left = get_Cmp_left(cmp);
3315 ir_node *cmp_right = get_Cmp_right(cmp);
3316 int pnc = get_Proj_proj(cond);
/* SSE2: recognise min/max idioms and map them to minss/maxss etc. */
3318 if (ia32_cg_config.use_sse2) {
3319 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3320 if (cmp_left == mux_true && cmp_right == mux_false) {
3321 /* Mux(a <= b, a, b) => MIN */
3322 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3323 match_commutative | match_am | match_two_users);
3324 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3325 /* Mux(a <= b, b, a) => MAX */
3326 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3327 match_commutative | match_am | match_two_users);
3329 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3330 if (cmp_left == mux_true && cmp_right == mux_false) {
3331 /* Mux(a >= b, a, b) => MAX */
3332 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3333 match_commutative | match_am | match_two_users);
3334 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3335 /* Mux(a >= b, b, a) => MIN */
3336 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3337 match_commutative | match_am | match_two_users);
/* Float Mux of two constants: materialize a 2-element constant table and
 * load entry [setcc * sizeof(elem)] from it. */
3342 if (is_Const(mux_true) && is_Const(mux_false)) {
3343 ia32_address_mode_t am;
3348 flags = get_flags_node(cond, &pnc);
3349 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3351 if (ia32_cg_config.use_sse2) {
3352 /* cannot load from different mode on SSE */
3355 /* x87 can load any mode */
3359 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* Scale the 0/1 selector by the element size (via LEA/Add chains). */
3361 switch (get_mode_size_bytes(new_mode)) {
3371 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3372 set_ia32_am_scale(new_node, 2);
3377 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3378 set_ia32_am_scale(new_node, 1);
3381 /* arg, shift 16 NOT supported */
3383 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3386 panic("Unsupported constant size");
3389 am.ls_mode = new_mode;
3390 am.addr.base = get_symconst_base();
3391 am.addr.index = new_node;
3392 am.addr.mem = nomem;
3394 am.addr.scale = scale;
3395 am.addr.use_frame = 0;
3396 am.addr.frame_entity = NULL;
3397 am.addr.symconst_sign = 0;
3398 am.mem_proj = am.addr.mem;
3399 am.op_type = ia32_AddrModeS;
3402 am.pinned = op_pin_state_floats;
3404 am.ins_permuted = 0;
3406 if (ia32_cg_config.use_sse2)
3407 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3409 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3410 set_am_attributes(load, &am);
3412 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3414 panic("cannot transform floating point Mux");
/* Integer Mux path. */
3417 assert(ia32_mode_needs_gp_reg(mode));
3419 if (is_Proj(cond)) {
3420 ir_node *cmp = get_Proj_pred(cond);
3422 ir_node *cmp_left = get_Cmp_left(cmp);
3423 ir_node *cmp_right = get_Cmp_right(cmp);
3424 ir_node *val_true = mux_true;
3425 ir_node *val_false = mux_false;
3426 int pnc = get_Proj_proj(cond);
/* Canonicalise: keep the zero value in val_false. */
3428 if (is_Const(val_true) && is_Const_null(val_true)) {
3429 ir_node *tmp = val_false;
3430 val_false = val_true;
3432 pnc = ia32_get_negated_pnc(pnc);
/* Recognise the unsigned "difference or zero" pattern. */
3434 if (is_Const_0(val_false) && is_Sub(val_true)) {
3435 if ((pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge)
3436 && get_Sub_left(val_true) == cmp_left
3437 && get_Sub_right(val_true) == cmp_right) {
3438 return create_doz(node, cmp_left, cmp_right);
3440 if ((pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le)
3441 && get_Sub_left(val_true) == cmp_right
3442 && get_Sub_right(val_true) == cmp_left) {
3443 return create_doz(node, cmp_right, cmp_left);
3449 flags = get_flags_node(cond, &pnc);
/* Both constants: synthesize the two values from a Setcc using the plan
 * computed by find_const_transform, applied back-to-front. */
3451 if (is_Const(mux_true) && is_Const(mux_false)) {
3452 /* both are const, good */
3453 ir_tarval *tv_true = get_Const_tarval(mux_true);
3454 ir_tarval *tv_false = get_Const_tarval(mux_false);
3455 setcc_transform_t res;
3458 find_const_transform(pnc, tv_true, tv_false, &res);
3460 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3463 switch (res.steps[step].transform) {
3465 imm = ia32_immediate_from_long(res.steps[step].val);
3466 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3468 case SETCC_TR_ADDxx:
3469 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3472 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3473 set_ia32_am_scale(new_node, res.steps[step].scale);
3474 set_ia32_am_offs_int(new_node, res.steps[step].val);
3476 case SETCC_TR_LEAxx:
3477 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3478 set_ia32_am_scale(new_node, res.steps[step].scale);
3479 set_ia32_am_offs_int(new_node, res.steps[step].val);
3482 imm = ia32_immediate_from_long(res.steps[step].scale);
3483 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3486 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3489 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3492 imm = ia32_immediate_from_long(res.steps[step].val);
3493 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3496 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, node);
3499 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3502 panic("unknown setcc transform");
/* General case: conditional move. */
3506 new_node = create_CMov(node, cond, flags, pnc);
/* x87 float -> integer: store through fist(tp) to the stack frame, then
 * reload as a 32-bit integer Load. */
3514 * Create a conversion from x87 state register to general purpose.
3516 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3518 ir_node *block = be_transform_node(get_nodes_block(node));
3519 ir_node *op = get_Conv_op(node);
3520 ir_node *new_op = be_transform_node(op);
3521 ir_graph *irg = current_ir_graph;
3522 dbg_info *dbgi = get_irn_dbg_info(node);
3523 ir_mode *mode = get_irn_mode(node);
3524 ir_node *fist, *load, *mem;
3526 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3527 set_irn_pinned(fist, op_pin_state_floats);
3528 set_ia32_use_frame(fist);
3529 set_ia32_op_type(fist, ia32_AddrModeD);
3531 assert(get_mode_size_bits(mode) <= 32);
3532 /* exception we can only store signed 32 bit integers, so for unsigned
3533 we store a 64bit (signed) integer and load the lower bits */
3534 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3535 set_ia32_ls_mode(fist, mode_Ls);
3537 set_ia32_ls_mode(fist, mode_Is);
3539 SET_IA32_ORIG_NODE(fist, node);
3542 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3544 set_irn_pinned(load, op_pin_state_floats);
3545 set_ia32_use_frame(load);
3546 set_ia32_op_type(load, ia32_AddrModeS);
3547 set_ia32_ls_mode(load, mode_Is);
/* A 64-bit fist needs a 64-bit spill slot even though we reload 32 bits. */
3548 if (get_ia32_ls_mode(fist) == mode_Ls) {
3549 ia32_attr_t *attr = get_ia32_attr(load);
3550 attr->data.need_64bit_stackent = 1;
3552 ia32_attr_t *attr = get_ia32_attr(load);
3553 attr->data.need_32bit_stackent = 1;
3555 SET_IA32_ORIG_NODE(load, node);
3557 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/* Forces rounding to tgt_mode precision on x87 by spilling to the frame
 * in tgt_mode and reloading (the 80-bit registers don't round). */
3561 * Creates a x87 strict Conv by placing a Store and a Load
3563 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3565 ir_node *block = get_nodes_block(node);
3566 ir_graph *irg = get_Block_irg(block);
3567 dbg_info *dbgi = get_irn_dbg_info(node);
3568 ir_node *frame = get_irg_frame(irg);
3569 ir_node *store, *load;
3572 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3573 set_ia32_use_frame(store);
3574 set_ia32_op_type(store, ia32_AddrModeD);
3575 SET_IA32_ORIG_NODE(store, node);
3577 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3578 set_ia32_use_frame(load);
3579 set_ia32_op_type(load, ia32_AddrModeS);
3580 SET_IA32_ORIG_NODE(load, node);
3582 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/* Picks the 8-bit or general integer-to-integer conversion constructor
 * depending on the target mode size and builds the node. */
3586 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3587 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3589 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3591 func = get_mode_size_bits(mode) == 8 ?
3592 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3593 return func(dbgi, block, base, index, mem, val, mode);
/* NOTE(review): incomplete extract — some returns, else lines, the in[]
 * array setup for the Sync and closing braces are missing.  Comments
 * only. */
3597 * Create a conversion from general purpose to x87 register
3599 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3601 ir_node *src_block = get_nodes_block(node);
3602 ir_node *block = be_transform_node(src_block);
3603 ir_graph *irg = get_Block_irg(block);
3604 dbg_info *dbgi = get_irn_dbg_info(node);
3605 ir_node *op = get_Conv_op(node);
3606 ir_node *new_op = NULL;
3608 ir_mode *store_mode;
3613 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3614 if (possible_int_mode_for_fp(src_mode)) {
3615 ia32_address_mode_t am;
3617 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3618 if (am.op_type == ia32_AddrModeS) {
3619 ia32_address_t *addr = &am.addr;
/* Load the integer directly from memory into the x87 stack. */
3621 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3622 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3624 set_am_attributes(fild, &am);
3625 SET_IA32_ORIG_NODE(fild, node);
3627 fix_mem_proj(fild, &am);
3632 if (new_op == NULL) {
3633 new_op = be_transform_node(op);
3636 mode = get_irn_mode(op);
3638 /* first convert to 32 bit signed if necessary */
3639 if (get_mode_size_bits(src_mode) < 32) {
3640 if (!upper_bits_clean(new_op, src_mode)) {
3641 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3642 SET_IA32_ORIG_NODE(new_op, node);
3647 assert(get_mode_size_bits(mode) == 32);
/* Spill the integer to the frame so fild can read it. */
3650 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3652 set_ia32_use_frame(store);
3653 set_ia32_op_type(store, ia32_AddrModeD);
3654 set_ia32_ls_mode(store, mode_Iu);
3656 /* exception for 32bit unsigned, do a 64bit spill+load */
3657 if (!mode_is_signed(mode)) {
/* Write a zero high word so the 64-bit value is the unsigned 32-bit one. */
3660 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3662 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3663 noreg_GP, nomem, zero_const);
3665 set_ia32_use_frame(zero_store);
3666 set_ia32_op_type(zero_store, ia32_AddrModeD);
3667 add_ia32_am_offs_int(zero_store, 4);
3668 set_ia32_ls_mode(zero_store, mode_Iu);
3673 store = new_rd_Sync(dbgi, block, 2, in);
3674 store_mode = mode_Ls;
3676 store_mode = mode_Is;
3680 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3682 set_ia32_use_frame(fild);
3683 set_ia32_op_type(fild, ia32_AddrModeS);
3684 set_ia32_ls_mode(fild, store_mode);
3686 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* NOTE(review): incomplete extract — the early return for the unnecessary
 * conversion and some brace lines are missing.  Comments only. */
3692 * Create a conversion from one integer mode into another one
3694 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3695 dbg_info *dbgi, ir_node *block, ir_node *op,
3698 ir_node *new_block = be_transform_node(block);
3700 ir_mode *smaller_mode;
3701 ia32_address_mode_t am;
3702 ia32_address_t *addr = &am.addr;
/* The conversion only needs to handle the narrower of the two modes. */
3705 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3706 smaller_mode = src_mode;
3708 smaller_mode = tgt_mode;
3711 #ifdef DEBUG_libfirm
3713 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3718 match_arguments(&am, block, NULL, op, NULL,
3719 match_am | match_8bit_am | match_16bit_am);
3721 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3722 /* unnecessary conv. in theory it shouldn't have been AM */
3723 assert(is_ia32_NoReg_GP(addr->base));
3724 assert(is_ia32_NoReg_GP(addr->index));
3725 assert(is_NoMem(addr->mem));
3726 assert(am.addr.offset == 0);
3727 assert(am.addr.symconst_ent == NULL);
3731 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3732 addr->mem, am.new_op2, smaller_mode);
3733 set_am_attributes(new_node, &am);
3734 /* match_arguments assume that out-mode = in-mode, this isn't true here
3736 set_ia32_ls_mode(new_node, smaller_mode);
3737 SET_IA32_ORIG_NODE(new_node, node);
3738 new_node = fix_mem_proj(new_node, &am);
3743 * Transforms a Conv node.
3745 * @return The created ia32 Conv node
/**
 * Transforms a Conv node into the matching ia32 code: no-op conversions are
 * killed, float conversions go through SSE or the x87 path, and integer
 * conversions are delegated to create_I2I_Conv.
 */
3747 static ir_node *gen_Conv(ir_node *node)
3749 ir_node *block = get_nodes_block(node);
3750 ir_node *new_block = be_transform_node(block);
3751 ir_node *op = get_Conv_op(node);
3752 ir_node *new_op = NULL;
3753 dbg_info *dbgi = get_irn_dbg_info(node);
3754 ir_mode *src_mode = get_irn_mode(op);
3755 ir_mode *tgt_mode = get_irn_mode(node);
3756 int src_bits = get_mode_size_bits(src_mode);
3757 int tgt_bits = get_mode_size_bits(tgt_mode);
3758 ir_node *res = NULL;
/* 64bit integer values must have been lowered before this backend runs */
3760 assert(!mode_is_int(src_mode) || src_bits <= 32);
3761 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3763 /* modeB -> X should already be lowered by the lower_mode_b pass */
3764 if (src_mode == mode_b) {
3765 panic("ConvB not lowered %+F", node);
/* same source and target mode: Conv is a no-op (modulo x87 strictness) */
3768 if (src_mode == tgt_mode) {
3769 if (get_Conv_strict(node)) {
3770 if (ia32_cg_config.use_sse2) {
3771 /* when we are in SSE mode, we can kill all strict no-op conversion */
3772 return be_transform_node(op);
3775 /* this should be optimized already, but who knows... */
3776 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3777 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3778 return be_transform_node(op);
3782 if (mode_is_float(src_mode)) {
3783 new_op = be_transform_node(op);
3784 /* we convert from float ... */
3785 if (mode_is_float(tgt_mode)) {
/* float -> float: SSE uses Conv_FP2FP, otherwise the x87 path below */
3787 if (ia32_cg_config.use_sse2) {
3788 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3789 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3791 set_ia32_ls_mode(res, tgt_mode);
3793 if (get_Conv_strict(node)) {
3794 /* if fp_no_float_fold is not set then we assume that we
3795 * don't have any float operations in a non
3796 * mode_float_arithmetic mode and can skip strict upconvs */
3797 if (src_bits < tgt_bits) {
3798 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3801 res = gen_x87_strict_conv(tgt_mode, new_op);
3802 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3806 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* float -> int */
3811 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3812 if (ia32_cg_config.use_sse2) {
3813 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3815 set_ia32_ls_mode(res, src_mode);
3817 return gen_x87_fp_to_gp(node);
3821 /* we convert from int ... */
3822 if (mode_is_float(tgt_mode)) {
/* int -> float */
3824 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3825 if (ia32_cg_config.use_sse2) {
3826 new_op = be_transform_node(op);
3827 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3829 set_ia32_ls_mode(res, tgt_mode);
3831 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3832 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3833 res = gen_x87_gp_to_fp(node, src_mode);
3835 /* we need a strict-Conv, if the int mode has more bits than the
3837 if (float_mantissa < int_mantissa) {
3838 res = gen_x87_strict_conv(tgt_mode, res);
3839 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3843 } else if (tgt_mode == mode_b) {
3844 /* mode_b lowering already took care that we only have 0/1 values */
3845 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3846 src_mode, tgt_mode));
3847 return be_transform_node(op);
/* int -> int */
3850 if (src_bits == tgt_bits) {
3851 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3852 src_mode, tgt_mode));
3853 return be_transform_node(op);
3856 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Tries to create an ia32 Immediate for the given node (honoring the given
 * inline-asm style constraint character); falls back to transforming the
 * node normally when no immediate can be built.
 */
3864 static ir_node *create_immediate_or_transform(ir_node *node,
3865 char immediate_constraint_type)
3867 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3868 if (new_node == NULL) {
3869 new_node = be_transform_node(node);
3875 * Transforms a FrameAddr into an ia32 Add.
/**
 * Transforms a be_FrameAddr into an ia32 Lea that uses the frame pointer
 * input as base and carries the frame entity for later offset fixup.
 */
3877 static ir_node *gen_be_FrameAddr(ir_node *node)
3879 ir_node *block = be_transform_node(get_nodes_block(node));
3880 ir_node *op = be_get_FrameAddr_frame(node);
3881 ir_node *new_op = be_transform_node(op);
3882 dbg_info *dbgi = get_irn_dbg_info(node);
3885 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
/* the real offset is resolved from the frame entity during frame layout */
3886 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3887 set_ia32_use_frame(new_node);
3889 SET_IA32_ORIG_NODE(new_node, node);
3895 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/**
 * Transforms a be_Return. In SSE2 mode a float return value lives in xmm0
 * but the ABI returns floats on the x87 stack, so the value is spilled to
 * the frame (xStoreSimple) and reloaded into an x87 register (vfld); the
 * Barrier feeding the Return is rebuilt to use the reloaded value/memory.
 * All other cases are simply duplicated.
 */
3897 static ir_node *gen_be_Return(ir_node *node)
3899 ir_graph *irg = current_ir_graph;
3900 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3901 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3902 ir_entity *ent = get_irg_entity(irg);
3903 ir_type *tp = get_entity_type(ent);
3908 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3909 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3911 int pn_ret_val, pn_ret_mem, arity, i;
/* fixup only needed for SSE2 float returns; everything else is a copy */
3913 assert(ret_val != NULL);
3914 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3915 return be_duplicate_node(node);
3918 res_type = get_method_res_type(tp, 0);
3920 if (! is_Primitive_type(res_type)) {
3921 return be_duplicate_node(node);
3924 mode = get_type_mode(res_type);
3925 if (! mode_is_float(mode)) {
3926 return be_duplicate_node(node);
3929 assert(get_method_n_ress(tp) == 1);
3931 pn_ret_val = get_Proj_proj(ret_val);
3932 pn_ret_mem = get_Proj_proj(ret_mem);
3934 /* get the Barrier */
3935 barrier = get_Proj_pred(ret_val);
3937 /* get result input of the Barrier */
3938 ret_val = get_irn_n(barrier, pn_ret_val);
3939 new_ret_val = be_transform_node(ret_val);
3941 /* get memory input of the Barrier */
3942 ret_mem = get_irn_n(barrier, pn_ret_mem);
3943 new_ret_mem = be_transform_node(ret_mem);
3945 frame = get_irg_frame(irg);
3947 dbgi = get_irn_dbg_info(barrier);
3948 block = be_transform_node(get_nodes_block(barrier));
3950 /* store xmm0 onto stack */
3951 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3952 new_ret_mem, new_ret_val);
3953 set_ia32_ls_mode(sse_store, mode);
3954 set_ia32_op_type(sse_store, ia32_AddrModeD);
3955 set_ia32_use_frame(sse_store);
3957 /* load into x87 register */
3958 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3959 set_ia32_op_type(fld, ia32_AddrModeS);
3960 set_ia32_use_frame(fld);
3962 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3963 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3965 /* create a new barrier */
3966 arity = get_irn_arity(barrier);
3967 in = ALLOCAN(ir_node*, arity);
3968 for (i = 0; i < arity; ++i) {
/* substitute the reloaded value and its memory at the Barrier inputs,
 * transform the remaining inputs normally */
3971 if (i == pn_ret_val) {
3973 } else if (i == pn_ret_mem) {
3976 ir_node *in = get_irn_n(barrier, i);
3977 new_in = be_transform_node(in);
3982 new_barrier = new_ir_node(dbgi, irg, block,
3983 get_irn_op(barrier), get_irn_mode(barrier),
3985 copy_node_attr(irg, barrier, new_barrier);
3986 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so later lookups see the new Barrier */
3987 be_set_transformed_node(barrier, new_barrier);
3989 /* transform normally */
3990 return be_duplicate_node(node);
3994 * Transform a be_AddSP into an ia32_SubSP.
/**
 * Transforms a be_AddSP into an ia32_SubSP (the stack grows downwards, so
 * allocating stack space subtracts from ESP).
 */
3996 static ir_node *gen_be_AddSP(ir_node *node)
3998 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3999 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4001 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4002 match_am | match_immediate);
4006 * Transform a be_SubSP into an ia32_AddSP
/**
 * Transforms a be_SubSP into an ia32_AddSP (the stack grows downwards, so
 * freeing stack space adds to ESP).
 */
4008 static ir_node *gen_be_SubSP(ir_node *node)
4010 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4011 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4013 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4014 match_am | match_immediate);
4018 * Change some phi modes
/**
 * Transforms a Phi: duplicates the node with its old (untransformed)
 * arguments — fixed up later since Phis may sit on loops — and attaches the
 * ia32 register requirement matching the value's mode.
 */
4020 static ir_node *gen_Phi(ir_node *node)
4022 const arch_register_req_t *req;
4023 ir_node *block = be_transform_node(get_nodes_block(node));
4024 ir_graph *irg = current_ir_graph;
4025 dbg_info *dbgi = get_irn_dbg_info(node);
4026 ir_mode *mode = get_irn_mode(node);
4029 if (ia32_mode_needs_gp_reg(mode)) {
4030 /* we shouldn't have any 64bit stuff around anymore */
4031 assert(get_mode_size_bits(mode) <= 32);
4032 /* all integer operations are on 32bit registers now */
4034 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4035 } else if (mode_is_float(mode)) {
4036 if (ia32_cg_config.use_sse2) {
4038 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4041 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* non-data Phis (e.g. memory) need no register */
4044 req = arch_no_register_req;
4047 /* phi nodes allow loops, so we use the old arguments for now
4048 * and fix this later */
4049 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4050 get_irn_in(node) + 1);
4051 copy_node_attr(irg, node, phi);
4052 be_duplicate_deps(node, phi);
4054 arch_set_out_register_req(phi, 0, req);
/* make sure the predecessors get transformed as well */
4056 be_enqueue_preds(node);
/**
 * Transforms a Jmp node into an ia32 Jmp in the transformed block.
 */
4061 static ir_node *gen_Jmp(ir_node *node)
4063 ir_node *block = get_nodes_block(node);
4064 ir_node *new_block = be_transform_node(block);
4065 dbg_info *dbgi = get_irn_dbg_info(node);
4068 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4069 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transforms an IJmp (indirect jump) into an ia32 IJmp; the jump target may
 * be folded into an address mode or an immediate.
 */
4079 static ir_node *gen_IJmp(ir_node *node)
4081 ir_node *block = get_nodes_block(node);
4082 ir_node *new_block = be_transform_node(block);
4083 dbg_info *dbgi = get_irn_dbg_info(node);
4084 ir_node *op = get_IJmp_target(node);
4084 ia32_address_mode_t am;
4085 ia32_address_t *addr = &am.addr;
/* indirect jump targets are pointers by construction */
4087 assert(get_irn_mode(op) == mode_P);
4089 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4091 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4092 addr->mem, am.new_op2);
4093 set_am_attributes(new_node, &am);
4094 SET_IA32_ORIG_NODE(new_node, node);
4096 new_node = fix_mem_proj(new_node, &am);
4102 * Transform a Bound node.
/**
 * Transforms a Bound node. Only the zero-lower-bound form (typical for Java
 * array checks) is supported: it becomes a Sub whose flags feed an unsigned
 * less-than Jcc, so index < upper also catches negative indices.
 */
4104 static ir_node *gen_Bound(ir_node *node)
4107 ir_node *lower = get_Bound_lower(node);
4108 dbg_info *dbgi = get_irn_dbg_info(node);
4110 if (is_Const_0(lower)) {
4111 /* typical case for Java */
4112 ir_node *sub, *res, *flags, *block;
4114 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4116 match_mode_neutral | match_am | match_immediate);
4118 block = get_nodes_block(res);
4119 if (! is_Proj(res)) {
/* turn the Sub into mode_T so we can Proj both result and flags */
4121 set_irn_mode(sub, mode_T);
4122 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4124 sub = get_Proj_pred(res);
4126 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
/* unsigned compare: a single branch covers index < 0 and index >= upper */
4127 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4128 SET_IA32_ORIG_NODE(new_node, node);
4130 panic("generic Bound not supported in ia32 Backend");
/**
 * Transforms an ia32_l_ShlDep (lowered shift-left with explicit dependency)
 * into a real ia32 Shl.
 */
4136 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4138 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4139 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4141 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4142 match_immediate | match_mode_neutral);
/**
 * Transforms an ia32_l_ShrDep (lowered logical shift-right) into a real
 * ia32 Shr.
 */
4145 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4147 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4148 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4149 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/**
 * Transforms an ia32_l_SarDep (lowered arithmetic shift-right) into a real
 * ia32 Sar.
 */
4153 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4155 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4156 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4157 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transforms an ia32_l_Add (low word of a 64bit add) into an ia32 Add in
 * mode_T, so the carry flag can be projected out for the matching l_Adc.
 */
4161 static ir_node *gen_ia32_l_Add(ir_node *node)
4163 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4164 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4165 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4166 match_commutative | match_am | match_immediate |
4167 match_mode_neutral);
4169 if (is_Proj(lowered)) {
4170 lowered = get_Proj_pred(lowered);
4172 assert(is_ia32_Add(lowered));
/* mode_T exposes the flags output (carry) in addition to the result */
4173 set_irn_mode(lowered, mode_T);
/**
 * Transforms an ia32_l_Adc (high word of a 64bit add, consumes the carry
 * flag) into an ia32 Adc.
 */
4181 static ir_node *gen_ia32_l_Adc(ir_node *node)
4181 return gen_binop_flags(node, new_bd_ia32_Adc,
4182 match_commutative | match_am | match_immediate |
4183 match_mode_neutral);
4187 * Transforms a l_MulS into a "real" MulS node.
4189 * @return the created ia32 Mul node
/**
 * Transforms an ia32_l_Mul into a "real" ia32 Mul node (unsigned widening
 * multiply).
 *
 * @return the created ia32 Mul node
 */
4191 static ir_node *gen_ia32_l_Mul(ir_node *node)
4193 ir_node *left = get_binop_left(node);
4194 ir_node *right = get_binop_right(node);
4196 return gen_binop(node, left, right, new_bd_ia32_Mul,
4197 match_commutative | match_am | match_mode_neutral);
4201 * Transforms a l_IMulS into a "real" IMul1OPS node.
4203 * @return the created ia32 IMul1OP node
/**
 * Transforms an ia32_l_IMul into a "real" ia32 IMul1OP node (one-operand
 * signed widening multiply).
 *
 * @return the created ia32 IMul1OP node
 */
4205 static ir_node *gen_ia32_l_IMul(ir_node *node)
4207 ir_node *left = get_binop_left(node);
4208 ir_node *right = get_binop_right(node);
4210 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4211 match_commutative | match_am | match_mode_neutral);
/**
 * Transforms an ia32_l_Sub (low word of a 64bit subtract) into an ia32 Sub
 * in mode_T, so the borrow flag can be projected out for the matching
 * l_Sbb.
 */
4214 static ir_node *gen_ia32_l_Sub(ir_node *node)
4216 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4217 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4218 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4219 match_am | match_immediate | match_mode_neutral);
4221 if (is_Proj(lowered)) {
4222 lowered = get_Proj_pred(lowered);
4224 assert(is_ia32_Sub(lowered));
/* mode_T exposes the flags output (borrow) in addition to the result */
4225 set_irn_mode(lowered, mode_T);
/**
 * Transforms an ia32_l_Sbb (high word of a 64bit subtract, consumes the
 * borrow flag) into an ia32 Sbb.
 */
4231 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4233 return gen_binop_flags(node, new_bd_ia32_Sbb,
4234 match_am | match_immediate | match_mode_neutral);
4238 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4239 * op1 - target to be shifted
4240 * op2 - contains bits to be shifted into target
4242 * Only op3 can be an immediate.
/**
 * Transforms an l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data
 * inputs: the value to be shifted, the value supplying the bits shifted in,
 * and the shift count; only the count may become an immediate.
 */
4244 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4245 ir_node *low, ir_node *count)
4247 ir_node *block = get_nodes_block(node);
4248 ir_node *new_block = be_transform_node(block);
4249 dbg_info *dbgi = get_irn_dbg_info(node);
4250 ir_node *new_high = be_transform_node(high);
4251 ir_node *new_low = be_transform_node(low);
4255 /* the shift amount can be any mode that is bigger than 5 bits, since all
4256 * other bits are ignored anyway */
4257 while (is_Conv(count) &&
4258 get_irn_n_edges(count) == 1 &&
4259 mode_is_int(get_irn_mode(count))) {
4260 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4261 count = get_Conv_op(count);
4263 new_count = create_immediate_or_transform(count, 0);
4265 if (is_ia32_l_ShlD(node)) {
4266 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4269 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4272 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transforms an ia32_l_ShlD (lowered 64bit shift-left) via
 * gen_lowered_64bit_shifts.
 */
4277 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4279 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4280 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4281 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4282 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transforms an ia32_l_ShrD (lowered 64bit shift-right) via
 * gen_lowered_64bit_shifts.
 */
4285 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4287 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4288 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4289 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4290 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transforms an ia32_l_LLtoFloat (64bit int -> float) for the x87 path:
 * stores both 32bit halves to the frame, fild-loads them as a 64bit int,
 * and — for unsigned sources — adds a 2^64 bias constant when the sign bit
 * of the high word was set (fild always treats its operand as signed).
 */
4293 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4295 ir_node *src_block = get_nodes_block(node);
4296 ir_node *block = be_transform_node(src_block);
4297 ir_graph *irg = current_ir_graph;
4298 dbg_info *dbgi = get_irn_dbg_info(node);
4299 ir_node *frame = get_irg_frame(irg);
4300 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4301 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4302 ir_node *new_val_low = be_transform_node(val_low);
4303 ir_node *new_val_high = be_transform_node(val_high);
4305 ir_node *sync, *fild, *res;
4306 ir_node *store_low, *store_high;
4308 if (ia32_cg_config.use_sse2) {
4309 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* write the two halves to a 64bit frame slot (high half at offset 4) */
4313 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4315 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4317 SET_IA32_ORIG_NODE(store_low, node);
4318 SET_IA32_ORIG_NODE(store_high, node);
4320 set_ia32_use_frame(store_low);
4321 set_ia32_use_frame(store_high);
4322 set_ia32_op_type(store_low, ia32_AddrModeD);
4323 set_ia32_op_type(store_high, ia32_AddrModeD);
4324 set_ia32_ls_mode(store_low, mode_Iu);
4325 set_ia32_ls_mode(store_high, mode_Is);
4326 add_ia32_am_offs_int(store_high, 4);
4330 sync = new_rd_Sync(dbgi, block, 2, in);
/* load the combined 64bit value into an x87 register */
4333 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4335 set_ia32_use_frame(fild);
4336 set_ia32_op_type(fild, ia32_AddrModeS);
4337 set_ia32_ls_mode(fild, mode_Ls);
4339 SET_IA32_ORIG_NODE(fild, node);
4341 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4343 if (! mode_is_signed(get_irn_mode(val_high))) {
4344 ia32_address_mode_t am;
/* index = high >> 31 selects the bias table entry (0 or 2^64) */
4346 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4349 am.addr.base = get_symconst_base();
4350 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4351 am.addr.mem = nomem;
4354 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4355 am.addr.use_frame = 0;
4356 am.addr.frame_entity = NULL;
4357 am.addr.symconst_sign = 0;
4358 am.ls_mode = mode_F;
4359 am.mem_proj = nomem;
4360 am.op_type = ia32_AddrModeS;
4362 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4363 am.pinned = op_pin_state_floats;
4365 am.ins_permuted = 0;
4367 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4368 am.new_op1, am.new_op2, get_fpcw());
4369 set_am_attributes(fadd, &am);
4371 set_irn_mode(fadd, mode_T);
4372 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/**
 * Transforms an ia32_l_FloattoLL (float -> 64bit int): fist-stores the
 * value as a 64bit integer into a frame slot; the two 32bit halves are
 * loaded back by gen_Proj_l_FloattoLL.
 */
4377 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4379 ir_node *src_block = get_nodes_block(node);
4380 ir_node *block = be_transform_node(src_block);
4381 ir_graph *irg = get_Block_irg(block);
4382 dbg_info *dbgi = get_irn_dbg_info(node);
4383 ir_node *frame = get_irg_frame(irg);
4384 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4385 ir_node *new_val = be_transform_node(val);
4386 ir_node *fist, *mem;
4388 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4389 SET_IA32_ORIG_NODE(fist, node);
4390 set_ia32_use_frame(fist);
4391 set_ia32_op_type(fist, ia32_AddrModeD);
/* store the full 64bit integer */
4392 set_ia32_ls_mode(fist, mode_Ls);
/**
 * Transforms a Proj of an l_FloattoLL: loads one 32bit half of the 64bit
 * result back from the frame slot written by gen_ia32_l_FloattoLL (the high
 * half sits at offset 4).
 */
4399 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4399 ir_node *block = be_transform_node(get_nodes_block(node));
4400 ir_graph *irg = get_Block_irg(block);
4401 ir_node *pred = get_Proj_pred(node);
4402 ir_node *new_pred = be_transform_node(pred);
4403 ir_node *frame = get_irg_frame(irg);
4404 dbg_info *dbgi = get_irn_dbg_info(node);
4405 long pn = get_Proj_proj(node);
4410 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4411 SET_IA32_ORIG_NODE(load, node);
4412 set_ia32_use_frame(load);
4413 set_ia32_op_type(load, ia32_AddrModeS);
4414 set_ia32_ls_mode(load, mode_Iu);
4415 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4416 * 32 bit from it with this particular load */
4417 attr = get_ia32_attr(load);
4418 attr->data.need_64bit_stackent = 1;
4420 if (pn == pn_ia32_l_FloattoLL_res_high) {
4421 add_ia32_am_offs_int(load, 4);
4423 assert(pn == pn_ia32_l_FloattoLL_res_low);
4426 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4432 * Transform the Projs of an AddSP.
/**
 * Transforms the Projs of a be_AddSP, renumbering them to the outputs of
 * the ia32_SubSP the AddSP was turned into.
 */
4434 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4436 ir_node *pred = get_Proj_pred(node);
4437 ir_node *new_pred = be_transform_node(pred);
4438 dbg_info *dbgi = get_irn_dbg_info(node);
4439 long proj = get_Proj_proj(node);
4441 if (proj == pn_be_AddSP_sp) {
4442 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4443 pn_ia32_SubSP_stack);
/* the stack pointer output must be pinned to ESP */
4444 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4446 } else if (proj == pn_be_AddSP_res) {
4447 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4448 pn_ia32_SubSP_addr);
4449 } else if (proj == pn_be_AddSP_M) {
4450 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4453 panic("No idea how to transform proj->AddSP");
4457 * Transform the Projs of a SubSP.
/**
 * Transforms the Projs of a be_SubSP, renumbering them to the outputs of
 * the ia32_AddSP the SubSP was turned into.
 */
4459 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4461 ir_node *pred = get_Proj_pred(node);
4462 ir_node *new_pred = be_transform_node(pred);
4463 dbg_info *dbgi = get_irn_dbg_info(node);
4464 long proj = get_Proj_proj(node);
4466 if (proj == pn_be_SubSP_sp) {
4467 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4468 pn_ia32_AddSP_stack);
/* the stack pointer output must be pinned to ESP */
4469 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4471 } else if (proj == pn_be_SubSP_M) {
4472 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4475 panic("No idea how to transform proj->SubSP");
4479 * Transform and renumber the Projs from a Load.
/**
 * Transforms and renumbers the Projs of a Load, dispatching on the kind of
 * ia32 node the Load became (Load, Conv_I2I, xLoad, vfld, or a node the
 * load was folded into as source address mode).
 */
4481 static ir_node *gen_Proj_Load(ir_node *node)
4484 ir_node *block = be_transform_node(get_nodes_block(node));
4485 ir_node *pred = get_Proj_pred(node);
4486 dbg_info *dbgi = get_irn_dbg_info(node);
4487 long proj = get_Proj_proj(node);
4489 /* loads might be part of source address mode matches, so we don't
4490 * transform the ProjMs yet (with the exception of loads whose result is
4493 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4496 /* this is needed, because sometimes we have loops that are only
4497 reachable through the ProjM */
4498 be_enqueue_preds(node);
4499 /* do it in 2 steps, to silence firm verifier */
4500 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4501 set_Proj_proj(res, pn_ia32_mem);
4505 /* renumber the proj */
4506 new_pred = be_transform_node(pred);
4507 if (is_ia32_Load(new_pred)) {
4510 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4512 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4513 case pn_Load_X_regular:
4514 return new_rd_Jmp(dbgi, block);
4515 case pn_Load_X_except:
4516 /* This Load might raise an exception. Mark it. */
4517 set_ia32_exc_label(new_pred, 1);
4518 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the load was merged into a conversion node */
4522 } else if (is_ia32_Conv_I2I(new_pred) ||
4523 is_ia32_Conv_I2I8Bit(new_pred)) {
4524 set_irn_mode(new_pred, mode_T);
4525 if (proj == pn_Load_res) {
4526 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4527 } else if (proj == pn_Load_M) {
4528 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4530 } else if (is_ia32_xLoad(new_pred)) {
4533 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4535 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4536 case pn_Load_X_regular:
4537 return new_rd_Jmp(dbgi, block);
4538 case pn_Load_X_except:
4539 /* This Load might raise an exception. Mark it. */
4540 set_ia32_exc_label(new_pred, 1);
4541 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4545 } else if (is_ia32_vfld(new_pred)) {
4548 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4550 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4551 case pn_Load_X_regular:
4552 return new_rd_Jmp(dbgi, block);
4553 case pn_Load_X_except:
4554 /* This Load might raise an exception. Mark it. */
4555 set_ia32_exc_label(new_pred, 1);
4556 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4561 /* can happen for ProJMs when source address mode happened for the
4564 /* however it should not be the result proj, as that would mean the
4565 load had multiple users and should not have been used for
4567 if (proj != pn_Load_M) {
4568 panic("internal error: transformed node not a Load");
4570 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4573 panic("No idea how to transform proj");
4577 * Transform and renumber the Projs from a Div or Mod instruction.
/**
 * Transforms and renumbers the Projs of a Div or Mod: both map onto the
 * same ia32 Div/IDiv node, with Div taking the div_res output and Mod the
 * mod_res output.
 */
4579 static ir_node *gen_Proj_Div_Mod(ir_node *node)
4581 ir_node *block = be_transform_node(get_nodes_block(node));
4582 ir_node *pred = get_Proj_pred(node);
4583 ir_node *new_pred = be_transform_node(pred);
4584 dbg_info *dbgi = get_irn_dbg_info(node);
4585 long proj = get_Proj_proj(node);
4587 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4589 switch (get_irn_opcode(pred)) {
4593 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4595 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4596 case pn_Div_X_regular:
4597 return new_rd_Jmp(dbgi, block);
4598 case pn_Div_X_except:
4599 set_ia32_exc_label(new_pred, 1);
4600 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
/* Mod projects the remainder output of the same Div node */
4608 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4610 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4611 case pn_Mod_X_except:
4612 set_ia32_exc_label(new_pred, 1);
4613 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4622 panic("No idea how to transform proj->Div/Mod");
4626 * Transform and renumber the Projs from a CopyB.
/**
 * Transforms and renumbers the Projs of a CopyB onto the memory output of
 * the ia32 CopyB/CopyB_i node it became.
 */
4628 static ir_node *gen_Proj_CopyB(ir_node *node)
4630 ir_node *pred = get_Proj_pred(node);
4631 ir_node *new_pred = be_transform_node(pred);
4632 dbg_info *dbgi = get_irn_dbg_info(node);
4633 long proj = get_Proj_proj(node);
4637 if (is_ia32_CopyB_i(new_pred)) {
4638 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4639 } else if (is_ia32_CopyB(new_pred)) {
4640 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4647 panic("No idea how to transform proj->CopyB");
4651 * Transform and renumber the Projs from a Quot.
/**
 * Transforms and renumbers the Projs of a Quot (float division), which maps
 * onto either an SSE xDiv or an x87 vfdiv node.
 */
4653 static ir_node *gen_Proj_Quot(ir_node *node)
4655 ir_node *pred = get_Proj_pred(node);
4656 ir_node *new_pred = be_transform_node(pred);
4657 dbg_info *dbgi = get_irn_dbg_info(node);
4658 long proj = get_Proj_proj(node);
4662 if (is_ia32_xDiv(new_pred)) {
4663 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4664 } else if (is_ia32_vfdiv(new_pred)) {
4665 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4669 if (is_ia32_xDiv(new_pred)) {
4670 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4671 } else if (is_ia32_vfdiv(new_pred)) {
4672 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
/* control flow projs of Quot are not handled here */
4675 case pn_Quot_X_regular:
4676 case pn_Quot_X_except:
4681 panic("No idea how to transform proj->Quot");
/**
 * Transforms a be_Call into an ia32 Call: matches the call target as
 * address mode/immediate, routes register parameters into eax/ecx/edx, and
 * registers SSE2 float-returning calls for post-processing.
 */
4684 static ir_node *gen_be_Call(ir_node *node)
4686 dbg_info *const dbgi = get_irn_dbg_info(node);
4687 ir_node *const src_block = get_nodes_block(node);
4688 ir_node *const block = be_transform_node(src_block);
4689 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4690 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4691 ir_node *const sp = be_transform_node(src_sp);
4692 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4693 ia32_address_mode_t am;
4694 ia32_address_t *const addr = &am.addr;
4699 ir_node * eax = noreg_GP;
4700 ir_node * ecx = noreg_GP;
4701 ir_node * edx = noreg_GP;
4702 unsigned const pop = be_Call_get_pop(node);
4703 ir_type *const call_tp = be_Call_get_type(node);
4704 int old_no_pic_adjust;
4706 /* Run the x87 simulator if the call returns a float value */
4707 if (get_method_n_ress(call_tp) > 0) {
4708 ir_type *const res_type = get_method_res_type(call_tp, 0);
4709 ir_mode *const res_mode = get_type_mode(res_type);
4711 if (res_mode != NULL && mode_is_float(res_mode)) {
4712 ir_graph *irg = current_ir_graph;
4713 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4714 irg_data->do_x87_sim = 1;
4718 /* We do not want be_Call direct calls */
4719 assert(be_Call_get_entity(node) == NULL);
4721 /* special case for PIC trampoline calls */
4722 old_no_pic_adjust = ia32_no_pic_adjust;
4723 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4725 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4726 match_am | match_immediate);
4728 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input is the fp control word; the ones before it (down to
 * be_pos_Call_first_arg) are GP register parameters */
4730 i = get_irn_arity(node) - 1;
4731 fpcw = be_transform_node(get_irn_n(node, i--));
4732 for (; i >= be_pos_Call_first_arg; --i) {
4733 arch_register_req_t const *const req = arch_get_register_req(node, i);
4734 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4736 assert(req->type == arch_register_req_type_limited);
4737 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4739 switch (*req->limited) {
4740 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4741 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4742 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4743 default: panic("Invalid GP register for register parameter");
4747 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4748 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4749 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4750 set_am_attributes(call, &am);
4751 call = fix_mem_proj(call, &am);
4753 if (get_irn_pinned(node) == op_pin_state_pinned)
4754 set_irn_pinned(call, op_pin_state_pinned);
4756 SET_IA32_ORIG_NODE(call, node);
4758 if (ia32_cg_config.use_sse2) {
4759 /* remember this call for post-processing */
4760 ARR_APP1(ir_node *, call_list, call);
4761 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4768 * Transform Builtin trap
/**
 * Transforms the Builtin trap into an ia32 UD2 instruction.
 */
4770 static ir_node *gen_trap(ir_node *node)
4772 dbg_info *dbgi = get_irn_dbg_info(node);
4773 ir_node *block = be_transform_node(get_nodes_block(node));
4774 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4776 return new_bd_ia32_UD2(dbgi, block, mem);
4780 * Transform Builtin debugbreak
/**
 * Transforms the Builtin debugbreak into an ia32 Breakpoint (int3).
 */
4782 static ir_node *gen_debugbreak(ir_node *node)
4784 dbg_info *dbgi = get_irn_dbg_info(node);
4785 ir_node *block = be_transform_node(get_nodes_block(node));
4786 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4788 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4792 * Transform Builtin return_address
/**
 * Transforms the Builtin return_address: climbs up `value` stack frames
 * (ClimbFrame) when a non-zero depth is requested, then loads the return
 * address slot of that frame.
 */
4794 static ir_node *gen_return_address(ir_node *node)
4796 ir_node *param = get_Builtin_param(node, 0);
4797 ir_node *frame = get_Builtin_param(node, 1);
4798 dbg_info *dbgi = get_irn_dbg_info(node);
4799 ir_tarval *tv = get_Const_tarval(param);
4800 unsigned long value = get_tarval_long(tv);
4802 ir_node *block = be_transform_node(get_nodes_block(node));
4803 ir_node *ptr = be_transform_node(frame);
/* walk up `value` frames to find the requested one */
4807 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4808 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4809 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4812 /* load the return address from this frame */
4813 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4815 set_irn_pinned(load, get_irn_pinned(node));
4816 set_ia32_op_type(load, ia32_AddrModeS);
4817 set_ia32_ls_mode(load, mode_Iu);
4819 set_ia32_am_offs_int(load, 0);
4820 set_ia32_use_frame(load);
4821 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4823 if (get_irn_pinned(node) == op_pin_state_floats) {
4824 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4825 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4826 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4827 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4830 SET_IA32_ORIG_NODE(load, node);
4831 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4835 * Transform Builtin frame_address
/**
 * Transforms the Builtin frame_address: climbs up `value` stack frames
 * (ClimbFrame) when a non-zero depth is requested, then loads the frame
 * address slot of that frame.
 */
4837 static ir_node *gen_frame_address(ir_node *node)
4839 ir_node *param = get_Builtin_param(node, 0);
4840 ir_node *frame = get_Builtin_param(node, 1);
4841 dbg_info *dbgi = get_irn_dbg_info(node);
4842 ir_tarval *tv = get_Const_tarval(param);
4843 unsigned long value = get_tarval_long(tv);
4845 ir_node *block = be_transform_node(get_nodes_block(node));
4846 ir_node *ptr = be_transform_node(frame);
/* walk up `value` frames to find the requested one */
4851 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4852 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4853 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4856 /* load the frame address from this frame */
4857 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4859 set_irn_pinned(load, get_irn_pinned(node));
4860 set_ia32_op_type(load, ia32_AddrModeS);
4861 set_ia32_ls_mode(load, mode_Iu);
4863 ent = ia32_get_frame_address_entity();
4865 set_ia32_am_offs_int(load, 0);
4866 set_ia32_use_frame(load);
4867 set_ia32_frame_ent(load, ent);
4869 /* will fail anyway, but gcc does this: */
4870 set_ia32_am_offs_int(load, 0);
4873 if (get_irn_pinned(node) == op_pin_state_floats) {
4874 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4875 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4876 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4877 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4880 SET_IA32_ORIG_NODE(load, node);
4881 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4885 * Transform Builtin frame_address
/**
 * Transforms the Builtin prefetch: emits a 3DNow! PrefetchW for writes, an
 * SSE PrefetchNTA/0/1/2 depending on the locality hint, or a plain 3DNow!
 * Prefetch; when the target supports neither, only the memory edge is
 * routed through.
 */
4887 static ir_node *gen_prefetch(ir_node *node)
4890 ir_node *ptr, *block, *mem, *base, *index;
4891 ir_node *param, *new_node;
4894 ia32_address_t addr;
4896 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4897 /* no prefetch at all, route memory */
4898 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write flag */
4901 param = get_Builtin_param(node, 1);
4902 tv = get_Const_tarval(param);
4903 rw = get_tarval_long(tv);
4905 /* construct load address */
4906 memset(&addr, 0, sizeof(addr));
4907 ptr = get_Builtin_param(node, 0);
4908 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4915 base = be_transform_node(base);
4918 if (index == NULL) {
4921 index = be_transform_node(index);
4924 dbgi = get_irn_dbg_info(node);
4925 block = be_transform_node(get_nodes_block(node));
4926 mem = be_transform_node(get_Builtin_mem(node));
4928 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4929 /* we have 3DNow!, this was already checked above */
4930 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4931 } else if (ia32_cg_config.use_sse_prefetch) {
4932 /* note: rw == 1 is IGNORED in that case */
4933 param = get_Builtin_param(node, 2);
4934 tv = get_Const_tarval(param);
4935 locality = get_tarval_long(tv);
4937 /* SSE style prefetch */
4940 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4943 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4946 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4949 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4953 assert(ia32_cg_config.use_3dnow_prefetch);
4954 /* 3DNow! style prefetch */
4955 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4958 set_irn_pinned(new_node, get_irn_pinned(node));
4959 set_ia32_op_type(new_node, ia32_AddrModeS);
4960 set_ia32_ls_mode(new_node, mode_Bu);
4961 set_address(new_node, &addr);
4963 SET_IA32_ORIG_NODE(new_node, node);
4965 be_dep_on_frame(new_node);
4966 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4970 * Transform bsf like node
/* Helper for one-operand bit-scan style builtins (ffs/clz/ctz/popcount):
 * takes builtin parameter 0, lets the address-mode matcher fold it into a
 * (possibly memory) operand, and constructs the ia32 node via @p func. */
4972 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4974 ir_node *param = get_Builtin_param(node, 0);
4975 dbg_info *dbgi = get_irn_dbg_info(node);
4977 ir_node *block = get_nodes_block(node);
4978 ir_node *new_block = be_transform_node(block);
4980 ia32_address_mode_t am;
4981 ia32_address_t *addr = &am.addr;
4984 match_arguments(&am, block, NULL, param, NULL, match_am);
4986 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4987 set_am_attributes(cnt, &am);
/* load/store mode follows the operand's mode */
4988 set_ia32_ls_mode(cnt, get_irn_mode(param));
4990 SET_IA32_ORIG_NODE(cnt, node);
/* fix_mem_proj rewires the memory Proj if AM folding consumed a load */
4991 return fix_mem_proj(cnt, &am);
4995 * Transform builtin ffs.
/* ffs(x) = bsf(x) + 1 for x != 0, and 0 for x == 0.
 * Implemented branch-free: (bsf(x) | -(x == 0)) + 1 — the OR with the
 * negated zero-flag turns the undefined bsf result for 0 into all-ones,
 * so the final +1 yields 0. */
4997 static ir_node *gen_ffs(ir_node *node)
4999 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5000 ir_node *real = skip_Proj(bsf);
5001 dbg_info *dbgi = get_irn_dbg_info(real);
5002 ir_node *block = get_nodes_block(real);
5003 ir_node *flag, *set, *conv, *neg, *orn;
/* ensure the Bsf node is mode_T so we can take both result and flags */
5006 if (get_irn_mode(real) != mode_T) {
5007 set_irn_mode(real, mode_T);
5008 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
/* flags Proj: Bsf sets ZF when the input is zero */
5011 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* sete: 1 iff input was 0 */
5014 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
5015 SET_IA32_ORIG_NODE(set, node);
/* widen the 8-bit setcc result to 32 bit */
5018 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5019 SET_IA32_ORIG_NODE(conv, node);
/* neg: 0 -> 0, 1 -> 0xFFFFFFFF */
5022 neg = new_bd_ia32_Neg(dbgi, block, conv);
5025 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5026 set_ia32_commutative(orn);
/* add 1: maps the all-ones "was zero" case to 0, otherwise bsf+1 */
5029 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, orn, ia32_create_Immediate(NULL, 0, 1));
5033 * Transform builtin clz.
/* clz(x) = 31 - bsr(x); since bsr is in [0,31], 31 - bsr == bsr XOR 31. */
5035 static ir_node *gen_clz(ir_node *node)
5037 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5038 ir_node *real = skip_Proj(bsr);
5039 dbg_info *dbgi = get_irn_dbg_info(real);
5040 ir_node *block = get_nodes_block(real);
5041 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5043 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5047 * Transform builtin ctz.
/* ctz maps directly to the bsf instruction (result undefined for input 0,
 * matching the builtin's contract). */
5049 static ir_node *gen_ctz(ir_node *node)
5051 return gen_unop_AM(node, new_bd_ia32_Bsf);
5055 * Transform builtin parity.
/* Compare the operand against 0 (which sets the x86 parity flag), then
 * materialize PF via setp and widen the byte to 32 bit.
 * NOTE(review): x86 PF only reflects the low 8 bits of the result —
 * presumably the upper bits are handled elsewhere/by lowering; verify. */
5057 static ir_node *gen_parity(ir_node *node)
5059 ir_node *param = get_Builtin_param(node, 0);
5060 dbg_info *dbgi = get_irn_dbg_info(node);
5062 ir_node *block = get_nodes_block(node);
5064 ir_node *new_block = be_transform_node(block);
5065 ir_node *imm, *cmp, *new_node;
5067 ia32_address_mode_t am;
5068 ia32_address_t *addr = &am.addr;
/* cmp param, 0 — only executed for the flags side effect */
5072 match_arguments(&am, block, NULL, param, NULL, match_am);
5073 imm = ia32_create_Immediate(NULL, 0, 0);
5074 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
5075 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
5076 set_am_attributes(cmp, &am);
5077 set_ia32_ls_mode(cmp, mode_Iu);
5079 SET_IA32_ORIG_NODE(cmp, node);
5081 cmp = fix_mem_proj(cmp, &am);
/* setp: 1 iff parity flag set */
5084 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
5085 SET_IA32_ORIG_NODE(new_node, node);
/* widen the 8-bit setcc result to 32 bit */
5088 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5089 nomem, new_node, mode_Bu);
5090 SET_IA32_ORIG_NODE(new_node, node);
5095 * Transform builtin popcount
/* Uses the popcnt instruction when available (SSE4.2/SSE4a); otherwise
 * emits the classic SWAR bit-count: sum adjacent 1-bit fields, then
 * 2-bit, 4-bit, 8-bit and 16-bit fields, using And/Shr/Lea nodes.
 * FIX(review): the five shift steps s1..s5 were constructed with
 * new_bd_ia32_Shl, but the algorithm (see the "s5 = m12 >> 16" comment)
 * requires logical RIGHT shifts — changed to new_bd_ia32_Shr. */
5097 static ir_node *gen_popcount(ir_node *node)
5099 ir_node *param = get_Builtin_param(node, 0);
5100 dbg_info *dbgi = get_irn_dbg_info(node);
5102 ir_node *block = get_nodes_block(node);
5103 ir_node *new_block = be_transform_node(block);
5106 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5108 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5109 if (ia32_cg_config.use_popcnt) {
5110 ia32_address_mode_t am;
5111 ia32_address_t *addr = &am.addr;
5114 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5116 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5117 set_am_attributes(cnt, &am);
5118 set_ia32_ls_mode(cnt, get_irn_mode(param));
5120 SET_IA32_ORIG_NODE(cnt, node);
5121 return fix_mem_proj(cnt, &am);
5124 new_param = be_transform_node(param);
5126 /* do the standard popcount algo */
5128 /* m1 = x & 0x55555555 */
5129 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5130 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5133 simm = ia32_create_Immediate(NULL, 0, 1);
5134 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5136 /* m2 = s1 & 0x55555555 */
5137 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (Lea used as a plain add) */
5140 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5142 /* m4 = m3 & 0x33333333 */
5143 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5144 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5147 simm = ia32_create_Immediate(NULL, 0, 2);
5148 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5150 /* m5 = s2 & 0x33333333 */
5151 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 */
5154 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5156 /* m7 = m6 & 0x0F0F0F0F */
5157 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5158 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5161 simm = ia32_create_Immediate(NULL, 0, 4);
5162 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5164 /* m8 = s3 & 0x0F0F0F0F */
5165 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 */
5168 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5170 /* m10 = m9 & 0x00FF00FF */
5171 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5172 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5175 simm = ia32_create_Immediate(NULL, 0, 8);
5176 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5178 /* m11 = s4 & 0x00FF00FF */
5179 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5181 /* m12 = m10 + m11 */
5182 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5184 /* m13 = m12 & 0x0000FFFF */
5185 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5186 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5188 /* s5 = m12 >> 16 */
5189 simm = ia32_create_Immediate(NULL, 0, 16);
5190 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5192 /* res = m13 + s5 */
5193 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5197 * Transform builtin byte swap.
/* 32 bit: use the bswap instruction on i486+, otherwise build the swap out
 * of two left shifts, two right shifts, masks and Lea-adds.
 * 16 bit: always use Bswap16. Any other size panics. */
5199 static ir_node *gen_bswap(ir_node *node)
5201 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5202 dbg_info *dbgi = get_irn_dbg_info(node);
5204 ir_node *block = get_nodes_block(node);
5205 ir_node *new_block = be_transform_node(block);
5206 ir_mode *mode = get_irn_mode(param);
5207 unsigned size = get_mode_size_bits(mode);
5208 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5212 if (ia32_cg_config.use_i486) {
5213 /* swap available */
5214 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* byte 0 -> byte 3, byte 1 -> byte 2 (via shl + mask) */
5216 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5217 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5219 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5220 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
/* byte 2 -> byte 1 (shr 8, keep 0x00FF0000) */
5222 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5224 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5225 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
/* byte 3 -> byte 0, then combine everything */
5227 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5228 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5231 /* swap16 always available */
5232 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5235 panic("Invalid bswap size (%d)", size);
5240 * Transform builtin outport.
/* Emit an ia32 Outport node; the port number is folded into an immediate
 * when possible, and the load/store mode is taken from the written value. */
5242 static ir_node *gen_outport(ir_node *node)
5244 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5245 ir_node *oldv = get_Builtin_param(node, 1);
5246 ir_mode *mode = get_irn_mode(oldv);
5247 ir_node *value = be_transform_node(oldv);
5248 ir_node *block = be_transform_node(get_nodes_block(node));
5249 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5250 dbg_info *dbgi = get_irn_dbg_info(node);
5252 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5253 set_ia32_ls_mode(res, mode);
5258 * Transform builtin inport.
/* Emit an ia32 Inport node; the result mode is derived from the builtin's
 * method type (first result). */
5260 static ir_node *gen_inport(ir_node *node)
5262 ir_type *tp = get_Builtin_type(node);
5263 ir_type *rstp = get_method_res_type(tp, 0);
5264 ir_mode *mode = get_type_mode(rstp);
5265 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5266 ir_node *block = be_transform_node(get_nodes_block(node));
5267 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5268 dbg_info *dbgi = get_irn_dbg_info(node);
5270 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5271 set_ia32_ls_mode(res, mode);
5273 /* check for missing Result Proj */
5278 * Transform a builtin inner trampoline
/* Writes an x86 trampoline to memory at @p ptr:
 *   B9 <env>          mov ecx, <env>       (static chain in ecx)
 *   E9 <rel>          jmp rel32 <callee>
 * rel is computed as callee - (trampoline + 10), 10 being the total
 * length of the two instructions (1+4 + 1+4 bytes). */
5280 static ir_node *gen_inner_trampoline(ir_node *node)
5282 ir_node *ptr = get_Builtin_param(node, 0);
5283 ir_node *callee = get_Builtin_param(node, 1);
5284 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5285 ir_node *mem = get_Builtin_mem(node);
5286 ir_node *block = get_nodes_block(node);
5287 ir_node *new_block = be_transform_node(block);
5291 ir_node *trampoline;
5293 dbg_info *dbgi = get_irn_dbg_info(node);
5294 ia32_address_t addr;
5296 /* construct store address */
5297 memset(&addr, 0, sizeof(addr));
5298 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5300 if (addr.base == NULL) {
5301 addr.base = noreg_GP;
5303 addr.base = be_transform_node(addr.base);
5306 if (addr.index == NULL) {
5307 addr.index = noreg_GP;
5309 addr.index = be_transform_node(addr.index);
5311 addr.mem = be_transform_node(mem);
5313 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5314 val = ia32_create_Immediate(NULL, 0, 0xB9);
5315 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5316 addr.index, addr.mem, val);
5317 set_irn_pinned(store, get_irn_pinned(node));
5318 set_ia32_op_type(store, ia32_AddrModeD);
5319 set_ia32_ls_mode(store, mode_Bu);
5320 set_address(store, &addr);
/* store the 32-bit environment pointer (mov's immediate operand) */
5324 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5325 addr.index, addr.mem, env);
5326 set_irn_pinned(store, get_irn_pinned(node));
5327 set_ia32_op_type(store, ia32_AddrModeD);
5328 set_ia32_ls_mode(store, mode_Iu);
5329 set_address(store, &addr);
5333 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5334 val = ia32_create_Immediate(NULL, 0, 0xE9);
5335 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5336 addr.index, addr.mem, val);
5337 set_irn_pinned(store, get_irn_pinned(node));
5338 set_ia32_op_type(store, ia32_AddrModeD);
5339 set_ia32_ls_mode(store, mode_Bu);
5340 set_address(store, &addr);
5344 trampoline = be_transform_node(ptr);
5346 /* the callee is typically an immediate */
5347 if (is_SymConst(callee)) {
/* symconst - 10: offset accounts for the trampoline code length */
5348 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5350 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
5352 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
/* store the jump displacement */
5354 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5355 addr.index, addr.mem, rel);
5356 set_irn_pinned(store, get_irn_pinned(node));
5357 set_ia32_op_type(store, ia32_AddrModeD);
5358 set_ia32_ls_mode(store, mode_Iu);
5359 set_address(store, &addr);
/* result tuple: memory + trampoline address */
5364 return new_r_Tuple(new_block, 2, in);
5368 * Transform Builtin node.
/* Central dispatcher: route each ir_bk_* builtin kind to its dedicated
 * transform function; unhandled kinds panic. */
5370 static ir_node *gen_Builtin(ir_node *node)
5372 ir_builtin_kind kind = get_Builtin_kind(node);
5376 return gen_trap(node);
5377 case ir_bk_debugbreak:
5378 return gen_debugbreak(node);
5379 case ir_bk_return_address:
5380 return gen_return_address(node);
5381 case ir_bk_frame_address:
5382 return gen_frame_address(node);
5383 case ir_bk_prefetch:
5384 return gen_prefetch(node);
5386 return gen_ffs(node);
5388 return gen_clz(node);
5390 return gen_ctz(node);
5392 return gen_parity(node);
5393 case ir_bk_popcount:
5394 return gen_popcount(node);
5396 return gen_bswap(node);
5398 return gen_outport(node);
5400 return gen_inport(node);
5401 case ir_bk_inner_trampoline:
5402 return gen_inner_trampoline(node);
5404 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5408 * Transform Proj(Builtin) node.
/* Map Projs of a Builtin to the Projs/Tuple entries produced by the
 * corresponding transformed ia32 node. Kinds whose transform already
 * returned the wanted value just hand back the transformed node. */
5410 static ir_node *gen_Proj_Builtin(ir_node *proj)
5412 ir_node *node = get_Proj_pred(proj);
5413 ir_node *new_node = be_transform_node(node);
5414 ir_builtin_kind kind = get_Builtin_kind(node);
5417 case ir_bk_return_address:
5418 case ir_bk_frame_address:
5423 case ir_bk_popcount:
/* these transforms return the result value directly */
5425 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5428 case ir_bk_debugbreak:
5429 case ir_bk_prefetch:
/* these transforms return the memory value directly */
5431 assert(get_Proj_proj(proj) == pn_Builtin_M);
5434 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5435 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5437 assert(get_Proj_proj(proj) == pn_Builtin_M);
5438 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5440 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned a Tuple: [0] = memory, [1] = address */
5441 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5442 return get_Tuple_pred(new_node, 1);
5444 assert(get_Proj_proj(proj) == pn_Builtin_M);
5445 return get_Tuple_pred(new_node, 0);
5448 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Transform be_IncSP: duplicate it, but mark that the stack adjustment
 * (add/sub esp) clobbers the flags register. */
5451 static ir_node *gen_be_IncSP(ir_node *node)
5453 ir_node *res = be_duplicate_node(node);
5454 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5460 * Transform the Projs from a be_Call.
/* Renumber be_Call proj numbers to ia32_Call proj numbers. Data results
 * are matched to the ia32_Call output whose register constraint equals
 * the be_Call's; stack and fpcw Projs additionally get their fixed
 * register assigned. */
5462 static ir_node *gen_Proj_be_Call(ir_node *node)
5464 ir_node *call = get_Proj_pred(node);
5465 ir_node *new_call = be_transform_node(call);
5466 dbg_info *dbgi = get_irn_dbg_info(node);
5467 long proj = get_Proj_proj(node);
5468 ir_mode *mode = get_irn_mode(node);
5471 if (proj == pn_be_Call_M_regular) {
5472 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5474 /* transform call modes */
5475 if (mode_is_data(mode)) {
5476 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5480 /* Map from be_Call to ia32_Call proj number */
5481 if (proj == pn_be_Call_sp) {
5482 proj = pn_ia32_Call_stack;
5483 } else if (proj == pn_be_Call_M_regular) {
5484 proj = pn_ia32_Call_M;
5486 arch_register_req_t const *const req = arch_get_register_req_out(node);
5487 int const n_outs = arch_irn_get_n_outs(new_call);
5490 assert(proj >= pn_be_Call_first_res);
5491 assert(req->type & arch_register_req_type_limited);
/* search the ia32_Call output with the matching register constraint */
5493 for (i = 0; i < n_outs; ++i) {
5494 arch_register_req_t const *const new_req
5495 = arch_get_out_register_req(new_call, i);
5497 if (!(new_req->type & arch_register_req_type_limited) ||
5498 new_req->cls != req->cls ||
5499 *new_req->limited != *req->limited)
5508 res = new_rd_Proj(dbgi, new_call, mode, proj);
5510 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5512 case pn_ia32_Call_stack:
5513 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5516 case pn_ia32_Call_fpcw:
5517 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5525 * Transform the Projs from a Cmp.
/* Reaching this means a mode_b value survived until the backend —
 * the mode_b lowering pass should have removed all such Projs. */
5527 static ir_node *gen_Proj_Cmp(ir_node *node)
5529 /* this probably means not all mode_b nodes were lowered... */
5530 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5535 * Transform the Projs from a Bound.
/* Memory Proj routes the Bound's memory; the X Projs map to the
 * true/false outputs of the ia32 Jcc the Bound was lowered to; the
 * result Proj is simply the (in-bounds) index. */
5537 static ir_node *gen_Proj_Bound(ir_node *node)
5540 ir_node *pred = get_Proj_pred(node);
5542 switch (get_Proj_proj(node)) {
5544 return be_transform_node(get_Bound_mem(pred));
5545 case pn_Bound_X_regular:
5546 new_node = be_transform_node(pred);
5547 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
5548 case pn_Bound_X_except:
5549 new_node = be_transform_node(pred);
5550 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
5552 return be_transform_node(get_Bound_index(pred));
5554 panic("unsupported Proj from Bound");
/* Transform a Proj of an ASM node: the memory Proj is renumbered to the
 * transformed ASM's last output; int/reference and float Projs keep a
 * position computed by the (elided) branches; other modes panic. */
5558 static ir_node *gen_Proj_ASM(ir_node *node)
5560 ir_mode *mode = get_irn_mode(node);
5561 ir_node *pred = get_Proj_pred(node);
5562 ir_node *new_pred = be_transform_node(pred);
5563 long pos = get_Proj_proj(node);
5565 if (mode == mode_M) {
/* memory is always the last output of the transformed ASM */
5566 pos = arch_irn_get_n_outs(new_pred)-1;
5567 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5569 } else if (mode_is_float(mode)) {
5572 panic("unexpected proj mode at ASM");
5575 return new_r_Proj(new_pred, mode, pos);
5579 * Transform and potentially renumber Proj nodes.
/* Generic Proj dispatcher: route to the specific gen_Proj_* handler
 * based on the predecessor's opcode; Start Projs and ia32 lowered nodes
 * get special treatment, everything else is duplicated. */
5581 static ir_node *gen_Proj(ir_node *node)
5583 ir_node *pred = get_Proj_pred(node);
5586 switch (get_irn_opcode(pred)) {
5588 proj = get_Proj_proj(node);
5589 if (proj == pn_Store_M) {
/* a Store's transform already represents the memory result */
5590 return be_transform_node(pred);
5592 panic("No idea how to transform proj->Store");
5595 return gen_Proj_Load(node);
5597 return gen_Proj_ASM(node);
5599 return gen_Proj_Builtin(node);
5602 return gen_Proj_Div_Mod(node);
5604 return gen_Proj_CopyB(node);
5606 return gen_Proj_Quot(node);
5608 return gen_Proj_be_SubSP(node);
5610 return gen_Proj_be_AddSP(node);
5612 return gen_Proj_be_Call(node);
5614 return gen_Proj_Cmp(node);
5616 return gen_Proj_Bound(node);
5618 proj = get_Proj_proj(node);
5620 case pn_Start_X_initial_exec: {
5621 ir_node *block = get_nodes_block(pred);
5622 ir_node *new_block = be_transform_node(block);
5623 dbg_info *dbgi = get_irn_dbg_info(node);
5624 /* we exchange the ProjX with a jump */
5625 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5630 case pn_Start_P_tls:
5631 return ia32_gen_Proj_tls(node);
5636 if (is_ia32_l_FloattoLL(pred)) {
5637 return gen_Proj_l_FloattoLL(node);
5639 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5643 ir_mode *mode = get_irn_mode(node);
5644 if (ia32_mode_needs_gp_reg(mode)) {
/* renumber GP-register Projs to mode_Iu, keeping the node number */
5645 ir_node *new_pred = be_transform_node(pred);
5646 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5647 get_Proj_proj(node));
5648 new_proj->node_nr = node->node_nr;
5653 return be_duplicate_node(node);
5657 * Enters all transform functions into the generic pointer
/* One-time setup before be_transform_graph(): reset all generic transform
 * pointers, then register one transform callback per firm/backend opcode. */
5659 static void register_transformers(void)
5661 /* first clear the generic function pointer for all ops */
5662 be_start_transform_setup();
5664 be_set_transform_function(op_Add, gen_Add);
5665 be_set_transform_function(op_And, gen_And);
5666 be_set_transform_function(op_ASM, ia32_gen_ASM);
5667 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5668 be_set_transform_function(op_be_Call, gen_be_Call);
5669 be_set_transform_function(op_be_Copy, gen_be_Copy);
5670 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5671 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5672 be_set_transform_function(op_be_Return, gen_be_Return);
5673 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5674 be_set_transform_function(op_Bound, gen_Bound);
5675 be_set_transform_function(op_Builtin, gen_Builtin);
5676 be_set_transform_function(op_Cmp, gen_Cmp);
5677 be_set_transform_function(op_Cond, gen_Cond);
5678 be_set_transform_function(op_Const, gen_Const);
5679 be_set_transform_function(op_Conv, gen_Conv);
5680 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5681 be_set_transform_function(op_Div, gen_Div);
5682 be_set_transform_function(op_Eor, gen_Eor);
/* ia32 "lowered" (l_*) helper nodes created by 64-bit lowering etc. */
5683 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5684 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5685 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5686 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5687 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5688 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5689 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5690 be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
5691 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5692 be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
5693 be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
5694 be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
5695 be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
5696 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5697 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5698 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5699 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5700 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5701 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5702 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5703 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5704 be_set_transform_function(op_IJmp, gen_IJmp);
5705 be_set_transform_function(op_Jmp, gen_Jmp);
5706 be_set_transform_function(op_Load, gen_Load);
5707 be_set_transform_function(op_Minus, gen_Minus);
5708 be_set_transform_function(op_Mod, gen_Mod);
5709 be_set_transform_function(op_Mul, gen_Mul);
5710 be_set_transform_function(op_Mulh, gen_Mulh);
5711 be_set_transform_function(op_Mux, gen_Mux);
5712 be_set_transform_function(op_Not, gen_Not);
5713 be_set_transform_function(op_Or, gen_Or);
5714 be_set_transform_function(op_Phi, gen_Phi);
5715 be_set_transform_function(op_Proj, gen_Proj);
5716 be_set_transform_function(op_Quot, gen_Quot);
5717 be_set_transform_function(op_Rotl, gen_Rotl);
5718 be_set_transform_function(op_Shl, gen_Shl);
5719 be_set_transform_function(op_Shr, gen_Shr);
5720 be_set_transform_function(op_Shrs, gen_Shrs);
5721 be_set_transform_function(op_Store, gen_Store);
5722 be_set_transform_function(op_Sub, gen_Sub);
5723 be_set_transform_function(op_SymConst, gen_SymConst);
5724 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5728 * Pre-transform all unknown and noreg nodes.
/* Called once by be_transform_graph() before the main walk: transform the
 * per-irg NoReg placeholders eagerly and cache the frequently used
 * nomem / noreg_GP nodes in file-level variables. */
5730 static void ia32_pretransform_node(void)
5732 ir_graph *irg = current_ir_graph;
5733 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5735 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5736 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5737 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5739 nomem = get_irg_no_mem(irg);
5740 noreg_GP = ia32_new_NoReg_gp(irg);
5746 * Post-process all calls if we are in SSE mode.
5747 * The ABI requires that the results are in st0, copy them
5748 * to a xmm register.
/* For every recorded call with a float result: either patch an xStore
 * user directly into a vfst (store from st0), or spill st0 to the frame
 * with vfst and reload it into an SSE register with xLoad, rerouting the
 * memory edge and the result users accordingly. */
5750 static void postprocess_fp_call_results(void)
5754 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5755 ir_node *call = call_list[i];
5756 ir_type *mtp = call_types[i];
5759 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5760 ir_type *res_tp = get_method_res_type(mtp, j);
5761 ir_node *res, *new_res;
5762 const ir_edge_t *edge, *next;
5765 if (! is_atomic_type(res_tp)) {
5766 /* no floating point return */
5769 mode = get_type_mode(res_tp);
5770 if (! mode_is_float(mode)) {
5771 /* no floating point return */
/* the j-th vfp result Proj of the call */
5775 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5778 /* now patch the users */
5779 foreach_out_edge_safe(res, edge, next) {
5780 ir_node *succ = get_edge_src_irn(edge);
5783 if (be_is_Keep(succ))
5786 if (is_ia32_xStore(succ)) {
5787 /* an xStore can be patched into an vfst */
5788 dbg_info *db = get_irn_dbg_info(succ);
5789 ir_node *block = get_nodes_block(succ);
5790 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5791 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5792 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5793 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5794 ir_mode *mode = get_ia32_ls_mode(succ);
5796 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5797 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5798 if (is_ia32_use_frame(succ))
5799 set_ia32_use_frame(st);
5800 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5801 set_irn_pinned(st, get_irn_pinned(succ));
5802 set_ia32_op_type(st, ia32_AddrModeD);
/* lazily build the st0 -> xmm transfer sequence, once per result */
5806 if (new_res == NULL) {
5807 dbg_info *db = get_irn_dbg_info(call);
5808 ir_node *block = get_nodes_block(call);
5809 ir_node *frame = get_irg_frame(current_ir_graph);
5810 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5811 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5812 ir_node *vfst, *xld, *new_mem;
5814 /* store st(0) on stack */
5815 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5816 set_ia32_op_type(vfst, ia32_AddrModeD);
5817 set_ia32_use_frame(vfst);
5819 /* load into SSE register */
5820 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5821 set_ia32_op_type(xld, ia32_AddrModeS);
5822 set_ia32_use_frame(xld);
5824 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5825 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5827 if (old_mem != NULL) {
5828 edges_reroute(old_mem, new_mem, current_ir_graph);
/* redirect this user to the SSE-register copy */
5832 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5839 /* do the transformation */
/* Entry point of the ia32 transform phase: set up transform callbacks and
 * per-run state, run be_transform_graph(), then (SSE2 only) post-process
 * float call results and tear the helper state down again. */
5840 void ia32_transform_graph(ir_graph *irg)
5844 register_transformers();
5845 initial_fpcw = NULL;
5846 ia32_no_pic_adjust = 0;
5848 be_timer_push(T_HEIGHTS);
5849 ia32_heights = heights_new(irg);
5850 be_timer_pop(T_HEIGHTS);
5851 ia32_calculate_non_address_mode_nodes(irg);
5853 /* the transform phase is not safe for CSE (yet) because several nodes get
5854 * attributes set after their creation */
5855 cse_last = get_opt_cse();
/* call_list/call_types collect float-result calls for SSE post-processing */
5858 call_list = NEW_ARR_F(ir_node *, 0);
5859 call_types = NEW_ARR_F(ir_type *, 0);
5860 be_transform_graph(irg, ia32_pretransform_node);
5862 if (ia32_cg_config.use_sse2)
5863 postprocess_fp_call_results();
5864 DEL_ARR_F(call_types);
5865 DEL_ARR_F(call_list);
/* restore the CSE setting saved above */
5867 set_opt_cse(cse_last);
5869 ia32_free_non_address_mode_nodes();
5870 heights_free(ia32_heights);
5871 ia32_heights = NULL;
5874 void ia32_init_transform(void)
5876 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");