2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
50 #include "../benode.h"
51 #include "../besched.h"
53 #include "../beutil.h"
55 #include "../betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_util.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
/* Compile-time switch, constant strings and module-wide state for the
 * ia32 transform phase.  NOTE(review): the stray leading numbers on each
 * line are extraction artifacts of this excerpt. */
71 /* define this to construct SSE constants instead of load them */
72 #undef CONSTRUCT_SSE_CONST
/* Bit patterns used to build sign/abs masks for float negation/abs. */
75 #define SFP_SIGN "0x80000000"
76 #define DFP_SIGN "0x8000000000000000"
77 #define SFP_ABS "0x7FFFFFFF"
78 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
79 #define DFP_INTMAX "9223372036854775807"
/* 2^64 as decimal string: bias added when converting unsigned 64bit -> float. */
80 #define ULL_BIAS "18446744073709551616"
/* Linker-visible names of the constant entities created for the masks above. */
82 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
83 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
84 #define ENT_SFP_ABS "C_ia32_sfp_abs"
85 #define ENT_DFP_ABS "C_ia32_dfp_abs"
86 #define ENT_ULL_BIAS "C_ia32_ull_bias"
/* Shorthand for the register-class modes of the x87 and SSE register files. */
88 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
89 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
91 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Transformed node producing the initial x87 control word (lazily created). */
93 static ir_node *initial_fpcw = NULL;
94 int ia32_no_pic_adjust;
/* Constructor-function signatures shared by the generic binop/unop/shift
 * helpers below; each matches a family of new_bd_ia32_* constructors.
 * NOTE(review): some typedefs are cut mid-signature in this excerpt. */
96 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
/* binop that additionally consumes an eflags input (Adc/Sbb style). */
100 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
101 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
104 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
108 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
110 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
111 ir_node *base, ir_node *index, ir_node *mem);
/* x87 float binop: takes an extra fpcw (control word) argument. */
113 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
114 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
117 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in the file. */
119 static ir_node *create_immediate_or_transform(ir_node *node,
120 char immediate_constraint_type);
122 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
123 dbg_info *dbgi, ir_node *block,
124 ir_node *op, ir_node *orig_node);
126 /* its enough to have those once */
127 static ir_node *nomem, *noreg_GP;
129 /** a list to postprocess all calls */
130 static ir_node **call_list;
131 static ir_type **call_types;
133 /** Return non-zero is a node represents the 0 constant. */
134 static bool is_Const_0(ir_node *node)
136 return is_Const(node) && is_Const_null(node);
139 /** Return non-zero is a node represents the 1 constant. */
140 static bool is_Const_1(ir_node *node)
142 return is_Const(node) && is_Const_one(node);
145 /** Return non-zero is a node represents the -1 constant. */
146 static bool is_Const_Minus_1(ir_node *node)
148 return is_Const(node) && is_Const_all_one(node);
/* True iff the constant can be materialized by a single x87 instruction
 * (fldz/fld1) instead of a memory load.  NOTE(review): the return
 * statements are missing from this excerpt; tokens kept verbatim. */
152 * returns true if constant can be created with a simple float command
154 static bool is_simple_x87_Const(ir_node *node)
156 ir_tarval *tv = get_Const_tarval(node);
/* 0.0 and 1.0 have dedicated x87 load instructions. */
157 if (tarval_is_null(tv) || tarval_is_one(tv))
160 /* TODO: match all the other float constants */
/* True iff the constant can be synthesized cheaply with SSE instructions
 * (xorps-zero; more patterns only when CONSTRUCT_SSE_CONST is defined).
 * NOTE(review): several interior lines are missing from this excerpt. */
165 * returns true if constant can be created with a simple float command
167 static bool is_simple_sse_Const(ir_node *node)
169 ir_tarval *tv = get_Const_tarval(node);
170 ir_mode *mode = get_tarval_mode(tv);
175 if (tarval_is_null(tv)
176 #ifdef CONSTRUCT_SSE_CONST
181 #ifdef CONSTRUCT_SSE_CONST
182 if (mode == mode_D) {
/* Assemble the low 32 bits of the double little-endian, byte by byte. */
183 unsigned val = get_tarval_sub_bits(tv, 0) |
184 (get_tarval_sub_bits(tv, 1) << 8) |
185 (get_tarval_sub_bits(tv, 2) << 16) |
186 (get_tarval_sub_bits(tv, 3) << 24);
188 /* lower 32bit are zero, really a 32bit constant */
191 #endif /* CONSTRUCT_SSE_CONST */
192 /* TODO: match all the other float constants */
/* Base node for addressing newly created symbol entities: the PIC base
 * register when position-independent code is requested, otherwise NoReg.
 * NOTE(review): the non-PIC return is missing from this excerpt —
 * presumably `return noreg_GP;` — confirm against the full file. */
197 * return NoREG or pic_base in case of PIC.
198 * This is necessary as base address for newly created symbols
200 static ir_node *get_symconst_base(void)
202 ir_graph *irg = current_ir_graph;
204 if (be_get_irg_options(irg)->pic) {
205 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
206 return arch_env->impl->get_pic_base(irg);
/* Transform a firm Const node into ia32 code.
 * Float constants: SSE2 path prefers xZero / movd tricks and falls back to
 * an xLoad from a constant-pool entity; x87 path uses fldz/fld1 or a vfld.
 * Integer constants become an ia32_Const immediate.
 * NOTE(review): this excerpt drops many interior lines (declarations,
 * braces, returns); tokens below are kept verbatim. */
213 * Transforms a Const.
215 static ir_node *gen_Const(ir_node *node)
217 ir_node *old_block = get_nodes_block(node);
218 ir_node *block = be_transform_node(old_block);
219 dbg_info *dbgi = get_irn_dbg_info(node);
220 ir_mode *mode = get_irn_mode(node);
222 assert(is_Const(node));
224 if (mode_is_float(mode)) {
230 if (ia32_cg_config.use_sse2) {
231 ir_tarval *tv = get_Const_tarval(node);
232 if (tarval_is_null(tv)) {
/* 0.0: xorps reg,reg — no memory access needed. */
233 load = new_bd_ia32_xZero(dbgi, block);
234 set_ia32_ls_mode(load, mode);
236 #ifdef CONSTRUCT_SSE_CONST
237 } else if (tarval_is_one(tv)) {
/* 1.0: all-ones, then shift left/right to leave the exponent bits;
 * shift amounts differ between float (26) and double (55). */
238 int cnst = mode == mode_F ? 26 : 55;
239 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
240 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
241 ir_node *pslld, *psrld;
243 load = new_bd_ia32_xAllOnes(dbgi, block);
244 set_ia32_ls_mode(load, mode);
245 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
246 set_ia32_ls_mode(pslld, mode);
247 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
248 set_ia32_ls_mode(psrld, mode);
250 #endif /* CONSTRUCT_SSE_CONST */
251 } else if (mode == mode_F) {
252 /* we can place any 32bit constant by using a movd gp, sse */
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
257 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
258 load = new_bd_ia32_xMovd(dbgi, block, cnst);
259 set_ia32_ls_mode(load, mode);
262 #ifdef CONSTRUCT_SSE_CONST
263 if (mode == mode_D) {
264 unsigned val = get_tarval_sub_bits(tv, 0) |
265 (get_tarval_sub_bits(tv, 1) << 8) |
266 (get_tarval_sub_bits(tv, 2) << 16) |
267 (get_tarval_sub_bits(tv, 3) << 24);
269 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
270 ir_node *cnst, *psllq;
272 /* fine, lower 32bit are zero, produce 32bit value */
273 val = get_tarval_sub_bits(tv, 4) |
274 (get_tarval_sub_bits(tv, 5) << 8) |
275 (get_tarval_sub_bits(tv, 6) << 16) |
276 (get_tarval_sub_bits(tv, 7) << 24);
277 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
278 load = new_bd_ia32_xMovd(dbgi, block, cnst);
279 set_ia32_ls_mode(load, mode);
/* movd filled the low half; shift by 32 to move it into the high half. */
280 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
281 set_ia32_ls_mode(psllq, mode);
286 #endif /* CONSTRUCT_SSE_CONST */
/* Fallback: load the constant from a read-only pool entity. */
287 floatent = ia32_create_float_const_entity(node);
289 base = get_symconst_base();
290 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
292 set_ia32_op_type(load, ia32_AddrModeS);
293 set_ia32_am_sc(load, floatent);
/* Pure re-load from a constant — may be rematerialized by the spiller. */
294 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
295 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
298 if (is_Const_null(node)) {
299 load = new_bd_ia32_vfldz(dbgi, block);
301 set_ia32_ls_mode(load, mode);
302 } else if (is_Const_one(node)) {
303 load = new_bd_ia32_vfld1(dbgi, block);
305 set_ia32_ls_mode(load, mode);
310 floatent = ia32_create_float_const_entity(node);
311 /* create_float_const_ent is smart and sometimes creates
313 ls_mode = get_type_mode(get_entity_type(floatent));
314 base = get_symconst_base();
315 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
317 set_ia32_op_type(load, ia32_AddrModeS);
318 set_ia32_am_sc(load, floatent);
319 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
320 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
323 #ifdef CONSTRUCT_SSE_CONST
325 #endif /* CONSTRUCT_SSE_CONST */
326 SET_IA32_ORIG_NODE(load, node);
328 be_dep_on_frame(load);
330 } else { /* non-float mode */
332 ir_tarval *tv = get_Const_tarval(node);
/* Normalize every gp-mode constant to 32bit unsigned before emission. */
335 tv = tarval_convert_to(tv, mode_Iu);
337 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
339 panic("couldn't convert constant tarval (%+F)", node);
341 val = get_tarval_long(tv);
343 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
344 SET_IA32_ORIG_NODE(cnst, node);
346 be_dep_on_frame(cnst);
/* Transform a firm SymConst (address of an entity) into ia32 code.
 * Float modes become a load from the entity; gp modes become an
 * ia32_Const carrying the entity as symbolic immediate.
 * NOTE(review): excerpt is missing interior lines; tokens kept verbatim. */
352 * Transforms a SymConst.
354 static ir_node *gen_SymConst(ir_node *node)
356 ir_node *old_block = get_nodes_block(node);
357 ir_node *block = be_transform_node(old_block);
358 dbg_info *dbgi = get_irn_dbg_info(node);
359 ir_mode *mode = get_irn_mode(node);
362 if (mode_is_float(mode)) {
363 if (ia32_cg_config.use_sse2)
364 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E)(
366 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
367 set_ia32_am_sc(cnst, get_SymConst_entity(node));
368 set_ia32_use_frame(cnst);
/* Only entity addresses are supported; other SymConst kinds are rejected. */
372 if (get_SymConst_kind(node) != symconst_addr_ent) {
373 panic("backend only support symconst_addr_ent (at %+F)", node);
375 entity = get_SymConst_entity(node);
376 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
379 SET_IA32_ORIG_NODE(cnst, node);
381 be_dep_on_frame(cnst);
/* Create (and cache per mode/alignment) a primitive type with an explicit
 * alignment; used for constant-pool entities.
 * NOTE(review): `align` indexes 16-entry caches — no visible bounds check;
 * presumably callers only pass small powers of two. TODO confirm. */
386 * Create a float type for the given mode and cache it.
388 * @param mode the mode for the float type (might be integer mode for SSE2 types)
389 * @param align alignment
391 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
397 if (mode == mode_Iu) {
398 static ir_type *int_Iu[16] = {NULL, };
400 if (int_Iu[align] == NULL) {
401 int_Iu[align] = tp = new_type_primitive(mode);
402 /* set the specified alignment */
403 set_type_alignment_bytes(tp, align);
405 return int_Iu[align];
406 } else if (mode == mode_Lu) {
407 static ir_type *int_Lu[16] = {NULL, };
409 if (int_Lu[align] == NULL) {
410 int_Lu[align] = tp = new_type_primitive(mode);
411 /* set the specified alignment */
412 set_type_alignment_bytes(tp, align);
414 return int_Lu[align];
415 } else if (mode == mode_F) {
416 static ir_type *float_F[16] = {NULL, };
418 if (float_F[align] == NULL) {
419 float_F[align] = tp = new_type_primitive(mode);
420 /* set the specified alignment */
421 set_type_alignment_bytes(tp, align);
423 return float_F[align];
424 } else if (mode == mode_D) {
425 static ir_type *float_D[16] = {NULL, };
427 if (float_D[align] == NULL) {
428 float_D[align] = tp = new_type_primitive(mode);
429 /* set the specified alignment */
430 set_type_alignment_bytes(tp, align);
432 return float_D[align];
/* fallthrough case: extended precision (mode_E). */
434 static ir_type *float_E[16] = {NULL, };
436 if (float_E[align] == NULL) {
437 float_E[align] = tp = new_type_primitive(mode);
438 /* set the specified alignment */
439 set_type_alignment_bytes(tp, align);
441 return float_E[align];
/* Create (and cache per element-mode/alignment) a two-element array type
 * of the given atomic float type, with fixed layout.  Used e.g. for the
 * ULL_BIAS constant pair. */
446 * Create a float[2] array type for the given atomic type.
448 * @param tp the atomic type
450 static ir_type *ia32_create_float_array(ir_type *tp)
452 ir_mode *mode = get_type_mode(tp);
453 unsigned align = get_type_alignment_bytes(tp);
458 if (mode == mode_F) {
459 static ir_type *float_F[16] = {NULL, };
461 if (float_F[align] != NULL)
462 return float_F[align];
463 arr = float_F[align] = new_type_array(1, tp);
464 } else if (mode == mode_D) {
465 static ir_type *float_D[16] = {NULL, };
467 if (float_D[align] != NULL)
468 return float_D[align];
469 arr = float_D[align] = new_type_array(1, tp);
/* fallthrough case: extended precision (mode_E). */
471 static ir_type *float_E[16] = {NULL, };
473 if (float_E[align] != NULL)
474 return float_E[align];
475 arr = float_E[align] = new_type_array(1, tp);
/* two elements, tightly packed, layout frozen so the linker image is stable */
477 set_type_alignment_bytes(arr, align);
478 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
479 set_type_state(arr, layout_fixed);
/* Lazily create (and cache) the global constant entity for one of the
 * well-known FP bit patterns (sign/abs masks, ULL bias) described by the
 * table below.  The ULL bias entity is a {0, bias} pair so it can be
 * indexed by a sign bit. */
483 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
484 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
486 static const struct {
487 const char *ent_name;
488 const char *cnst_str;
/* NOTE(review): the mode/align field declarations are missing from this
 * excerpt; the initializers below supply (name, value, mode-tag, align). */
491 } names [ia32_known_const_max] = {
492 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
493 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
494 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
495 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
496 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
498 static ir_entity *ent_cache[ia32_known_const_max];
500 const char *ent_name, *cnst_str;
506 ent_name = names[kct].ent_name;
507 if (! ent_cache[kct]) {
508 cnst_str = names[kct].cnst_str;
/* mode-tag: 0 = 32bit unsigned, 1 = 64bit unsigned, 2 = float */
510 switch (names[kct].mode) {
511 case 0: mode = mode_Iu; break;
512 case 1: mode = mode_Lu; break;
513 default: mode = mode_F; break;
515 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
516 tp = ia32_create_float_type(mode, names[kct].align);
518 if (kct == ia32_ULLBIAS)
519 tp = ia32_create_float_array(tp);
520 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
522 set_entity_ld_ident(ent, get_entity_ident(ent));
523 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
524 set_entity_visibility(ent, ir_visibility_private);
526 if (kct == ia32_ULLBIAS) {
/* element 0 = 0.0, element 1 = 2^64 — indexable by the sign bit */
527 ir_initializer_t *initializer = create_initializer_compound(2);
529 set_initializer_compound_value(initializer, 0,
530 create_initializer_tarval(get_mode_null(mode)));
531 set_initializer_compound_value(initializer, 1,
532 create_initializer_tarval(tv));
534 set_entity_initializer(ent, initializer);
536 set_entity_initializer(ent, create_initializer_tarval(tv));
539 /* cache the entry */
540 ent_cache[kct] = ent;
543 return ent_cache[kct];
/* Decide whether @p node (a Proj(Load) or simple float Const) may be
 * folded into another instruction as a source address-mode operand. */
547 * return true if the node is a Proj(Load) and could be used in source address
548 * mode for another node. Will return only true if the @p other node is not
549 * dependent on the memory of the Load (for binary operations use the other
550 * input here, for unary operations use NULL).
552 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
553 ir_node *other, ir_node *other2, match_flags_t flags)
558 /* float constants are always available */
559 if (is_Const(node)) {
560 ir_mode *mode = get_irn_mode(node);
561 if (mode_is_float(mode)) {
562 if (ia32_cg_config.use_sse2) {
563 if (is_simple_sse_Const(node))
566 if (is_simple_x87_Const(node))
/* NOTE(review): this edge-count check guards the const path — multiple
 * users would duplicate the constant load; returns missing from excerpt. */
569 if (get_irn_n_edges(node) > 1)
577 load = get_Proj_pred(node);
578 pn = get_Proj_proj(node);
579 if (!is_Load(load) || pn != pn_Load_res)
581 if (get_nodes_block(load) != block)
583 /* we only use address mode if we're the only user of the load */
584 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
586 /* in some edge cases with address mode we might reach the load normally
587 * and through some AM sequence, if it is already materialized then we
588 * can't create an AM node from it */
589 if (be_is_transformed(node))
592 /* don't do AM if other node inputs depend on the load (via mem-proj) */
593 if (other != NULL && ia32_prevents_AM(block, load, other))
596 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Result record of match_arguments(): the chosen address, operand nodes,
 * load/store mode and matching flags.  NOTE(review): several members are
 * missing from this excerpt. */
602 typedef struct ia32_address_mode_t ia32_address_mode_t;
603 struct ia32_address_mode_t {
608 ia32_op_type_t op_type;
/* set when the operation's two inputs may be swapped */
612 unsigned commutative : 1;
/* set when the matcher actually swapped the inputs */
613 unsigned ins_permuted : 1;
/* Fill @p addr from a pointer expression: decompose @p ptr into
 * base/index/offset form and transform base, index and @p mem into the
 * new graph; absent base/index default to noreg_GP. */
616 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
618 /* construct load address */
619 memset(addr, 0, sizeof(addr[0]));
620 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
622 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
623 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
624 addr->mem = be_transform_node(mem);
/* Fill am->addr for source address mode from @p node: float Consts get a
 * constant-pool entity address, Proj(Load)s reuse the load's address,
 * mode and memory.  NOTE(review): excerpt is missing interior lines. */
627 static void build_address(ia32_address_mode_t *am, ir_node *node,
628 ia32_create_am_flags_t flags)
630 ia32_address_t *addr = &am->addr;
636 /* floating point immediates */
637 if (is_Const(node)) {
638 ir_entity *entity = ia32_create_float_const_entity(node);
639 addr->base = get_symconst_base();
640 addr->index = noreg_GP;
642 addr->symconst_ent = entity;
644 am->ls_mode = get_type_mode(get_entity_type(entity));
/* constant loads are side-effect free and may float */
645 am->pinned = op_pin_state_floats;
/* otherwise: node is a Proj(Load); reuse the load's address and memory */
649 load = get_Proj_pred(node);
650 ptr = get_Load_ptr(load);
651 mem = get_Load_mem(load);
652 new_mem = be_transform_node(mem);
653 am->pinned = get_irn_pinned(load);
654 am->ls_mode = get_Load_mode(load);
655 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
658 /* construct load address */
659 ia32_create_address_mode(addr, ptr, flags);
661 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
662 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy an ia32_address_t's components (scale, symconst, offset, sign,
 * frame info) into the attributes of an already-built ia32 node.
 * NOTE(review): the guard around the use_frame/frame_entity lines is
 * missing from this excerpt — confirm against the full file. */
666 static void set_address(ir_node *node, const ia32_address_t *addr)
668 set_ia32_am_scale(node, addr->scale);
669 set_ia32_am_sc(node, addr->symconst_ent);
670 set_ia32_am_offs_int(node, addr->offset);
671 if (addr->symconst_sign)
672 set_ia32_am_sc_sign(node);
674 set_ia32_use_frame(node);
675 set_ia32_frame_ent(node, addr->frame_entity);
/* Apply a matched address mode to @p node: address, op type, load/store
 * mode, pinned state and commutativity. */
679 * Apply attributes of a given address mode to a node.
681 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
683 set_address(node, &am->addr);
685 set_ia32_op_type(node, am->op_type);
686 set_ia32_ls_mode(node, am->ls_mode);
687 if (am->pinned == op_pin_state_pinned) {
688 /* beware: some nodes are already pinned and did not allow to change the state */
689 if (get_irn_pinned(node) != op_pin_state_pinned)
690 set_irn_pinned(node, op_pin_state_pinned);
/* NOTE(review): the commutative guard is missing from this excerpt */
693 set_ia32_commutative(node);
/* True iff @p node is a single-user integer Conv to a smaller-or-equal
 * gp mode (a "down-conversion" that can be skipped for mode-neutral ops). */
697 * Check, if a given node is a Down-Conv, ie. a integer Conv
698 * from a mode with a mode with more bits to a mode with lesser bits.
699 * Moreover, we return only true if the node has not more than 1 user.
701 * @param node the node
702 * @return non-zero if node is a Down-Conv
704 static int is_downconv(const ir_node *node)
712 /* we only want to skip the conv when we're the only user
713 * (because this test is used in the context of address-mode selection
714 * and we don't want to use address mode for multiple users) */
715 if (get_irn_n_edges(node) > 1)
718 src_mode = get_irn_mode(get_Conv_op(node));
719 dest_mode = get_irn_mode(node);
721 ia32_mode_needs_gp_reg(src_mode) &&
722 ia32_mode_needs_gp_reg(dest_mode) &&
723 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
726 /** Skip all Down-Conv's on a given node and return the resulting node. */
727 ir_node *ia32_skip_downconv(ir_node *node)
729 while (is_downconv(node))
730 node = get_Conv_op(node);
/* True iff @p node is a single-user integer Conv between gp modes of the
 * same bit size (i.e. only a signedness change, value bits untouched). */
735 static bool is_sameconv(ir_node *node)
743 /* we only want to skip the conv when we're the only user
744 * (because this test is used in the context of address-mode selection
745 * and we don't want to use address mode for multiple users) */
746 if (get_irn_n_edges(node) > 1)
749 src_mode = get_irn_mode(get_Conv_op(node));
750 dest_mode = get_irn_mode(node);
752 ia32_mode_needs_gp_reg(src_mode) &&
753 ia32_mode_needs_gp_reg(dest_mode) &&
754 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
757 /** Skip all signedness convs */
758 static ir_node *ia32_skip_sameconv(ir_node *node)
760 while (is_sameconv(node))
761 node = get_Conv_op(node);
/* Widen @p node to a 32bit gp mode via an I2I Conv (signed or unsigned
 * target chosen by the source mode's signedness).
 * NOTE(review): the target-mode assignments are missing from this excerpt. */
766 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
768 ir_mode *mode = get_irn_mode(node);
773 if (mode_is_signed(mode)) {
778 block = get_nodes_block(node);
779 dbgi = get_irn_dbg_info(node);
781 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/* Core operand matcher: fills @p am with source address mode, immediates
 * or plain register operands for op1/op2 according to @p flags.  Kept
 * verbatim — the matching order (immediate, then AM on op2, then AM on
 * op1 if commutative, then Normal) is intentional and order-sensitive.
 * NOTE(review): excerpt drops many interior lines. */
785 * matches operands of a node into ia32 addressing/operand modes. This covers
786 * usage of source address mode, immediates, operations with non 32-bit modes,
788 * The resulting data is filled into the @p am struct. block is the block
789 * of the node whose arguments are matched. op1, op2 are the first and second
790 * input that are matched (op1 may be NULL). other_op is another unrelated
791 * input that is not matched! but which is needed sometimes to check if AM
792 * for op1/op2 is legal.
793 * @p flags describes the supported modes of the operation in detail.
795 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
796 ir_node *op1, ir_node *op2, ir_node *other_op,
799 ia32_address_t *addr = &am->addr;
800 ir_mode *mode = get_irn_mode(op2);
801 int mode_bits = get_mode_size_bits(mode);
802 ir_node *new_op1, *new_op2;
804 unsigned commutative;
805 int use_am_and_immediates;
808 memset(am, 0, sizeof(am[0]));
810 commutative = (flags & match_commutative) != 0;
811 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
812 use_am = (flags & match_am) != 0;
813 use_immediate = (flags & match_immediate) != 0;
814 assert(!use_am_and_immediates || use_immediate);
817 assert(!commutative || op1 != NULL);
818 assert(use_am || !(flags & match_8bit_am));
819 assert(use_am || !(flags & match_16bit_am));
/* 8/16 bit operations without explicit support cannot use address mode */
821 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
822 (mode_bits == 16 && !(flags & match_16bit_am))) {
826 /* we can simply skip downconvs for mode neutral nodes: the upper bits
827 * can be random for these operations */
828 if (flags & match_mode_neutral) {
829 op2 = ia32_skip_downconv(op2);
831 op1 = ia32_skip_downconv(op1);
834 op2 = ia32_skip_sameconv(op2);
836 op1 = ia32_skip_sameconv(op1);
840 /* match immediates. firm nodes are normalized: constants are always on the
843 if (!(flags & match_try_am) && use_immediate) {
844 new_op2 = ia32_try_create_Immediate(op2, 0);
847 if (new_op2 == NULL &&
848 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
849 build_address(am, op2, ia32_create_am_normal);
850 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
/* float AM operands get a vfp NoReg placeholder instead of a real value */
851 if (mode_is_float(mode)) {
852 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
856 am->op_type = ia32_AddrModeS;
857 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
859 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
861 build_address(am, op1, ia32_create_am_normal);
863 if (mode_is_float(mode)) {
864 noreg = ia32_new_NoReg_vfp(current_ir_graph);
869 if (new_op2 != NULL) {
872 new_op1 = be_transform_node(op2);
/* op1 went into the address — record the operand swap */
874 am->ins_permuted = 1;
876 am->op_type = ia32_AddrModeS;
879 am->op_type = ia32_Normal;
881 if (flags & match_try_am) {
887 mode = get_irn_mode(op2);
888 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
889 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
891 new_op2 = create_upconv(op2, NULL);
892 am->ls_mode = mode_Iu;
894 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
896 new_op2 = be_transform_node(op2);
897 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
900 if (addr->base == NULL)
901 addr->base = noreg_GP;
902 if (addr->index == NULL)
903 addr->index = noreg_GP;
904 if (addr->mem == NULL)
907 am->new_op1 = new_op1;
908 am->new_op2 = new_op2;
909 am->commutative = commutative;
/* If the matched address mode swallowed a Load, re-route the old Load's
 * memory Proj to @p node by making it mode_T and returning a result Proj. */
913 * "Fixes" a node that uses address mode by turning it into mode_T
914 * and returning a pn_ia32_res Proj.
916 * @param node the node
917 * @param am its address mode
919 * @return a Proj(pn_ia32_res) if a memory address mode is used,
922 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
927 if (am->mem_proj == NULL)
930 /* we have to create a mode_T so the old MemProj can attach to us */
931 mode = get_irn_mode(node);
932 load = get_Proj_pred(am->mem_proj);
/* the consumed Load is now represented by @p node */
934 be_set_transformed_node(load, node);
936 if (mode != mode_T) {
937 set_irn_mode(node, mode_T);
938 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/* Generic transform for a two-operand gp instruction: match operands
 * (AM/immediates), build the node via @p func, apply AM attributes and
 * fix up a swallowed memory Proj. */
945 * Construct a standard binary operation, set AM and immediate if required.
947 * @param node The original node for which the binop is created
948 * @param op1 The first operand
949 * @param op2 The second operand
950 * @param func The node constructor function
951 * @return The constructed ia32 node.
953 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
954 construct_binop_func *func, match_flags_t flags)
957 ir_node *block, *new_block, *new_node;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
961 block = get_nodes_block(node);
962 match_arguments(&am, block, op1, op2, NULL, flags);
964 dbgi = get_irn_dbg_info(node);
965 new_block = be_transform_node(block);
966 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
/* Canonical input indices for lowered flag-consuming binops; the asserts
 * pin them to the generated Adc/Sbb input layouts so gen_binop_flags can
 * read inputs generically. */
981 * Generic names for the inputs of an ia32 binary op.
984 n_ia32_l_binop_left, /**< ia32 left input */
985 n_ia32_l_binop_right, /**< ia32 right input */
986 n_ia32_l_binop_eflags /**< ia32 eflags input */
988 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
989 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
990 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
991 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
992 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
993 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/* Like gen_binop, but for lowered nodes that also consume an eflags input
 * (Adc/Sbb); the eflags operand is transformed and passed through. */
996 * Construct a binary operation which also consumes the eflags.
998 * @param node The node to transform
999 * @param func The node constructor function
1000 * @param flags The match flags
1001 * @return The constructor ia32 node
1003 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1004 match_flags_t flags)
1006 ir_node *src_block = get_nodes_block(node);
1007 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1008 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1009 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1011 ir_node *block, *new_node, *new_eflags;
1012 ia32_address_mode_t am;
1013 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded Load */
1015 match_arguments(&am, src_block, op1, op2, eflags, flags);
1017 dbgi = get_irn_dbg_info(node);
1018 block = be_transform_node(src_block);
1019 new_eflags = be_transform_node(eflags);
1020 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1021 am.new_op1, am.new_op2, new_eflags);
1022 set_am_attributes(new_node, &am);
1023 /* we can't use source address mode anymore when using immediates */
1024 if (!(flags & match_am_and_immediates) &&
1025 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1026 set_ia32_am_support(new_node, ia32_am_none);
1027 SET_IA32_ORIG_NODE(new_node, node);
1029 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed, cached) node holding the initial x87
 * floating-point control word; x87 binops take it as an extra input. */
1034 static ir_node *get_fpcw(void)
1037 if (initial_fpcw != NULL)
1038 return initial_fpcw;
1040 fpcw = be_abi_get_ignore_irn(be_get_irg_abi(current_ir_graph),
1041 &ia32_registers[REG_FPCW]);
1042 initial_fpcw = be_transform_node(fpcw);
1044 return initial_fpcw;
/* Transform a float binop to an x87 instruction: operands are matched as
 * commutative (reverse-operand forms exist), the fpcw is appended, and a
 * possible operand swap is recorded in the x87 attributes. */
1048 * Construct a standard binary operation, set AM and immediate if required.
1050 * @param op1 The first operand
1051 * @param op2 The second operand
1052 * @param func The node constructor function
1053 * @return The constructed ia32 node.
1055 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1056 construct_binop_float_func *func)
1058 ir_mode *mode = get_irn_mode(node);
1060 ir_node *block, *new_block, *new_node;
1061 ia32_address_mode_t am;
1062 ia32_address_t *addr = &am.addr;
1063 ia32_x87_attr_t *attr;
1064 /* All operations are considered commutative, because there are reverse
1066 match_flags_t flags = match_commutative;
1068 /* happens for div nodes... */
1070 mode = get_divop_resmod(node);
1072 /* cannot use address mode with long double on x87 */
1073 if (get_mode_size_bits(mode) <= 64)
1076 block = get_nodes_block(node);
1077 match_arguments(&am, block, op1, op2, NULL, flags);
1079 dbgi = get_irn_dbg_info(node);
1080 new_block = be_transform_node(block);
1081 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1082 am.new_op1, am.new_op2, get_fpcw());
1083 set_am_attributes(new_node, &am);
/* remember a swap so the emitter picks the reverse instruction form */
1085 attr = get_ia32_x87_attr(new_node);
1086 attr->attr.data.ins_permuted = am.ins_permuted;
1088 SET_IA32_ORIG_NODE(new_node, node);
1090 new_node = fix_mem_proj(new_node, &am);
/* Transform a shift/rotate: widen or mode-neutrally skip convs on the
 * value operand, strip redundant convs from the shift amount (hardware
 * only reads the low 5 bits), and allow an immediate amount. */
1096 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1098 * @param op1 The first operand
1099 * @param op2 The second operand
1100 * @param func The node constructor function
1101 * @return The constructed ia32 node.
1103 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1104 construct_shift_func *func,
1105 match_flags_t flags)
1108 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1110 assert(! mode_is_float(get_irn_mode(node)));
1111 assert(flags & match_immediate);
1112 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1114 if (flags & match_mode_neutral) {
1115 op1 = ia32_skip_downconv(op1);
1116 new_op1 = be_transform_node(op1);
1117 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1118 new_op1 = create_upconv(op1, node);
1120 new_op1 = be_transform_node(op1);
1123 /* the shift amount can be any mode that is bigger than 5 bits, since all
1124 * other bits are ignored anyway */
1125 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1126 ir_node *const op = get_Conv_op(op2);
1127 if (mode_is_float(get_irn_mode(op)))
1130 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1132 new_op2 = create_immediate_or_transform(op2, 0);
1134 dbgi = get_irn_dbg_info(node);
1135 block = get_nodes_block(node);
1136 new_block = be_transform_node(block);
1137 new_node = func(dbgi, new_block, new_op1, new_op2);
1138 SET_IA32_ORIG_NODE(new_node, node);
1140 /* lowered shift instruction may have a dependency operand, handle it here */
1141 if (get_irn_arity(node) == 3) {
1142 /* we have a dependency */
1143 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1144 add_irn_dep(new_node, new_dep);
/* Transform a one-operand gp instruction; mode-neutral unops may skip
 * down-conversions on their operand. */
1152 * Construct a standard unary operation, set AM and immediate if required.
1154 * @param op The operand
1155 * @param func The node constructor function
1156 * @return The constructed ia32 node.
1158 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1159 match_flags_t flags)
1162 ir_node *block, *new_block, *new_op, *new_node;
1164 assert(flags == 0 || flags == match_mode_neutral);
1165 if (flags & match_mode_neutral) {
1166 op = ia32_skip_downconv(op);
1169 new_op = be_transform_node(op);
1170 dbgi = get_irn_dbg_info(node);
1171 block = get_nodes_block(node);
1172 new_block = be_transform_node(block);
1173 new_node = func(dbgi, new_block, new_op);
1175 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea from a decomposed address; absent base/index become
 * noreg (the missing lines in this excerpt presumably assign noreg_GP —
 * confirm against the full file) and the address attributes are applied. */
1180 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1181 ia32_address_t *addr)
1183 ir_node *base, *index, *res;
1189 base = be_transform_node(base);
1192 index = addr->index;
1193 if (index == NULL) {
1196 index = be_transform_node(index);
1199 res = new_bd_ia32_Lea(dbgi, block, base, index);
1200 set_address(res, addr);
1206 * Returns non-zero if a given address mode has a symbolic or
1207 * numerical offset != 0.
1209 static int am_has_immediates(const ia32_address_t *addr)
1211 return addr->offset != 0 || addr->symconst_ent != NULL
1212 || addr->frame_entity || addr->use_frame;
1216 * Creates an ia32 Add.
1218 * @return the created ia32 Add node
1220 static ir_node *gen_Add(ir_node *node)
1222 ir_mode *mode = get_irn_mode(node);
1223 ir_node *op1 = get_Add_left(node);
1224 ir_node *op2 = get_Add_right(node);
1226 ir_node *block, *new_block, *new_node, *add_immediate_op;
1227 ia32_address_t addr;
1228 ia32_address_mode_t am;
/* floating-point add: SSE2 scalar add when available, x87 otherwise */
1230 if (mode_is_float(mode)) {
1231 if (ia32_cg_config.use_sse2)
1232 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1233 match_commutative | match_am);
1235 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
/* integer add: this node must not itself become part of another
 * node's address mode */
1238 ia32_mark_non_am(node);
/* upper bits are irrelevant for an add, skip downconvs on both sides */
1240 op2 = ia32_skip_downconv(op2);
1241 op1 = ia32_skip_downconv(op1);
1245 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1246 * 1. Add with immediate -> Lea
1247 * 2. Add with possible source address mode -> Add
1248 * 3. Otherwise -> Lea
/* force folding of the whole Add tree into an address expression */
1250 memset(&addr, 0, sizeof(addr));
1251 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1252 add_immediate_op = NULL;
1254 dbgi = get_irn_dbg_info(node);
1255 block = get_nodes_block(node);
1256 new_block = be_transform_node(block);
/* case 0: everything collapsed into immediates -> materialize a Const */
1259 if (addr.base == NULL && addr.index == NULL) {
1260 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1261 addr.symconst_sign, 0, addr.offset);
1262 be_dep_on_frame(new_node);
1263 SET_IA32_ORIG_NODE(new_node, node);
1266 /* add with immediate? */
1267 if (addr.index == NULL) {
1268 add_immediate_op = addr.base;
1269 } else if (addr.base == NULL && addr.scale == 0) {
1270 add_immediate_op = addr.index;
1273 if (add_immediate_op != NULL) {
/* degenerate case Add(x, 0): just use the operand itself */
1274 if (!am_has_immediates(&addr)) {
1275 #ifdef DEBUG_libfirm
1276 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1279 return be_transform_node(add_immediate_op);
/* case 1: single register + immediate -> Lea */
1282 new_node = create_lea_from_address(dbgi, new_block, &addr);
1283 SET_IA32_ORIG_NODE(new_node, node);
1287 /* test if we can use source address mode */
1288 match_arguments(&am, block, op1, op2, NULL, match_commutative
1289 | match_mode_neutral | match_am | match_immediate | match_try_am);
1291 /* construct an Add with source address mode */
1292 if (am.op_type == ia32_AddrModeS) {
1293 ia32_address_t *am_addr = &am.addr;
1294 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1295 am_addr->index, am_addr->mem, am.new_op1,
1297 set_am_attributes(new_node, &am);
1298 SET_IA32_ORIG_NODE(new_node, node);
1300 new_node = fix_mem_proj(new_node, &am);
1305 /* otherwise construct a lea */
1306 new_node = create_lea_from_address(dbgi, new_block, &addr);
1307 SET_IA32_ORIG_NODE(new_node, node);
1312 * Creates an ia32 Mul.
1314 * @return the created ia32 Mul node
1316 static ir_node *gen_Mul(ir_node *node)
1318 ir_node *op1 = get_Mul_left(node);
1319 ir_node *op2 = get_Mul_right(node);
1320 ir_mode *mode = get_irn_mode(node);
/* floating-point multiply: SSE2 or x87 */
1322 if (mode_is_float(mode)) {
1323 if (ia32_cg_config.use_sse2)
1324 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1325 match_commutative | match_am);
1327 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul is used for both signednesses here
 * (only the low result bits are kept, cf. gen_Mulh) */
1329 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1330 match_commutative | match_am | match_mode_neutral |
1331 match_immediate | match_am_and_immediates);
1335 * Creates an ia32 Mulh.
1336 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1337 * this result while Mul returns the lower 32 bit.
1339 * @return the created ia32 Mulh node
1341 static ir_node *gen_Mulh(ir_node *node)
1343 dbg_info *dbgi = get_irn_dbg_info(node);
1344 ir_node *op1 = get_Mulh_left(node);
1345 ir_node *op2 = get_Mulh_right(node);
1346 ir_mode *mode = get_irn_mode(node);
1348 ir_node *proj_res_high;
/* only full-width 32-bit Mulh is implemented */
1350 if (get_mode_size_bits(mode) != 32) {
1351 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signedness decides between the one-operand IMul and the plain Mul;
 * in both cases the upper half of the 64-bit product is projected out */
1354 if (mode_is_signed(mode)) {
1355 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1356 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1358 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1359 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1361 return proj_res_high;
1365 * Creates an ia32 And.
1367 * @return The created ia32 And node
1369 static ir_node *gen_And(ir_node *node)
1371 ir_node *op1 = get_And_left(node);
1372 ir_node *op2 = get_And_right(node);
1373 assert(! mode_is_float(get_irn_mode(node)));
1375 /* is it a zero extension? */
1376 if (is_Const(op2)) {
1377 ir_tarval *tv = get_Const_tarval(op2);
1378 long v = get_tarval_long(tv);
/* And with 0xFF / 0xFFFF masks == zero-extension from 8/16 bit;
 * emit a Conv instead of an And (elided lines presumably select
 * src_mode from v -- TODO confirm) */
1380 if (v == 0xFF || v == 0xFFFF) {
1381 dbg_info *dbgi = get_irn_dbg_info(node);
1382 ir_node *block = get_nodes_block(node);
1389 assert(v == 0xFFFF);
1392 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1397 return gen_binop(node, op1, op2, new_bd_ia32_And,
1398 match_commutative | match_mode_neutral | match_am | match_immediate);
1404 * Creates an ia32 Or.
1406 * @return The created ia32 Or node
1408 static ir_node *gen_Or(ir_node *node)
1410 ir_node *op1 = get_Or_left(node);
1411 ir_node *op2 = get_Or_right(node);
/* bitwise Or exists only for integer modes */
1413 assert (! mode_is_float(get_irn_mode(node)));
1414 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1415 | match_mode_neutral | match_am | match_immediate);
1421 * Creates an ia32 Eor.
1423 * @return The created ia32 Eor node
1425 static ir_node *gen_Eor(ir_node *node)
1427 ir_node *op1 = get_Eor_left(node);
1428 ir_node *op2 = get_Eor_right(node);
/* bitwise Xor exists only for integer modes */
1430 assert(! mode_is_float(get_irn_mode(node)));
1431 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1432 | match_mode_neutral | match_am | match_immediate);
1437 * Creates an ia32 Sub.
1439 * @return The created ia32 Sub node
1441 static ir_node *gen_Sub(ir_node *node)
1443 ir_node *op1 = get_Sub_left(node);
1444 ir_node *op2 = get_Sub_right(node);
1445 ir_mode *mode = get_irn_mode(node);
/* floating-point subtract: SSE2 or x87 (not commutative, so no
 * match_commutative here) */
1447 if (mode_is_float(mode)) {
1448 if (ia32_cg_config.use_sse2)
1449 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1451 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with a constant should have been normalized to Add(x, -C)
 * by the middleend; warn if we still see one */
1454 if (is_Const(op2)) {
1455 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1459 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1460 | match_am | match_immediate);
/* Merge the memory input of the source node with the memory consumed by
 * a folded address mode, taking care not to create a cycle through the
 * load whose value is being folded. */
1463 static ir_node *transform_AM_mem(ir_node *const block,
1464 ir_node *const src_val,
1465 ir_node *const src_mem,
1466 ir_node *const am_mem)
/* no AM memory -> just use the transformed source memory */
1468 if (is_NoMem(am_mem)) {
1469 return be_transform_node(src_mem);
1470 } else if (is_Proj(src_val) &&
1472 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1473 /* avoid memory loop */
/* value and memory come from the same node (the folded load) --
 * returning only am_mem avoids a self-cycle */
1475 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* rebuild the Sync, dropping any pred that projects from the folded
 * load (again to avoid a cycle), and append am_mem */
1476 ir_node *const ptr_pred = get_Proj_pred(src_val);
1477 int const arity = get_Sync_n_preds(src_mem);
1482 NEW_ARR_A(ir_node*, ins, arity + 1);
1484 /* NOTE: This sometimes produces dead-code because the old sync in
1485 * src_mem might not be used anymore, we should detect this case
1486 * and kill the sync... */
1487 for (i = arity - 1; i >= 0; --i) {
1488 ir_node *const pred = get_Sync_pred(src_mem, i);
1490 /* avoid memory loop */
1491 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1494 ins[n++] = be_transform_node(pred);
1499 return new_r_Sync(block, n, ins);
/* generic case: synchronize source memory and AM memory */
1503 ins[0] = be_transform_node(src_mem);
1505 return new_r_Sync(block, 2, ins);
1510 * Create a 32bit to 64bit signed extension.
1512 * @param dbgi debug info
1513 * @param block the block where node nodes should be placed
1514 * @param val the value to extend
1515 * @param orig the original node
1517 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1518 ir_node *val, const ir_node *orig)
/* two strategies: the short "cltd" (cdq) instruction when configured,
 * otherwise an arithmetic shift right by 31 which replicates the sign
 * bit into all positions */
1523 if (ia32_cg_config.use_short_sex_eax) {
/* ProduceVal provides an (undefined) input value for cltd */
1524 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1525 be_dep_on_frame(pval);
1526 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1528 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1529 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1531 SET_IA32_ORIG_NODE(res, orig);
1536 * Generates an ia32 DivMod with additional infrastructure for the
1537 * register allocator if needed.
/* Common transformation for Div / Mod / DivMod: all three map onto an
 * ia32 Div or IDiv instruction. */
1539 static ir_node *create_Div(ir_node *node)
1541 dbg_info *dbgi = get_irn_dbg_info(node);
1542 ir_node *block = get_nodes_block(node);
1543 ir_node *new_block = be_transform_node(block);
1550 ir_node *sign_extension;
1551 ia32_address_mode_t am;
1552 ia32_address_t *addr = &am.addr;
1554 /* the upper bits have random contents for smaller modes */
/* dispatch on the concrete firm opcode to fetch operands/mem/mode */
1555 switch (get_irn_opcode(node)) {
1557 op1 = get_Div_left(node);
1558 op2 = get_Div_right(node);
1559 mem = get_Div_mem(node);
1560 mode = get_Div_resmode(node);
1563 op1 = get_Mod_left(node);
1564 op2 = get_Mod_right(node);
1565 mem = get_Mod_mem(node);
1566 mode = get_Mod_resmode(node);
1569 op1 = get_DivMod_left(node);
1570 op2 = get_DivMod_right(node);
1571 mem = get_DivMod_mem(node);
1572 mode = get_DivMod_resmode(node);
1575 panic("invalid divmod node %+F", node);
/* operands must be full 32 bit (match_upconv_32) since div uses the
 * whole register contents */
1578 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1580 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1581 is the memory of the consumed address. We can have only the second op as address
1582 in Div nodes, so check only op2. */
1583 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division needs the dividend sign-extended into edx (IDiv);
 * unsigned division zeroes the high word with a Const 0 (Div) */
1585 if (mode_is_signed(mode)) {
1586 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1587 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1588 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1590 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1591 be_dep_on_frame(sign_extension);
1593 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1594 addr->index, new_mem, am.new_op2,
1595 am.new_op1, sign_extension);
/* keep the original pinned state (div can trap) */
1598 set_irn_pinned(new_node, get_irn_pinned(node));
1600 set_am_attributes(new_node, &am);
1601 SET_IA32_ORIG_NODE(new_node, node);
1603 new_node = fix_mem_proj(new_node, &am);
1609 * Generates an ia32 Mod.
/* Mod is handled by the common Div/Mod/DivMod transformation. */
1611 static ir_node *gen_Mod(ir_node *node)
1613 return create_Div(node);
1617 * Generates an ia32 Div.
/* Div is handled by the common Div/Mod/DivMod transformation. */
1619 static ir_node *gen_Div(ir_node *node)
1621 return create_Div(node);
1625 * Generates an ia32 DivMod.
/* DivMod is handled by the common Div/Mod/DivMod transformation. */
1627 static ir_node *gen_DivMod(ir_node *node)
1629 return create_Div(node);
1635 * Creates an ia32 floating Div.
1637 * @return The created ia32 xDiv node
1639 static ir_node *gen_Quot(ir_node *node)
1641 ir_node *op1 = get_Quot_left(node);
1642 ir_node *op2 = get_Quot_right(node);
/* floating-point division: SSE2 divss/divsd or x87 fdiv */
1644 if (ia32_cg_config.use_sse2) {
1645 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1647 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1653 * Creates an ia32 Shl.
1655 * @return The created ia32 Shl node
1657 static ir_node *gen_Shl(ir_node *node)
1659 ir_node *left = get_Shl_left(node);
1660 ir_node *right = get_Shl_right(node);
/* mode-neutral: a left shift does not care about the upper bits of
 * its first operand */
1662 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1663 match_mode_neutral | match_immediate);
1667 * Creates an ia32 Shr.
1669 * @return The created ia32 Shr node
1671 static ir_node *gen_Shr(ir_node *node)
1673 ir_node *left = get_Shr_left(node);
1674 ir_node *right = get_Shr_right(node);
/* no match_mode_neutral: a logical right shift depends on the exact
 * upper bits of its operand */
1676 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1682 * Creates an ia32 Sar.
1684 * @return The created ia32 Sar node
1686 static ir_node *gen_Shrs(ir_node *node)
1688 ir_node *left = get_Shrs_left(node);
1689 ir_node *right = get_Shrs_right(node);
/* Shrs by a constant (presumably 31, elided check -- TODO confirm)
 * is a 32->64 sign extension; use the dedicated helper */
1691 if (is_Const(right)) {
1692 ir_tarval *tv = get_Const_tarval(right);
1693 long val = get_tarval_long(tv);
1695 /* this is a sign extension */
1696 dbg_info *dbgi = get_irn_dbg_info(node);
1697 ir_node *block = be_transform_node(get_nodes_block(node));
1698 ir_node *new_op = be_transform_node(left);
1700 return create_sex_32_64(dbgi, block, new_op, node);
1704 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C == 16 or 24 sign-extends the low 16/8
 * bits of x; emit a sign-extending Conv instead of two shifts */
1705 if (is_Const(right) && is_Shl(left)) {
1706 ir_node *shl_left = get_Shl_left(left);
1707 ir_node *shl_right = get_Shl_right(left);
1708 if (is_Const(shl_right)) {
1709 ir_tarval *tv1 = get_Const_tarval(right);
1710 ir_tarval *tv2 = get_Const_tarval(shl_right);
1711 if (tv1 == tv2 && tarval_is_long(tv1)) {
1712 long val = get_tarval_long(tv1);
1713 if (val == 16 || val == 24) {
1714 dbg_info *dbgi = get_irn_dbg_info(node);
1715 ir_node *block = get_nodes_block(node);
1725 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain arithmetic shift right */
1734 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1740 * Creates an ia32 Rol.
1742 * @param op1 The first operator
1743 * @param op2 The second operator
1744 * @return The created ia32 RotL node
1746 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1748 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1754 * Creates an ia32 Ror.
1755 * NOTE: There is no RotR with immediate because this would always be a RotL
1756 * "imm-mode_size_bits" which can be pre-calculated.
1758 * @param op1 The first operator
1759 * @param op2 The second operator
1760 * @return The created ia32 RotR node
1762 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1764 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1770 * Creates an ia32 RotR or RotL (depending on the found pattern).
1772 * @return The created ia32 RotL or RotR node
1774 static ir_node *gen_Rotl(ir_node *node)
1776 ir_node *op1 = get_Rotl_left(node);
1777 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -n) is the same as Ror(x, n); use the rotate-right form */
1779 if (is_Minus(op2)) {
1780 return gen_Ror(node, op1, get_Minus_op(op2));
1783 return gen_Rol(node, op1, op2);
1789 * Transforms a Minus node.
1791 * @return The created ia32 Minus node
1793 static ir_node *gen_Minus(ir_node *node)
1795 ir_node *op = get_Minus_op(node);
1796 ir_node *block = be_transform_node(get_nodes_block(node));
1797 dbg_info *dbgi = get_irn_dbg_info(node);
1798 ir_mode *mode = get_irn_mode(node);
1803 if (mode_is_float(mode)) {
1804 ir_node *new_op = be_transform_node(op);
/* SSE2 has no fchs equivalent: negate by xoring with a sign-bit
 * constant loaded via address mode */
1805 if (ia32_cg_config.use_sse2) {
1806 /* TODO: non-optimal... if we have many xXors, then we should
1807 * rather create a load for the const and use that instead of
1808 * several AM nodes... */
1809 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1811 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1812 noreg_GP, nomem, new_op, noreg_xmm);
/* pick the single/double precision sign-mask constant */
1814 size = get_mode_size_bits(mode);
1815 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1817 set_ia32_am_sc(new_node, ent);
1818 set_ia32_op_type(new_node, ia32_AddrModeS);
1819 set_ia32_ls_mode(new_node, mode);
/* x87: change-sign instruction */
1821 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer: plain Neg */
1824 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1827 SET_IA32_ORIG_NODE(new_node, node);
1833 * Transforms a Not node.
1835 * @return The created ia32 Not node
1837 static ir_node *gen_Not(ir_node *node)
1839 ir_node *op = get_Not_op(node);
/* boolean Not and float Not must have been lowered away before the
 * backend runs */
1841 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1842 assert (! mode_is_float(get_irn_mode(node)));
1844 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Build an absolute-value (optionally negated) computation for op.
 * node is the original firm node, used only for debug annotation. */
1847 static ir_node *create_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1848 bool negate, ir_node *node)
1850 ir_node *new_block = be_transform_node(block);
1851 ir_mode *mode = get_irn_mode(op);
1857 if (mode_is_float(mode)) {
1858 new_op = be_transform_node(op);
/* SSE2: clear the sign bit by anding with an abs-mask constant */
1860 if (ia32_cg_config.use_sse2) {
1861 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1862 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1863 noreg_GP, nomem, new_op, noreg_fp);
1865 size = get_mode_size_bits(mode);
1866 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1868 set_ia32_am_sc(new_node, ent);
1870 SET_IA32_ORIG_NODE(new_node, node);
1872 set_ia32_op_type(new_node, ia32_AddrModeS);
1873 set_ia32_ls_mode(new_node, mode);
1875 /* TODO, implement -Abs case */
/* x87: fabs, followed by a change-sign if the negated form is asked
 * for */
1878 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1879 SET_IA32_ORIG_NODE(new_node, node);
1881 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1882 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs: sex = x >> 31; result = (x ^ sex) - sex
 * (or sex - (x ^ sex) for the negated form) */
1887 ir_node *sign_extension;
1889 if (get_mode_size_bits(mode) == 32) {
1890 new_op = be_transform_node(op);
1892 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1895 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1897 xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1898 nomem, new_op, sign_extension);
1899 SET_IA32_ORIG_NODE(xorn, node);
1902 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1903 nomem, sign_extension, xorn);
1905 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1906 nomem, xorn, sign_extension);
1908 SET_IA32_ORIG_NODE(new_node, node);
1915 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1917 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1919 dbg_info *dbgi = get_irn_dbg_info(cmp);
1920 ir_node *block = get_nodes_block(cmp);
1921 ir_node *new_block = be_transform_node(block);
1922 ir_node *op1 = be_transform_node(x);
1923 ir_node *op2 = be_transform_node(n);
/* bt copies bit n of x into the carry flag */
1925 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1929 * Transform a node returning a "flag" result.
1931 * @param node the node to transform
1932 * @param pnc_out out parameter receiving the pn_Cmp code (possibly with ia32 flag bits) to test the flags with
1934 static ir_node *get_flags_node(ir_node *node, int *pnc_out)
1941 /* we have a Cmp as input */
1942 if (is_Proj(node)) {
1943 ir_node *pred = get_Proj_pred(node);
1945 int pnc = get_Proj_pn_cmp(node);
/* try to match Cmp(And(1 << n, x), 0) / Cmp(And(1 << n, x), 1 << n)
 * and turn it into a bt instruction testing bit n of x */
1946 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1947 ir_node *l = get_Cmp_left(pred);
1948 ir_node *r = get_Cmp_right(pred);
1950 ir_node *la = get_And_left(l);
1951 ir_node *ra = get_And_right(l);
/* shift on the left And operand */
1953 ir_node *c = get_Shl_left(la);
1954 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1955 /* (1 << n) & ra) */
1956 ir_node *n = get_Shl_right(la);
1957 flags = gen_bt(pred, ra, n);
1958 /* we must generate a Jc/Jnc jump */
1959 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1962 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* shift on the right And operand (mirrored pattern) */
1967 ir_node *c = get_Shl_left(ra);
1968 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1969 /* la & (1 << n)) */
1970 ir_node *n = get_Shl_right(ra);
1971 flags = gen_bt(pred, la, n);
1972 /* we must generate a Jc/Jnc jump */
1973 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1976 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1982 /* add ia32 compare flags */
1984 ir_node *l = get_Cmp_left(pred);
1985 ir_mode *mode = get_irn_mode(l);
1986 if (mode_is_float(mode))
1987 pnc |= ia32_pn_Cmp_float;
1988 else if (! mode_is_signed(mode))
1989 pnc |= ia32_pn_Cmp_unsigned;
1992 flags = be_transform_node(pred);
1997 /* a mode_b value, we have to compare it against 0 */
1998 dbgi = get_irn_dbg_info(node);
1999 new_block = be_transform_node(get_nodes_block(node));
2000 new_op = be_transform_node(node);
2001 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
2002 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0)/* Test x,x sets ZF iff x == 0 */;
2003 *pnc_out = pn_Cmp_Lg;
2008 * Transforms a Load.
2010 * @return the created ia32 Load node
2012 static ir_node *gen_Load(ir_node *node)
2014 ir_node *old_block = get_nodes_block(node);
2015 ir_node *block = be_transform_node(old_block);
2016 ir_node *ptr = get_Load_ptr(node);
2017 ir_node *mem = get_Load_mem(node);
2018 ir_node *new_mem = be_transform_node(mem);
2021 dbg_info *dbgi = get_irn_dbg_info(node);
2022 ir_mode *mode = get_Load_mode(node);
2024 ia32_address_t addr;
2026 /* construct load address */
2027 memset(&addr, 0, sizeof(addr));
2028 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* transform base/index (elided branches presumably substitute noreg
 * for NULL -- TODO confirm) */
2035 base = be_transform_node(base);
2038 if (index == NULL) {
2041 index = be_transform_node(index);
/* pick the load flavour: SSE2 xLoad, x87 vfld, or a sign/zero
 * extending Conv_I2I for sub-32bit integer modes, plain Load else */
2044 if (mode_is_float(mode)) {
2045 if (ia32_cg_config.use_sse2) {
2046 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2049 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2053 assert(mode != mode_b);
2055 /* create a conv node with address mode for smaller modes */
2056 if (get_mode_size_bits(mode) < 32) {
2057 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2058 new_mem, noreg_GP, mode);
2060 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
/* propagate pinned state and attach the address-mode attributes */
2064 set_irn_pinned(new_node, get_irn_pinned(node));
2065 set_ia32_op_type(new_node, ia32_AddrModeS);
2066 set_ia32_ls_mode(new_node, mode);
2067 set_address(new_node, &addr);
/* a floating (unpinned) load may be rematerialized by the register
 * allocator instead of spilled */
2069 if (get_irn_pinned(node) == op_pin_state_floats) {
2070 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2071 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2072 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2073 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2076 SET_IA32_ORIG_NODE(new_node, node);
2078 be_dep_on_frame(new_node);
/* Decide whether node (a value projected from a Load) may be folded
 * into a destination-address-mode operation storing through ptr. */
2082 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2083 ir_node *ptr, ir_node *other)
2090 /* we only use address mode if we're the only user of the load */
2091 if (get_irn_n_edges(node) > 1)
2094 load = get_Proj_pred(node);
/* load and store must live in the same block */
2097 if (get_nodes_block(load) != block)
2100 /* store should have the same pointer as the load */
2101 if (get_Load_ptr(load) != ptr)
2104 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2105 if (other != NULL &&
2106 get_nodes_block(other) == block &&
2107 heights_reachable_in_block(ia32_heights, other, load)) {
/* further scheduling constraints may forbid AM */
2111 if (ia32_prevents_AM(block, load, mem))
2113 /* Store should be attached to the load via mem */
2114 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Build a read-modify-write (destination address mode) binop for a
 * Store(binop(Load(ptr), x)) pattern; returns NULL-ish via elided
 * fallthrough when no operand qualifies. */
2119 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2120 ir_node *mem, ir_node *ptr, ir_mode *mode,
2121 construct_binop_dest_func *func,
2122 construct_binop_dest_func *func8bit,
2123 match_flags_t flags)
2125 ir_node *src_block = get_nodes_block(node);
2133 ia32_address_mode_t am;
2134 ia32_address_t *addr = &am.addr;
2135 memset(&am, 0, sizeof(am));
2137 assert(flags & match_immediate); /* there is no destam node without... */
2138 commutative = (flags & match_commutative) != 0;
/* try op1 as the memory operand first; for commutative ops op2 may
 * serve as the memory operand too */
2140 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2141 build_address(&am, op1, ia32_create_am_double_use);
2142 new_op = create_immediate_or_transform(op2, 0);
2143 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2144 build_address(&am, op2, ia32_create_am_double_use);
2145 new_op = create_immediate_or_transform(op1, 0);
/* fill missing address parts with the no-register placeholder */
2150 if (addr->base == NULL)
2151 addr->base = noreg_GP;
2152 if (addr->index == NULL)
2153 addr->index = noreg_GP;
2154 if (addr->mem == NULL)
2157 dbgi = get_irn_dbg_info(node);
2158 block = be_transform_node(src_block);
2159 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8-bit operations need the dedicated 8-bit constructor */
2161 if (get_mode_size_bits(mode) == 8) {
2162 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2164 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2166 set_address(new_node, addr);
2167 set_ia32_op_type(new_node, ia32_AddrModeD);
2168 set_ia32_ls_mode(new_node, mode);
2169 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory proj to the new RMW node */
2171 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2172 mem_proj = be_transform_node(am.mem_proj);
2173 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a read-modify-write (destination address mode) unop for a
 * Store(unop(Load(ptr))) pattern. */
2178 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2179 ir_node *ptr, ir_mode *mode,
2180 construct_unop_dest_func *func)
2182 ir_node *src_block = get_nodes_block(node);
2188 ia32_address_mode_t am;
2189 ia32_address_t *addr = &am.addr;
/* bail out when folding the load is not legal */
2191 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2194 memset(&am, 0, sizeof(am));
2195 build_address(&am, op, ia32_create_am_double_use);
2197 dbgi = get_irn_dbg_info(node);
2198 block = be_transform_node(src_block);
2199 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2200 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2201 set_address(new_node, addr);
2202 set_ia32_op_type(new_node, ia32_AddrModeD);
2203 set_ia32_ls_mode(new_node, mode);
2204 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory proj to the new RMW node */
2206 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2207 mem_proj = be_transform_node(am.mem_proj);
2208 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Negate a pn_Cmp code, using a float mode when the ia32 float flag is
 * set so unordered handling is negated correctly. */
2213 static int ia32_get_negated_pnc(int pnc)
2215 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2216 return get_negated_pnc(pnc, mode);
/* Try to express Store(Mux(cond, 0/1, 1/0)) as a single SetccMem
 * writing the flag byte directly to memory; elided paths bail out. */
2219 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2221 ir_mode *mode = get_irn_mode(node);
2222 ir_node *mux_true = get_Mux_true(node);
2223 ir_node *mux_false = get_Mux_false(node);
2232 ia32_address_t addr;
/* setcc writes a single byte only */
2234 if (get_mode_size_bits(mode) != 8)
/* accept Mux(c, 1, 0) directly and Mux(c, 0, 1) with negated cc */
2237 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2239 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2245 cond = get_Mux_sel(node);
2246 flags = get_flags_node(cond, &pnc);
2247 /* we can't handle the float special cases with SetM */
2248 if (pnc & ia32_pn_Cmp_float)
/* Mux(c, 0, 1): store the negated condition */
2251 pnc = ia32_get_negated_pnc(pnc);
2253 build_address_ptr(&addr, ptr, mem);
2255 dbgi = get_irn_dbg_info(node);
2256 block = get_nodes_block(node);
2257 new_block = be_transform_node(block);
2258 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2259 addr.index, addr.mem, flags, pnc);
2260 set_address(new_node, &addr);
2261 set_ia32_op_type(new_node, ia32_AddrModeD);
2262 set_ia32_ls_mode(new_node, mode);
2263 SET_IA32_ORIG_NODE(new_node, node);
/* Try to turn a Store whose value is computed from a Load of the same
 * address into a single read-modify-write instruction; returns the new
 * node or falls through (elided) when no pattern matches. */
2268 static ir_node *try_create_dest_am(ir_node *node)
2270 ir_node *val = get_Store_value(node);
2271 ir_node *mem = get_Store_mem(node);
2272 ir_node *ptr = get_Store_ptr(node);
2273 ir_mode *mode = get_irn_mode(val);
2274 unsigned bits = get_mode_size_bits(mode);
2279 /* handle only GP modes for now... */
2280 if (!ia32_mode_needs_gp_reg(mode))
2284 /* store must be the only user of the val node */
2285 if (get_irn_n_edges(val) > 1)
2287 /* skip pointless convs */
2289 ir_node *conv_op = get_Conv_op(val);
2290 ir_mode *pred_mode = get_irn_mode(conv_op);
2291 if (!ia32_mode_needs_gp_reg(pred_mode))
2293 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2301 /* value must be in the same block */
2302 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation computing the stored value */
2305 switch (get_irn_opcode(val)) {
2307 op1 = get_Add_left(val);
2308 op2 = get_Add_right(val);
/* Add +-1 becomes inc/dec when the config allows it */
2309 if (ia32_cg_config.use_incdec) {
2310 if (is_Const_1(op2)) {
2311 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2313 } else if (is_Const_Minus_1(op2)) {
2314 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2318 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2319 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2320 match_commutative | match_immediate);
2323 op1 = get_Sub_left(val);
2324 op2 = get_Sub_right(val);
/* Sub-with-const should have been normalized to Add by the middleend */
2325 if (is_Const(op2)) {
2326 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2328 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2329 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2333 op1 = get_And_left(val);
2334 op2 = get_And_right(val);
2335 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2336 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2337 match_commutative | match_immediate);
2340 op1 = get_Or_left(val);
2341 op2 = get_Or_right(val);
2342 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2343 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2344 match_commutative | match_immediate);
2347 op1 = get_Eor_left(val);
2348 op2 = get_Eor_right(val);
2349 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2350 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2351 match_commutative | match_immediate);
2354 op1 = get_Shl_left(val);
2355 op2 = get_Shl_right(val);
2356 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2357 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2361 op1 = get_Shr_left(val);
2362 op2 = get_Shr_right(val);
2363 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2364 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2368 op1 = get_Shrs_left(val);
2369 op2 = get_Shrs_right(val);
2370 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2371 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2375 op1 = get_Rotl_left(val);
2376 op2 = get_Rotl_right(val);
2377 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2378 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2381 /* TODO: match ROR patterns... */
/* Mux(c, 0/1, 1/0) can become a SetccMem */
2383 new_node = try_create_SetMem(val, ptr, mem);
2387 op1 = get_Minus_op(val);
2388 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2391 /* should be lowered already */
2392 assert(mode != mode_b);
2393 op1 = get_Not_op(val);
2394 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned store must keep the replacement pinned too */
2400 if (new_node != NULL) {
2401 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2402 get_irn_pinned(node) == op_pin_state_pinned) {
2403 set_irn_pinned(new_node, op_pin_state_pinned);
/* True if mode is a signed 16- or 32-bit integer mode, i.e. a mode an
 * x87 fist store can produce directly. */
2410 static bool possible_int_mode_for_fp(ir_mode *mode)
2414 if (!mode_is_signed(mode))
2416 size = get_mode_size_bits(mode);
2417 if (size != 16 && size != 32)
/* True if node is a Conv from a float mode to an integer mode that fist
 * can handle (see possible_int_mode_for_fp). */
2422 static int is_float_to_int_conv(const ir_node *node)
2424 ir_mode *mode = get_irn_mode(node);
2428 if (!possible_int_mode_for_fp(mode))
2433 conv_op = get_Conv_op(node);
2434 conv_mode = get_irn_mode(conv_op);
2436 if (!mode_is_float(conv_mode))
2443 * Transform a Store(floatConst) into a sequence of
2446 * @return the created ia32 Store node
2448 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2450 ir_mode *mode = get_irn_mode(cns);
2451 unsigned size = get_mode_size_bytes(mode);
2452 ir_tarval *tv = get_Const_tarval(cns);
2453 ir_node *block = get_nodes_block(node);
2454 ir_node *new_block = be_transform_node(block);
2455 ir_node *ptr = get_Store_ptr(node);
2456 ir_node *mem = get_Store_mem(node);
2457 dbg_info *dbgi = get_irn_dbg_info(node);
2461 ia32_address_t addr;
/* the constant is emitted as one 32-bit integer store per 4 bytes */
2463 assert(size % 4 == 0);
2466 build_address_ptr(&addr, ptr, mem);
/* assemble the next 4 bytes of the tarval into a little-endian word */
2470 get_tarval_sub_bits(tv, ofs) |
2471 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2472 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2473 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2474 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2476 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2477 addr.index, addr.mem, imm);
2479 set_irn_pinned(new_node, get_irn_pinned(node));
2480 set_ia32_op_type(new_node, ia32_AddrModeD);
2481 set_ia32_ls_mode(new_node, mode_Iu);
2482 set_address(new_node, &addr);
2483 SET_IA32_ORIG_NODE(new_node, node);
/* collect the partial stores; the elided loop advances the offset
 * until the whole constant is written */
2486 ins[i++] = new_node;
2491 } while (size != 0);
/* combine the memory of all partial stores into one Sync */
2494 return new_rd_Sync(dbgi, new_block, i, ins);
2501 * Generate a vfist or vfisttp instruction.
2503 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2504 ir_node *mem, ir_node *val, ir_node **fist)
2508 if (ia32_cg_config.use_fisttp) {
2509 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2510 if other users exists */
2511 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2512 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* Keep the result proj alive so the popped value is not lost */
2513 be_new_Keep(block, 1, &value);
2515 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* plain fist needs the FPU control word set to truncation mode */
2518 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2521 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2527 * Transforms a general (no special case) Store.
2529 * @return the created ia32 Store node
2531 static ir_node *gen_general_Store(ir_node *node)
2533 ir_node *val = get_Store_value(node);
2534 ir_mode *mode = get_irn_mode(val);
2535 ir_node *block = get_nodes_block(node);
2536 ir_node *new_block = be_transform_node(block);
2537 ir_node *ptr = get_Store_ptr(node);
2538 ir_node *mem = get_Store_mem(node);
2539 dbg_info *dbgi = get_irn_dbg_info(node);
2540 ir_node *new_val, *new_node, *store;
2541 ia32_address_t addr;
2543 /* check for destination address mode */
2544 new_node = try_create_dest_am(node);
2545 if (new_node != NULL)
2548 /* construct store address */
2549 memset(&addr, 0, sizeof(addr));
2550 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* substitute noreg for missing base/index and transform */
2552 if (addr.base == NULL) {
2553 addr.base = noreg_GP;
2555 addr.base = be_transform_node(addr.base);
2558 if (addr.index == NULL) {
2559 addr.index = noreg_GP;
2561 addr.index = be_transform_node(addr.index);
2563 addr.mem = be_transform_node(mem);
2565 if (mode_is_float(mode)) {
2566 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2568 while (is_Conv(val) && mode == get_irn_mode(val)) {
2569 ir_node *op = get_Conv_op(val);
2570 if (!mode_is_float(get_irn_mode(op)))
/* float store: SSE2 movss/movsd or x87 fst */
2574 new_val = be_transform_node(val);
2575 if (ia32_cg_config.use_sse2) {
2576 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2577 addr.index, addr.mem, new_val);
2579 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2580 addr.index, addr.mem, new_val, mode);
/* Store(Conv(float -> int)) can be done by fist directly (x87 only) */
2583 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2584 val = get_Conv_op(val);
2586 /* TODO: is this optimisation still necessary at all (middleend)? */
2587 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2588 while (is_Conv(val)) {
2589 ir_node *op = get_Conv_op(val);
2590 if (!mode_is_float(get_irn_mode(op)))
2592 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2596 new_val = be_transform_node(val);
2597 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store: 8-bit stores need the dedicated node */
2599 new_val = create_immediate_or_transform(val, 0);
2600 assert(mode != mode_b);
2602 if (get_mode_size_bits(mode) == 8) {
2603 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2604 addr.index, addr.mem, new_val);
2606 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2607 addr.index, addr.mem, new_val);
/* attach pinned state and address-mode attributes to the store */
2612 set_irn_pinned(store, get_irn_pinned(node));
2613 set_ia32_op_type(store, ia32_AddrModeD);
2614 set_ia32_ls_mode(store, mode);
2616 set_address(store, &addr);
2617 SET_IA32_ORIG_NODE(store, node);
2623 * Transforms a Store.
2625 * @return the created ia32 Store node
/* Dispatcher: float constant stores get a special integer-store expansion,
 * everything else goes through gen_general_Store(). */
2627 static ir_node *gen_Store(ir_node *node)
2629 ir_node *val = get_Store_value(node);
2630 ir_mode *mode = get_irn_mode(val);
2632 if (mode_is_float(mode) && is_Const(val)) {
2633 /* We can transform every floating const store
2634 into a sequence of integer stores.
2635 If the constant is already in a register,
2636 it would be better to use it, but we don't
2637 have this information here. */
2638 return gen_float_const_Store(node, val);
2640 return gen_general_Store(node);
2644 * Transforms a Switch.
2646 * @return the created ia32 SwitchJmp node
2648 static ir_node *create_Switch(ir_node *node)
2650 dbg_info *dbgi = get_irn_dbg_info(node);
2651 ir_node *block = be_transform_node(get_nodes_block(node));
2652 ir_node *sel = get_Cond_selector(node);
2653 ir_node *new_sel = be_transform_node(sel);
2654 long switch_min = LONG_MAX;
2655 long switch_max = LONG_MIN;
2656 long default_pn = get_Cond_default_proj(node);
2658 const ir_edge_t *edge;
2660 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2662 /* determine the smallest switch case value */
2663 foreach_out_edge(node, edge) {
2664 ir_node *proj = get_edge_src_irn(edge);
2665 long pn = get_Proj_proj(proj);
/* The default Proj does not constrain the jump-table range. */
2666 if (pn == default_pn)
2669 if (pn < switch_min)
2671 if (pn > switch_max)
/* Refuse pathologically sparse/huge switches — the jump table would
 * need more than 128000 entries. */
2675 if ((unsigned long) (switch_max - switch_min) > 128000) {
2676 panic("Size of switch %+F bigger than 128000", node);
2679 if (switch_min != 0) {
2680 /* if smallest switch case is not 0 we need an additional sub */
2681 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2682 add_ia32_am_offs_int(new_sel, -switch_min);
2683 set_ia32_op_type(new_sel, ia32_AddrModeS);
2685 SET_IA32_ORIG_NODE(new_sel, node);
2688 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2689 SET_IA32_ORIG_NODE(new_node, node);
2695 * Transform a Cond node.
/* mode_b selectors become a conditional jump (Jcc) driven by a flags node;
 * any other selector mode is treated as a Switch. */
2697 static ir_node *gen_Cond(ir_node *node)
2699 ir_node *block = get_nodes_block(node);
2700 ir_node *new_block = be_transform_node(block);
2701 dbg_info *dbgi = get_irn_dbg_info(node);
2702 ir_node *sel = get_Cond_selector(node);
2703 ir_mode *sel_mode = get_irn_mode(sel);
2704 ir_node *flags = NULL;
2708 if (sel_mode != mode_b) {
2709 return create_Switch(node);
2712 /* we get flags from a Cmp */
2713 flags = get_flags_node(sel, &pnc);
2715 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2716 SET_IA32_ORIG_NODE(new_node, node);
2722 * Transform a be_Copy.
/* Duplicates the Copy and normalizes GP-register modes to mode_Iu. */
2724 static ir_node *gen_be_Copy(ir_node *node)
2726 ir_node *new_node = be_duplicate_node(node);
2727 ir_mode *mode = get_irn_mode(new_node);
2729 if (ia32_mode_needs_gp_reg(mode)) {
2730 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare. With fucomi the flags are produced directly;
 * otherwise the FPU status word is fetched (fnstsw) and moved into EFLAGS
 * via Sahf. An ftst shortcut is used for comparisons against constant 0. */
2736 static ir_node *create_Fucom(ir_node *node)
2738 dbg_info *dbgi = get_irn_dbg_info(node);
2739 ir_node *block = get_nodes_block(node);
2740 ir_node *new_block = be_transform_node(block);
2741 ir_node *left = get_Cmp_left(node);
2742 ir_node *new_left = be_transform_node(left);
2743 ir_node *right = get_Cmp_right(node);
2747 if (ia32_cg_config.use_fucomi) {
2748 new_right = be_transform_node(right);
2749 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2751 set_ia32_commutative(new_node);
2752 SET_IA32_ORIG_NODE(new_node, node);
2754 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2755 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2757 new_right = be_transform_node(right);
2758 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2761 set_ia32_commutative(new_node);
2763 SET_IA32_ORIG_NODE(new_node, node);
/* Transfer AH (FPU status flags) into the EFLAGS register. */
2765 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2766 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE Ucomi float compare, allowing one operand to come from
 * memory via address-mode matching. */
2772 static ir_node *create_Ucomi(ir_node *node)
2774 dbg_info *dbgi = get_irn_dbg_info(node);
2775 ir_node *src_block = get_nodes_block(node);
2776 ir_node *new_block = be_transform_node(src_block);
2777 ir_node *left = get_Cmp_left(node);
2778 ir_node *right = get_Cmp_right(node);
2780 ia32_address_mode_t am;
2781 ia32_address_t *addr = &am.addr;
2783 match_arguments(&am, src_block, left, right, NULL,
2784 match_commutative | match_am);
2786 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2787 addr->mem, am.new_op1, am.new_op2,
2789 set_am_attributes(new_node, &am);
2791 SET_IA32_ORIG_NODE(new_node, node);
/* Reroute the memory Proj if a source address mode was folded in. */
2793 new_node = fix_mem_proj(new_node, &am);
2799 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2800 * to fold an and into a test node
2802 static bool can_fold_test_and(ir_node *node)
2804 const ir_edge_t *edge;
2806 /** we can only have eq and lg projs */
2807 foreach_out_edge(node, edge) {
2808 ir_node *proj = get_edge_src_irn(edge);
2809 pn_Cmp pnc = get_Proj_pn_cmp(proj);
/* Any relational (ordered) use forbids the And -> Test folding. */
2810 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2818 * returns true if it is assured, that the upper bits of a node are "clean"
2819 * which means for a 16 or 8 bit value, that the upper bits in the register
2820 * are 0 for unsigned and a copy of the last significant bit for signed
2823 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2825 assert(ia32_mode_needs_gp_reg(mode));
/* A full-width (>=32 bit) value has no "upper bits" to worry about. */
2826 if (get_mode_size_bits(mode) >= 32)
/* Projs are transparent: ask their producer. */
2829 if (is_Proj(transformed_node))
2830 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2832 switch (get_ia32_irn_opcode(transformed_node)) {
2833 case iro_ia32_Conv_I2I:
2834 case iro_ia32_Conv_I2I8Bit: {
2835 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* The conv must extend with the same signedness we need. */
2836 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2838 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2845 if (mode_is_signed(mode)) {
2846 return false; /* TODO handle signed modes */
2848 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2849 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2850 const ia32_immediate_attr_t *attr
2851 = get_ia32_immediate_attr_const(right);
/* A shift right by at least (32 - bits) zeroes the upper bits. */
2852 if (attr->symconst == 0 &&
2853 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2857 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2861 /* TODO too conservative if shift amount is constant */
2862 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand suffices (masking). */
2865 if (!mode_is_signed(mode)) {
2867 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2868 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2870 /* TODO if one is known to be zero extended, then || is sufficient */
2875 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2876 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2878 case iro_ia32_Const:
2879 case iro_ia32_Immediate: {
2880 const ia32_immediate_attr_t *attr =
2881 get_ia32_immediate_attr_const(transformed_node);
2882 if (mode_is_signed(mode)) {
/* Signed: upper bits must all equal the sign bit. */
2883 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2884 return shifted == 0 || shifted == -1;
/* Unsigned: upper bits must be zero. */
2886 unsigned long shifted = (unsigned long)attr->offset;
2887 shifted >>= get_mode_size_bits(mode);
2888 return shifted == 0;
2898 * Generate code for a Cmp.
2900 static ir_node *gen_Cmp(ir_node *node)
2902 dbg_info *dbgi = get_irn_dbg_info(node);
2903 ir_node *block = get_nodes_block(node);
2904 ir_node *new_block = be_transform_node(block);
2905 ir_node *left = get_Cmp_left(node);
2906 ir_node *right = get_Cmp_right(node);
2907 ir_mode *cmp_mode = get_irn_mode(left);
2909 ia32_address_mode_t am;
2910 ia32_address_t *addr = &am.addr;
/* Float compares are delegated: SSE2 -> Ucomi, x87 -> Fucom. */
2913 if (mode_is_float(cmp_mode)) {
2914 if (ia32_cg_config.use_sse2) {
2915 return create_Ucomi(node);
2917 return create_Fucom(node);
2921 assert(ia32_mode_needs_gp_reg(cmp_mode));
2923 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2924 cmp_unsigned = !mode_is_signed(cmp_mode);
2925 if (is_Const_0(right) &&
2927 get_irn_n_edges(left) == 1 &&
2928 can_fold_test_and(node)) {
2929 /* Test(and_left, and_right) */
2930 ir_node *and_left = get_And_left(left);
2931 ir_node *and_right = get_And_right(left);
2933 /* matze: code here used mode instead of cmd_mode, I think it is always
2934 * the same as cmp_mode, but I leave this here to see if this is really
2937 assert(get_irn_mode(and_left) == cmp_mode);
2939 match_arguments(&am, block, and_left, and_right, NULL,
2941 match_am | match_8bit_am | match_16bit_am |
2942 match_am_and_immediates | match_immediate);
2944 /* use 32bit compare mode if possible since the opcode is smaller */
2945 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2946 upper_bits_clean(am.new_op2, cmp_mode)) {
2947 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2950 if (get_mode_size_bits(cmp_mode) == 8) {
2951 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2952 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2955 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2956 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2959 /* Cmp(left, right) */
2960 match_arguments(&am, block, left, right, NULL,
2961 match_commutative | match_am | match_8bit_am |
2962 match_16bit_am | match_am_and_immediates |
2964 /* use 32bit compare mode if possible since the opcode is smaller */
2965 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2966 upper_bits_clean(am.new_op2, cmp_mode)) {
2967 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2970 if (get_mode_size_bits(cmp_mode) == 8) {
2971 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2972 addr->index, addr->mem, am.new_op1,
2973 am.new_op2, am.ins_permuted,
2976 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2977 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2980 set_am_attributes(new_node, &am);
2981 set_ia32_ls_mode(new_node, cmp_mode);
2983 SET_IA32_ORIG_NODE(new_node, node);
2985 new_node = fix_mem_proj(new_node, &am);
/* Build a CMovcc for a Mux whose values live in GP registers. ins_permuted
 * from the argument matcher means true/false operands were swapped, so the
 * condition code is negated to compensate. */
2990 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2993 dbg_info *dbgi = get_irn_dbg_info(node);
2994 ir_node *block = get_nodes_block(node);
2995 ir_node *new_block = be_transform_node(block);
2996 ir_node *val_true = get_Mux_true(node);
2997 ir_node *val_false = get_Mux_false(node);
2999 ia32_address_mode_t am;
3000 ia32_address_t *addr;
3002 assert(ia32_cg_config.use_cmov);
3003 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3007 match_arguments(&am, block, val_false, val_true, flags,
3008 match_commutative | match_am | match_16bit_am | match_mode_neutral);
3010 if (am.ins_permuted)
3011 pnc = ia32_get_negated_pnc(pnc);
3013 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3014 addr->mem, am.new_op1, am.new_op2, new_flags,
3016 set_am_attributes(new_node, &am);
3018 SET_IA32_ORIG_NODE(new_node, node);
3020 new_node = fix_mem_proj(new_node, &am);
3026 * Creates a ia32 Setcc instruction.
3028 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3029 ir_node *flags, int pnc,
3032 ir_mode *mode = get_irn_mode(orig_node);
3035 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3036 SET_IA32_ORIG_NODE(new_node, orig_node);
3038 /* we might need to conv the result up */
/* Setcc only writes an 8-bit register; zero-extend for wider results. */
3039 if (get_mode_size_bits(mode) > 8) {
3040 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3041 nomem, new_node, mode_Bu);
3042 SET_IA32_ORIG_NODE(new_node, orig_node);
3049 * Create instruction for an unsigned Difference or Zero.
/* Branchless doz(a, b): computes (a - b) masked by ~sbb(0), i.e. the
 * difference when no borrow occurred and 0 otherwise. */
3051 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3053 ir_mode *mode = get_irn_mode(psi);
3063 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3064 match_mode_neutral | match_am | match_immediate | match_two_users);
3066 block = get_nodes_block(new_node);
/* Locate the Sub node itself; gen_binop may have wrapped it in a Proj. */
3068 if (is_Proj(new_node)) {
3069 sub = get_Proj_pred(new_node);
3070 assert(is_ia32_Sub(sub));
/* Turn the Sub into mode_T so both result and flags can be projected. */
3073 set_irn_mode(sub, mode_T);
3074 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3076 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3078 dbgi = get_irn_dbg_info(psi);
3079 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3080 notn = new_bd_ia32_Not(dbgi, block, sbb);
3082 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3083 set_ia32_commutative(new_node);
3088 * Create an const array of two float consts.
3090 * @param c0 the first constant
3091 * @param c1 the second constant
3092 * @param new_mode IN/OUT for the mode of the constants, if NULL
3093 * smallest possible mode will be used
3095 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3098 ir_mode *mode = *new_mode;
3100 ir_initializer_t *initializer;
3101 ir_tarval *tv0 = get_Const_tarval(c0);
3102 ir_tarval *tv1 = get_Const_tarval(c1);
3105 /* detect the best mode for the constants */
3106 mode = get_tarval_mode(tv0);
/* Try to narrow to float, then double, if both values convert losslessly. */
3108 if (mode != mode_F) {
3109 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3110 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3112 tv0 = tarval_convert_to(tv0, mode);
3113 tv1 = tarval_convert_to(tv1, mode);
3114 } else if (mode != mode_D) {
3115 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3116 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3118 tv0 = tarval_convert_to(tv0, mode);
3119 tv1 = tarval_convert_to(tv1, mode);
3126 tp = ia32_create_float_type(mode, 4);
3127 tp = ia32_create_float_array(tp);
/* Emit the pair as a private, constant global entity. */
3129 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3131 set_entity_ld_ident(ent, get_entity_ident(ent));
3132 set_entity_visibility(ent, ir_visibility_private);
3133 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3135 initializer = create_initializer_compound(2);
3137 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3138 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3140 set_entity_initializer(ent, initializer);
3147 * Possible transformations for creating a Setcc.
/* NOTE(review): the enumerator list and most struct members are not visible
 * in this excerpt. */
3149 enum setcc_transform_insn {
3162 typedef struct setcc_transform {
3166 enum setcc_transform_insn transform;
3170 } setcc_transform_t;
3173 * Setcc can only handle 0 and 1 result.
3174 * Find a transformation that creates 0 and 1 from
/* Plans a sequence of steps (SET, ADD, LEA, SHL, NEG, NOT, AND, ...) that
 * turns the 0/1 Setcc result into the constants f/t required by the Mux. */
3177 static void find_const_transform(int pnc, ir_tarval *t, ir_tarval *f,
3178 setcc_transform_t *res)
/* Normalize so that t is non-zero and the larger of the two constants,
 * negating the condition when operands are swapped. */
3184 if (tarval_is_null(t)) {
3188 pnc = ia32_get_negated_pnc(pnc);
3189 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3190 // now, t is the bigger one
3194 pnc = ia32_get_negated_pnc(pnc);
/* Non-zero f: emit a final ADD f and reduce to the (t - f, 0) case. */
3198 if (! tarval_is_null(f)) {
3199 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3202 res->steps[step].transform = SETCC_TR_ADD;
3204 if (t == tarval_bad)
3205 panic("constant subtract failed");
3206 if (! tarval_is_long(f))
3207 panic("tarval is not long");
3209 res->steps[step].val = get_tarval_long(f);
3211 f = tarval_sub(f, f, NULL);
3212 assert(tarval_is_null(f));
/* t == 1: a plain Setcc suffices. */
3215 if (tarval_is_one(t)) {
3216 res->steps[step].transform = SETCC_TR_SET;
3217 res->num_steps = ++step;
/* t == -1: negate the Setcc result. */
3221 if (tarval_is_minus_one(t)) {
3222 res->steps[step].transform = SETCC_TR_NEG;
3224 res->steps[step].transform = SETCC_TR_SET;
3225 res->num_steps = ++step;
3228 if (tarval_is_long(t)) {
3229 long v = get_tarval_long(t);
3231 res->steps[step].val = 0;
/* Small multipliers map to LEA/SHL combinations (scale encodes the
 * shift amount, LEAxx adds the value once more: (a << s) + a). */
3234 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3236 res->steps[step].transform = SETCC_TR_LEAxx;
3237 res->steps[step].scale = 3; /* (a << 3) + a */
3240 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3242 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3243 res->steps[step].scale = 3; /* (a << 3) */
3246 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3248 res->steps[step].transform = SETCC_TR_LEAxx;
3249 res->steps[step].scale = 2; /* (a << 2) + a */
3252 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3254 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3255 res->steps[step].scale = 2; /* (a << 2) */
3258 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3260 res->steps[step].transform = SETCC_TR_LEAxx;
3261 res->steps[step].scale = 1; /* (a << 1) + a */
3264 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3266 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3267 res->steps[step].scale = 1; /* (a << 1) */
3270 res->num_steps = step;
/* General case: mask to a single bit (AND) or fall back to NEG/SHL. */
3273 if (! tarval_is_single_bit(t)) {
3274 res->steps[step].transform = SETCC_TR_AND;
3275 res->steps[step].val = v;
3277 res->steps[step].transform = SETCC_TR_NEG;
3279 int v = get_tarval_lowest_bit(t);
3282 res->steps[step].transform = SETCC_TR_SHL;
3283 res->steps[step].scale = v;
3287 res->steps[step].transform = SETCC_TR_SET;
3288 res->num_steps = ++step;
3291 panic("tarval is not long");
3295 * Transforms a Mux node into some code sequence.
3297 * @return The transformed node.
3299 static ir_node *gen_Mux(ir_node *node)
3301 dbg_info *dbgi = get_irn_dbg_info(node);
3302 ir_node *block = get_nodes_block(node);
3303 ir_node *new_block = be_transform_node(block);
3304 ir_node *mux_true = get_Mux_true(node);
3305 ir_node *mux_false = get_Mux_false(node);
3306 ir_node *cond = get_Mux_sel(node);
3307 ir_mode *mode = get_irn_mode(node);
3313 assert(get_irn_mode(cond) == mode_b);
/* Mux patterns that compute fabs() are handled by a dedicated builder. */
3315 is_abs = be_mux_is_abs(cond, mux_true, mux_false);
3317 return create_abs(dbgi, block, be_get_abs_op(cond), is_abs < 0, node);
3320 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3321 if (mode_is_float(mode)) {
3322 ir_node *cmp = get_Proj_pred(cond);
3323 ir_node *cmp_left = get_Cmp_left(cmp);
3324 ir_node *cmp_right = get_Cmp_right(cmp);
3325 int pnc = get_Proj_proj(cond);
/* SSE2: recognize min/max idioms and emit xMin/xMax directly. */
3327 if (ia32_cg_config.use_sse2) {
3328 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3329 if (cmp_left == mux_true && cmp_right == mux_false) {
3330 /* Mux(a <= b, a, b) => MIN */
3331 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3332 match_commutative | match_am | match_two_users);
3333 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3334 /* Mux(a <= b, b, a) => MAX */
3335 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3336 match_commutative | match_am | match_two_users);
3338 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3339 if (cmp_left == mux_true && cmp_right == mux_false) {
3340 /* Mux(a >= b, a, b) => MAX */
3341 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3342 match_commutative | match_am | match_two_users);
3343 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3344 /* Mux(a >= b, b, a) => MIN */
3345 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3346 match_commutative | match_am | match_two_users);
/* Float Mux between two constants: materialize both in a 2-element
 * constant array and load the selected one, indexing with the scaled
 * Setcc result. */
3351 if (is_Const(mux_true) && is_Const(mux_false)) {
3352 ia32_address_mode_t am;
3357 flags = get_flags_node(cond, &pnc);
3358 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3360 if (ia32_cg_config.use_sse2) {
3361 /* cannot load from different mode on SSE */
3364 /* x87 can load any mode */
3368 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* Scale the 0/1 selector by the element size to form the index. */
3370 switch (get_mode_size_bytes(new_mode)) {
3380 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3381 set_ia32_am_scale(new_node, 2);
3386 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3387 set_ia32_am_scale(new_node, 1);
3390 /* arg, shift 16 NOT supported */
3392 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3395 panic("Unsupported constant size");
/* Build the source address mode for the constant-array load. */
3398 am.ls_mode = new_mode;
3399 am.addr.base = get_symconst_base();
3400 am.addr.index = new_node;
3401 am.addr.mem = nomem;
3403 am.addr.scale = scale;
3404 am.addr.use_frame = 0;
3405 am.addr.frame_entity = NULL;
3406 am.addr.symconst_sign = 0;
3407 am.mem_proj = am.addr.mem;
3408 am.op_type = ia32_AddrModeS;
3411 am.pinned = op_pin_state_floats;
3413 am.ins_permuted = 0;
3415 if (ia32_cg_config.use_sse2)
3416 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3418 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3419 set_am_attributes(load, &am);
3421 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3423 panic("cannot transform floating point Mux");
/* Integer Mux. */
3426 assert(ia32_mode_needs_gp_reg(mode));
3428 if (is_Proj(cond)) {
3429 ir_node *cmp = get_Proj_pred(cond);
3431 ir_node *cmp_left = get_Cmp_left(cmp);
3432 ir_node *cmp_right = get_Cmp_right(cmp);
3433 ir_node *val_true = mux_true;
3434 ir_node *val_false = mux_false;
3435 int pnc = get_Proj_proj(cond);
/* Canonicalize: move a constant-0 true-value to the false slot. */
3437 if (is_Const(val_true) && is_Const_null(val_true)) {
3438 ir_node *tmp = val_false;
3439 val_false = val_true;
3441 pnc = ia32_get_negated_pnc(pnc);
/* Mux(a >/>= b, a - b, 0) and the mirrored form => difference-or-zero. */
3443 if (is_Const_0(val_false) && is_Sub(val_true)) {
3444 if ((pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge)
3445 && get_Sub_left(val_true) == cmp_left
3446 && get_Sub_right(val_true) == cmp_right) {
3447 return create_doz(node, cmp_left, cmp_right);
3449 if ((pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le)
3450 && get_Sub_left(val_true) == cmp_right
3451 && get_Sub_right(val_true) == cmp_left) {
3452 return create_doz(node, cmp_right, cmp_left);
3458 flags = get_flags_node(cond, &pnc);
3460 if (is_Const(mux_true) && is_Const(mux_false)) {
3461 /* both are const, good */
3462 ir_tarval *tv_true = get_Const_tarval(mux_true);
3463 ir_tarval *tv_false = get_Const_tarval(mux_false);
3464 setcc_transform_t res;
3467 find_const_transform(pnc, tv_true, tv_false, &res);
/* Emit the planned steps in reverse (the Setcc/Sbb0 seed comes last in
 * the plan but must be created first). */
3469 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3472 switch (res.steps[step].transform) {
3474 imm = ia32_immediate_from_long(res.steps[step].val);
3475 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3477 case SETCC_TR_ADDxx:
3478 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3481 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3482 set_ia32_am_scale(new_node, res.steps[step].scale);
3483 set_ia32_am_offs_int(new_node, res.steps[step].val);
3485 case SETCC_TR_LEAxx:
3486 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3487 set_ia32_am_scale(new_node, res.steps[step].scale);
3488 set_ia32_am_offs_int(new_node, res.steps[step].val);
3491 imm = ia32_immediate_from_long(res.steps[step].scale);
3492 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3495 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3498 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3501 imm = ia32_immediate_from_long(res.steps[step].val);
3502 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3505 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, node);
3508 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3511 panic("unknown setcc transform");
/* General case: conditional move. */
3515 new_node = create_CMov(node, cond, flags, pnc);
3523 * Create a conversion from x87 state register to general purpose.
/* Spills the x87 value to the frame with fist(tp) and reloads it as an
 * integer Load; the classic x87 float->int sequence. */
3525 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3527 ir_node *block = be_transform_node(get_nodes_block(node));
3528 ir_node *op = get_Conv_op(node);
3529 ir_node *new_op = be_transform_node(op);
3530 ir_graph *irg = current_ir_graph;
3531 dbg_info *dbgi = get_irn_dbg_info(node);
3532 ir_mode *mode = get_irn_mode(node);
3533 ir_node *fist, *load, *mem;
3535 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3536 set_irn_pinned(fist, op_pin_state_floats);
3537 set_ia32_use_frame(fist);
3538 set_ia32_op_type(fist, ia32_AddrModeD);
3540 assert(get_mode_size_bits(mode) <= 32);
3541 /* exception we can only store signed 32 bit integers, so for unsigned
3542 we store a 64bit (signed) integer and load the lower bits */
3543 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3544 set_ia32_ls_mode(fist, mode_Ls);
3546 set_ia32_ls_mode(fist, mode_Is);
3548 SET_IA32_ORIG_NODE(fist, node);
/* Reload the (lower 32 bits of the) stored integer from the frame. */
3551 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3553 set_irn_pinned(load, op_pin_state_floats);
3554 set_ia32_use_frame(load);
3555 set_ia32_op_type(load, ia32_AddrModeS);
3556 set_ia32_ls_mode(load, mode_Is);
/* Request a frame slot wide enough for the fist's store mode. */
3557 if (get_ia32_ls_mode(fist) == mode_Ls) {
3558 ia32_attr_t *attr = get_ia32_attr(load);
3559 attr->data.need_64bit_stackent = 1;
3561 ia32_attr_t *attr = get_ia32_attr(load);
3562 attr->data.need_32bit_stackent = 1;
3564 SET_IA32_ORIG_NODE(load, node);
3566 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3570 * Creates a x87 strict Conv by placing a Store and a Load
/* Forces rounding to tgt_mode precision by a store/load round trip through
 * the frame (x87 registers always hold 80-bit values internally). */
3572 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3574 ir_node *block = get_nodes_block(node);
3575 ir_graph *irg = get_Block_irg(block);
3576 dbg_info *dbgi = get_irn_dbg_info(node);
3577 ir_node *frame = get_irg_frame(irg);
3578 ir_node *store, *load;
3581 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3582 set_ia32_use_frame(store);
3583 set_ia32_op_type(store, ia32_AddrModeD);
3584 SET_IA32_ORIG_NODE(store, node);
3586 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3587 set_ia32_use_frame(load);
3588 set_ia32_op_type(load, ia32_AddrModeS);
3589 SET_IA32_ORIG_NODE(load, node);
3591 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/* Create an integer-to-integer conversion node, choosing the 8-bit variant
 * of the constructor when the target mode is 8 bits wide. */
3595 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3596 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3598 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3600 func = get_mode_size_bits(mode) == 8 ?
3601 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3602 return func(dbgi, block, base, index, mem, val, mode);
3606 * Create a conversion from general purpose to x87 register
/* int -> x87 float: either fild directly from memory (source AM) or spill
 * the GP value to the frame and fild it from there. */
3608 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3610 ir_node *src_block = get_nodes_block(node);
3611 ir_node *block = be_transform_node(src_block);
3612 ir_graph *irg = get_Block_irg(block);
3613 dbg_info *dbgi = get_irn_dbg_info(node);
3614 ir_node *op = get_Conv_op(node);
3615 ir_node *new_op = NULL;
3617 ir_mode *store_mode;
3622 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3623 if (possible_int_mode_for_fp(src_mode)) {
3624 ia32_address_mode_t am;
3626 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3627 if (am.op_type == ia32_AddrModeS) {
3628 ia32_address_t *addr = &am.addr;
3630 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3631 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3633 set_am_attributes(fild, &am);
3634 SET_IA32_ORIG_NODE(fild, node);
3636 fix_mem_proj(fild, &am);
/* No source AM possible: transform the operand into a register value. */
3641 if (new_op == NULL) {
3642 new_op = be_transform_node(op);
3645 mode = get_irn_mode(op);
3647 /* first convert to 32 bit signed if necessary */
3648 if (get_mode_size_bits(src_mode) < 32) {
3649 if (!upper_bits_clean(new_op, src_mode)) {
3650 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3651 SET_IA32_ORIG_NODE(new_op, node);
3656 assert(get_mode_size_bits(mode) == 32);
/* Spill the integer to the frame so fild can read it. */
3659 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3661 set_ia32_use_frame(store);
3662 set_ia32_op_type(store, ia32_AddrModeD);
3663 set_ia32_ls_mode(store, mode_Iu);
3665 /* exception for 32bit unsigned, do a 64bit spill+load */
3666 if (!mode_is_signed(mode)) {
/* Write a zero high word so the 64-bit value is the unsigned 32-bit
 * value, then fild it as a signed 64-bit integer. */
3669 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3671 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3672 noreg_GP, nomem, zero_const);
3674 set_ia32_use_frame(zero_store);
3675 set_ia32_op_type(zero_store, ia32_AddrModeD);
3676 add_ia32_am_offs_int(zero_store, 4);
3677 set_ia32_ls_mode(zero_store, mode_Iu);
3682 store = new_rd_Sync(dbgi, block, 2, in);
3683 store_mode = mode_Ls;
3685 store_mode = mode_Is;
3689 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3691 set_ia32_use_frame(fild);
3692 set_ia32_op_type(fild, ia32_AddrModeS);
3693 set_ia32_ls_mode(fild, store_mode);
3695 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3701 * Create a conversion from one integer mode into another one
3703 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3704 dbg_info *dbgi, ir_node *block, ir_node *op,
3707 ir_node *new_block = be_transform_node(block);
3709 ir_mode *smaller_mode;
3710 ia32_address_mode_t am;
3711 ia32_address_t *addr = &am.addr;
/* The conversion is governed by the narrower of the two modes. */
3714 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3715 smaller_mode = src_mode;
3717 smaller_mode = tgt_mode;
3720 #ifdef DEBUG_libfirm
3722 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3727 match_arguments(&am, block, NULL, op, NULL,
3728 match_am | match_8bit_am | match_16bit_am);
/* If the operand's upper bits are already clean the conv is a no-op. */
3730 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3731 /* unnecessary conv. in theory it shouldn't have been AM */
3732 assert(is_ia32_NoReg_GP(addr->base));
3733 assert(is_ia32_NoReg_GP(addr->index));
3734 assert(is_NoMem(addr->mem));
3735 assert(am.addr.offset == 0);
3736 assert(am.addr.symconst_ent == NULL);
3740 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3741 addr->mem, am.new_op2, smaller_mode);
3742 set_am_attributes(new_node, &am);
3743 /* match_arguments assume that out-mode = in-mode, this isn't true here
3745 set_ia32_ls_mode(new_node, smaller_mode);
3746 SET_IA32_ORIG_NODE(new_node, node);
3747 new_node = fix_mem_proj(new_node, &am);
3752 * Transforms a Conv node.
3754 * @return The created ia32 Conv node
/* Dispatch on (src_mode, tgt_mode): strict/no-op conversions, float->float,
 * float->int, int->float, ->mode_b and int->int each take their own path.
 * NOTE(review): lines are elided in this view; comments describe only the
 * visible statements. */
3756 static ir_node *gen_Conv(ir_node *node)
3758 ir_node *block = get_nodes_block(node);
3759 ir_node *new_block = be_transform_node(block);
3760 ir_node *op = get_Conv_op(node);
3761 ir_node *new_op = NULL;
3762 dbg_info *dbgi = get_irn_dbg_info(node);
3763 ir_mode *src_mode = get_irn_mode(op);
3764 ir_mode *tgt_mode = get_irn_mode(node);
3765 int src_bits = get_mode_size_bits(src_mode);
3766 int tgt_bits = get_mode_size_bits(tgt_mode);
3767 ir_node *res = NULL;
/* 64bit integer lowering must already have happened. */
3769 assert(!mode_is_int(src_mode) || src_bits <= 32);
3770 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3772 /* modeB -> X should already be lowered by the lower_mode_b pass */
3773 if (src_mode == mode_b) {
3774 panic("ConvB not lowered %+F", node);
/* Same-mode Conv: only meaningful when 'strict' (x87 rounding); with SSE2
 * even strict same-mode conversions are no-ops. */
3777 if (src_mode == tgt_mode) {
3778 if (get_Conv_strict(node)) {
3779 if (ia32_cg_config.use_sse2) {
3780 /* when we are in SSE mode, we can kill all strict no-op conversion */
3781 return be_transform_node(op);
3784 /* this should be optimized already, but who knows... */
3785 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3786 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3787 return be_transform_node(op);
3791 if (mode_is_float(src_mode)) {
3792 new_op = be_transform_node(op);
3793 /* we convert from float ... */
3794 if (mode_is_float(tgt_mode)) {
/* float -> float: SSE2 uses Conv_FP2FP; x87 only needs an explicit
 * rounding (strict) conversion, otherwise the value is kept as-is. */
3796 if (ia32_cg_config.use_sse2) {
3797 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3798 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3800 set_ia32_ls_mode(res, tgt_mode);
3802 if (get_Conv_strict(node)) {
3803 /* if fp_no_float_fold is not set then we assume that we
3804 * don't have any float operations in a non
3805 * mode_float_arithmetic mode and can skip strict upconvs */
3806 if (src_bits < tgt_bits) {
3807 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3810 res = gen_x87_strict_conv(tgt_mode, new_op);
3811 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3815 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* float -> int: SSE2 Conv_FP2I, otherwise the x87 fist-based helper. */
3820 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3821 if (ia32_cg_config.use_sse2) {
3822 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3824 set_ia32_ls_mode(res, src_mode);
3826 return gen_x87_fp_to_gp(node);
3830 /* we convert from int ... */
3831 if (mode_is_float(tgt_mode)) {
/* int -> float: SSE2 Conv_I2FP; x87 fild, plus a strict conversion when
 * the integer has more mantissa bits than the target float can hold. */
3833 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3834 if (ia32_cg_config.use_sse2) {
3835 new_op = be_transform_node(op);
3836 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3838 set_ia32_ls_mode(res, tgt_mode);
3840 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3841 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3842 res = gen_x87_gp_to_fp(node, src_mode);
3844 /* we need a strict-Conv, if the int mode has more bits than the
3846 if (float_mantissa < int_mantissa) {
3847 res = gen_x87_strict_conv(tgt_mode, res);
3848 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3852 } else if (tgt_mode == mode_b) {
3853 /* mode_b lowering already took care that we only have 0/1 values */
3854 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3855 src_mode, tgt_mode));
3856 return be_transform_node(op);
/* int -> int: same width is a no-op; different width goes through
 * create_I2I_Conv (movzx/movsx, possibly folded into address mode). */
3859 if (src_bits == tgt_bits) {
3860 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3861 src_mode, tgt_mode));
3862 return be_transform_node(op);
3865 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode 'node' as an ia32 Immediate (subject to the given
 * constraint letter); fall back to the normal transformation otherwise. */
3873 static ir_node *create_immediate_or_transform(ir_node *node,
3874 char immediate_constraint_type)
3876 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3877 if (new_node == NULL) {
3878 new_node = be_transform_node(node);
3884 * Transforms a FrameAddr into an ia32 Add.
/* Implemented as a frame-relative Lea: base = transformed frame pointer,
 * the entity offset is resolved later via set_ia32_frame_ent/use_frame. */
3886 static ir_node *gen_be_FrameAddr(ir_node *node)
3888 ir_node *block = be_transform_node(get_nodes_block(node));
3889 ir_node *op = be_get_FrameAddr_frame(node);
3890 ir_node *new_op = be_transform_node(op);
3891 dbg_info *dbgi = get_irn_dbg_info(node);
3894 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3895 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3896 set_ia32_use_frame(new_node);
3898 SET_IA32_ORIG_NODE(new_node, node);
3904 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Only rewrites the return when SSE2 is active and the (single) result is a
 * primitive float: the xmm value is stored to the frame, re-loaded into an
 * x87 register (vfld), and the Barrier is rebuilt with the new value/memory
 * inputs. All other returns are duplicated unchanged. */
3906 static ir_node *gen_be_Return(ir_node *node)
3908 ir_graph *irg = current_ir_graph;
3909 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3910 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3911 ir_entity *ent = get_irg_entity(irg);
3912 ir_type *tp = get_entity_type(ent);
3917 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3918 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3920 int pn_ret_val, pn_ret_mem, arity, i;
/* Fast path: nothing to do unless SSE2 + at least one return value. */
3922 assert(ret_val != NULL);
3923 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3924 return be_duplicate_node(node);
3927 res_type = get_method_res_type(tp, 0);
3929 if (! is_Primitive_type(res_type)) {
3930 return be_duplicate_node(node);
3933 mode = get_type_mode(res_type);
3934 if (! mode_is_float(mode)) {
3935 return be_duplicate_node(node);
3938 assert(get_method_n_ress(tp) == 1);
3940 pn_ret_val = get_Proj_proj(ret_val);
3941 pn_ret_mem = get_Proj_proj(ret_mem);
3943 /* get the Barrier */
3944 barrier = get_Proj_pred(ret_val);
3946 /* get result input of the Barrier */
3947 ret_val = get_irn_n(barrier, pn_ret_val);
3948 new_ret_val = be_transform_node(ret_val);
3950 /* get memory input of the Barrier */
3951 ret_mem = get_irn_n(barrier, pn_ret_mem);
3952 new_ret_mem = be_transform_node(ret_mem);
3954 frame = get_irg_frame(irg);
3956 dbgi = get_irn_dbg_info(barrier);
3957 block = be_transform_node(get_nodes_block(barrier));
3959 /* store xmm0 onto stack */
3960 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3961 new_ret_mem, new_ret_val);
3962 set_ia32_ls_mode(sse_store, mode);
3963 set_ia32_op_type(sse_store, ia32_AddrModeD);
3964 set_ia32_use_frame(sse_store);
3966 /* load into x87 register */
3967 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3968 set_ia32_op_type(fld, ia32_AddrModeS);
3969 set_ia32_use_frame(fld);
3971 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3972 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
/* Rebuild the Barrier: value/memory inputs replaced by the vfld results,
 * all other inputs transformed normally. */
3974 /* create a new barrier */
3975 arity = get_irn_arity(barrier);
3976 in = ALLOCAN(ir_node*, arity);
3977 for (i = 0; i < arity; ++i) {
3980 if (i == pn_ret_val) {
3982 } else if (i == pn_ret_mem) {
3985 ir_node *in = get_irn_n(barrier, i);
3986 new_in = be_transform_node(in);
3991 new_barrier = new_ir_node(dbgi, irg, block,
3992 get_irn_op(barrier), get_irn_mode(barrier),
3994 copy_node_attr(irg, barrier, new_barrier);
3995 be_duplicate_deps(barrier, new_barrier);
3996 be_set_transformed_node(barrier, new_barrier);
3998 /* transform normally */
3999 return be_duplicate_node(node);
4003 * Transform a be_AddSP into an ia32_SubSP.
/* The Add<->Sub swap is intentional: the ia32 stack grows downwards, so
 * allocating stack space (be_AddSP) is a subtraction from esp. */
4005 static ir_node *gen_be_AddSP(ir_node *node)
4007 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4008 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4010 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4011 match_am | match_immediate);
4015 * Transform a be_SubSP into an ia32_AddSP
/* Dual of gen_be_AddSP: freeing stack space adds to esp. */
4017 static ir_node *gen_be_SubSP(ir_node *node)
4019 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4020 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4022 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4023 match_am | match_immediate);
4027 * Change some phi modes
/* Duplicates the Phi (keeping the OLD, untransformed predecessors because
 * Phis may sit on loops) and attaches the register requirement matching its
 * mode: gp for integers/pointers, xmm or vfp for floats, none otherwise.
 * The predecessors are fixed up later; be_enqueue_preds schedules them. */
4029 static ir_node *gen_Phi(ir_node *node)
4031 const arch_register_req_t *req;
4032 ir_node *block = be_transform_node(get_nodes_block(node));
4033 ir_graph *irg = current_ir_graph;
4034 dbg_info *dbgi = get_irn_dbg_info(node);
4035 ir_mode *mode = get_irn_mode(node);
4038 if (ia32_mode_needs_gp_reg(mode)) {
4039 /* we shouldn't have any 64bit stuff around anymore */
4040 assert(get_mode_size_bits(mode) <= 32);
4041 /* all integer operations are on 32bit registers now */
4043 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4044 } else if (mode_is_float(mode)) {
4045 if (ia32_cg_config.use_sse2) {
4047 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4050 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4053 req = arch_no_register_req;
4056 /* phi nodes allow loops, so we use the old arguments for now
4057 * and fix this later */
4058 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4059 get_irn_in(node) + 1);
4060 copy_node_attr(irg, node, phi);
4061 be_duplicate_deps(node, phi);
4063 arch_set_out_register_req(phi, 0, req);
4065 be_enqueue_preds(node);
/* Transform an unconditional Jmp into an ia32 Jmp. */
4070 static ir_node *gen_Jmp(ir_node *node)
4072 ir_node *block = get_nodes_block(node);
4073 ir_node *new_block = be_transform_node(block);
4074 dbg_info *dbgi = get_irn_dbg_info(node);
4077 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4078 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump (IJmp). The target may be folded into an
 * address mode or an immediate via match_arguments. */
4086 static ir_node *gen_IJmp(ir_node *node)
4088 ir_node *block = get_nodes_block(node);
4089 ir_node *new_block = be_transform_node(block);
4090 dbg_info *dbgi = get_irn_dbg_info(node);
4091 ir_node *op = get_IJmp_target(node);
4093 ia32_address_mode_t am;
4094 ia32_address_t *addr = &am.addr;
4096 assert(get_irn_mode(op) == mode_P);
4098 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4100 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4101 addr->mem, am.new_op2);
4102 set_am_attributes(new_node, &am);
4103 SET_IA32_ORIG_NODE(new_node, node);
/* If a memory operand was folded in, reroute the memory Proj. */
4105 new_node = fix_mem_proj(new_node, &am);
4111 * Transform a Bound node.
/* Only the lower-bound==0 case (typical for Java array checks) is handled:
 * it becomes an unsigned compare (Sub) of index against upper, followed by
 * a Jcc on "unsigned less-than". The general case is rejected. */
4113 static ir_node *gen_Bound(ir_node *node)
4116 ir_node *lower = get_Bound_lower(node);
4117 dbg_info *dbgi = get_irn_dbg_info(node);
4119 if (is_Const_0(lower)) {
4120 /* typical case for Java */
4121 ir_node *sub, *res, *flags, *block;
4123 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4125 match_mode_neutral | match_am | match_immediate);
4127 block = get_nodes_block(res);
/* gen_binop may or may not have returned a Proj; normalize so that
 * 'sub' is the Sub node and 'res' its result Proj. */
4128 if (! is_Proj(res)) {
4130 set_irn_mode(sub, mode_T);
4131 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4133 sub = get_Proj_pred(res);
4135 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
4136 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4137 SET_IA32_ORIG_NODE(new_node, node);
4139 panic("generic Bound not supported in ia32 Backend");
/* Lowered shift-with-dependency nodes: each maps directly onto the
 * corresponding ia32 shift via gen_shift_binop. */
4145 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4147 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4148 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4150 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4151 match_immediate | match_mode_neutral);
4154 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4156 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4157 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4158 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
4162 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4164 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4165 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4166 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Lowered 64bit-arithmetic building blocks. l_Add/l_Sub produce a mode_T
 * node so the carry/borrow flags can be consumed by the matching
 * l_Adc/l_Sbb in the high word. */
4170 static ir_node *gen_ia32_l_Add(ir_node *node)
4172 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4173 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4174 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4175 match_commutative | match_am | match_immediate |
4176 match_mode_neutral);
/* gen_binop may have wrapped the Add in a result Proj; unwrap so the
 * Add itself can be switched to mode_T (flags output needed). */
4178 if (is_Proj(lowered)) {
4179 lowered = get_Proj_pred(lowered);
4181 assert(is_ia32_Add(lowered));
4182 set_irn_mode(lowered, mode_T);
/* Add-with-carry: consumes the flags of the low-word l_Add. */
4188 static ir_node *gen_ia32_l_Adc(ir_node *node)
4190 return gen_binop_flags(node, new_bd_ia32_Adc,
4191 match_commutative | match_am | match_immediate |
4192 match_mode_neutral);
4196 * Transforms a l_MulS into a "real" MulS node.
4198 * @return the created ia32 Mul node
4200 static ir_node *gen_ia32_l_Mul(ir_node *node)
4202 ir_node *left = get_binop_left(node);
4203 ir_node *right = get_binop_right(node);
4205 return gen_binop(node, left, right, new_bd_ia32_Mul,
4206 match_commutative | match_am | match_mode_neutral);
4210 * Transforms a l_IMulS into a "real" IMul1OPS node.
4212 * @return the created ia32 IMul1OP node
4214 static ir_node *gen_ia32_l_IMul(ir_node *node)
4216 ir_node *left = get_binop_left(node);
4217 ir_node *right = get_binop_right(node);
4219 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4220 match_commutative | match_am | match_mode_neutral);
4223 static ir_node *gen_ia32_l_Sub(ir_node *node)
4225 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4226 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4227 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4228 match_am | match_immediate | match_mode_neutral);
/* Same unwrap-and-retag dance as gen_ia32_l_Add (flags for l_Sbb). */
4230 if (is_Proj(lowered)) {
4231 lowered = get_Proj_pred(lowered);
4233 assert(is_ia32_Sub(lowered));
4234 set_irn_mode(lowered, mode_T);
/* Subtract-with-borrow: consumes the flags of the low-word l_Sub. */
4240 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4242 return gen_binop_flags(node, new_bd_ia32_Sbb,
4243 match_am | match_immediate | match_mode_neutral);
4247 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4248 * op1 - target to be shifted
4249 * op2 - contains bits to be shifted into target
4251 * Only op3 can be an immediate.
4253 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4254 ir_node *low, ir_node *count)
4256 ir_node *block = get_nodes_block(node);
4257 ir_node *new_block = be_transform_node(block);
4258 dbg_info *dbgi = get_irn_dbg_info(node);
4259 ir_node *new_high = be_transform_node(high);
4260 ir_node *new_low = be_transform_node(low);
4264 /* the shift amount can be any mode that is bigger than 5 bits, since all
4265 * other bits are ignored anyway */
/* Strip single-user integer Convs around the count — hardware masks the
 * shift amount to the low bits anyway. */
4266 while (is_Conv(count) &&
4267 get_irn_n_edges(count) == 1 &&
4268 mode_is_int(get_irn_mode(count))) {
4269 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4270 count = get_Conv_op(count);
4272 new_count = create_immediate_or_transform(count, 0);
4274 if (is_ia32_l_ShlD(node)) {
4275 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4278 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4281 SET_IA32_ORIG_NODE(new_node, node);
/* Thin wrappers extracting the three operands of l_ShlD / l_ShrD. */
4286 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4288 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4289 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4290 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4291 return gen_lowered_64bit_shifts(node, high, low, count);
4294 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4296 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4297 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4298 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4299 return gen_lowered_64bit_shifts(node, high, low, count);
/* Convert a lowered 64bit integer (low/high word pair) to float on x87:
 * both words are stored to a frame slot, loaded back as one 64bit integer
 * with fild, and — for unsigned sources — corrected by conditionally adding
 * 2^64 (the ULL bias constant) when the sign bit of the high word was set. */
4302 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4304 ir_node *src_block = get_nodes_block(node);
4305 ir_node *block = be_transform_node(src_block);
4306 ir_graph *irg = current_ir_graph;
4307 dbg_info *dbgi = get_irn_dbg_info(node);
4308 ir_node *frame = get_irg_frame(irg);
4309 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4310 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4311 ir_node *new_val_low = be_transform_node(val_low);
4312 ir_node *new_val_high = be_transform_node(val_high);
4314 ir_node *sync, *fild, *res;
4315 ir_node *store_low, *store_high;
4317 if (ia32_cg_config.use_sse2) {
4318 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* Spill both 32bit halves to consecutive frame slots (high word at +4). */
4322 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4324 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4326 SET_IA32_ORIG_NODE(store_low, node);
4327 SET_IA32_ORIG_NODE(store_high, node);
4329 set_ia32_use_frame(store_low);
4330 set_ia32_use_frame(store_high);
4331 set_ia32_op_type(store_low, ia32_AddrModeD);
4332 set_ia32_op_type(store_high, ia32_AddrModeD);
4333 set_ia32_ls_mode(store_low, mode_Iu);
4334 set_ia32_ls_mode(store_high, mode_Is);
4335 add_ia32_am_offs_int(store_high, 4);
4339 sync = new_rd_Sync(dbgi, block, 2, in);
/* Reload the pair as one signed 64bit integer into the x87 stack. */
4342 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4344 set_ia32_use_frame(fild);
4345 set_ia32_op_type(fild, ia32_AddrModeS);
4346 set_ia32_ls_mode(fild, mode_Ls);
4348 SET_IA32_ORIG_NODE(fild, node);
4350 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* Unsigned source: fild interpreted the value as signed, so when the high
 * word's sign bit is set the result is off by 2^64. Add the ULL-bias
 * constant, indexed by (high >> 31), i.e. 0 or the bias. */
4352 if (! mode_is_signed(get_irn_mode(val_high))) {
4353 ia32_address_mode_t am;
4355 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4358 am.addr.base = get_symconst_base();
4359 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4360 am.addr.mem = nomem;
4363 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4364 am.addr.use_frame = 0;
4365 am.addr.frame_entity = NULL;
4366 am.addr.symconst_sign = 0;
4367 am.ls_mode = mode_F;
4368 am.mem_proj = nomem;
4369 am.op_type = ia32_AddrModeS;
4371 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4372 am.pinned = op_pin_state_floats;
4374 am.ins_permuted = 0;
4376 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4377 am.new_op1, am.new_op2, get_fpcw());
4378 set_am_attributes(fadd, &am);
4380 set_irn_mode(fadd, mode_T);
4381 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Convert a float to a lowered 64bit integer: a fist(p) stores the full
 * 64bit result into a frame slot; the low/high words are read back later by
 * gen_Proj_l_FloattoLL. Returns the store's memory result. */
4386 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4388 ir_node *src_block = get_nodes_block(node);
4389 ir_node *block = be_transform_node(src_block);
4390 ir_graph *irg = get_Block_irg(block);
4391 dbg_info *dbgi = get_irn_dbg_info(node);
4392 ir_node *frame = get_irg_frame(irg);
4393 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4394 ir_node *new_val = be_transform_node(val);
4395 ir_node *fist, *mem;
4397 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4398 SET_IA32_ORIG_NODE(fist, node);
4399 set_ia32_use_frame(fist);
4400 set_ia32_op_type(fist, ia32_AddrModeD);
4401 set_ia32_ls_mode(fist, mode_Ls);
/* Read one 32bit half of a l_FloattoLL result back from the frame slot
 * written by the fist: offset 0 for the low word, +4 for the high word. */
4406 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4408 ir_node *block = be_transform_node(get_nodes_block(node));
4409 ir_graph *irg = get_Block_irg(block);
4410 ir_node *pred = get_Proj_pred(node);
4411 ir_node *new_pred = be_transform_node(pred);
4412 ir_node *frame = get_irg_frame(irg);
4413 dbg_info *dbgi = get_irn_dbg_info(node);
4414 long pn = get_Proj_proj(node);
4419 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4420 SET_IA32_ORIG_NODE(load, node);
4421 set_ia32_use_frame(load);
4422 set_ia32_op_type(load, ia32_AddrModeS);
4423 set_ia32_ls_mode(load, mode_Iu);
4424 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4425 * 32 bit from it with this particular load */
4426 attr = get_ia32_attr(load);
4427 attr->data.need_64bit_stackent = 1;
4429 if (pn == pn_ia32_l_FloattoLL_res_high) {
4430 add_ia32_am_offs_int(load, 4);
4432 assert(pn == pn_ia32_l_FloattoLL_res_low);
4435 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4441 * Transform the Projs of an AddSP.
/* be_AddSP was lowered to ia32_SubSP (stack grows down), so the Proj
 * numbers are renumbered accordingly; the sp result is pinned to ESP. */
4443 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4445 ir_node *pred = get_Proj_pred(node);
4446 ir_node *new_pred = be_transform_node(pred);
4447 dbg_info *dbgi = get_irn_dbg_info(node);
4448 long proj = get_Proj_proj(node);
4450 if (proj == pn_be_AddSP_sp) {
4451 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4452 pn_ia32_SubSP_stack);
4453 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4455 } else if (proj == pn_be_AddSP_res) {
4456 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4457 pn_ia32_SubSP_addr);
4458 } else if (proj == pn_be_AddSP_M) {
4459 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4462 panic("No idea how to transform proj->AddSP");
4466 * Transform the Projs of a SubSP.
/* Dual of gen_Proj_be_AddSP: be_SubSP became ia32_AddSP. */
4468 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4470 ir_node *pred = get_Proj_pred(node);
4471 ir_node *new_pred = be_transform_node(pred);
4472 dbg_info *dbgi = get_irn_dbg_info(node);
4473 long proj = get_Proj_proj(node);
4475 if (proj == pn_be_SubSP_sp) {
4476 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4477 pn_ia32_AddSP_stack);
4478 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4480 } else if (proj == pn_be_SubSP_M) {
4481 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4484 panic("No idea how to transform proj->SubSP");
4488 * Transform and renumber the Projs from a Load.
/* The transformed predecessor can be one of several node kinds (Load,
 * Conv_I2I folded over a load, xLoad, vfld), each with its own result/mem/
 * exception Proj numbering. Memory Projs of multi-user Loads are deferred
 * because the Load may still be folded into an address mode elsewhere. */
4490 static ir_node *gen_Proj_Load(ir_node *node)
4493 ir_node *block = be_transform_node(get_nodes_block(node));
4494 ir_node *pred = get_Proj_pred(node);
4495 dbg_info *dbgi = get_irn_dbg_info(node);
4496 long proj = get_Proj_proj(node);
4498 /* loads might be part of source address mode matches, so we don't
4499 * transform the ProjMs yet (with the exception of loads whose result is
4502 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4505 /* this is needed, because sometimes we have loops that are only
4506 reachable through the ProjM */
4507 be_enqueue_preds(node);
4508 /* do it in 2 steps, to silence firm verifier */
4509 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4510 set_Proj_proj(res, pn_ia32_mem);
4514 /* renumber the proj */
4515 new_pred = be_transform_node(pred);
4516 if (is_ia32_Load(new_pred)) {
4519 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4521 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4522 case pn_Load_X_regular:
4523 return new_rd_Jmp(dbgi, block);
4524 case pn_Load_X_except:
4525 /* This Load might raise an exception. Mark it. */
4526 set_ia32_exc_label(new_pred, 1);
4527 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
/* The load was folded into a Conv (sign/zero extension with AM). */
4531 } else if (is_ia32_Conv_I2I(new_pred) ||
4532 is_ia32_Conv_I2I8Bit(new_pred)) {
4533 set_irn_mode(new_pred, mode_T);
4534 if (proj == pn_Load_res) {
4535 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4536 } else if (proj == pn_Load_M) {
4537 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4539 } else if (is_ia32_xLoad(new_pred)) {
4542 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4544 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4545 case pn_Load_X_regular:
4546 return new_rd_Jmp(dbgi, block);
4547 case pn_Load_X_except:
4548 /* This Load might raise an exception. Mark it. */
4549 set_ia32_exc_label(new_pred, 1);
4550 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4554 } else if (is_ia32_vfld(new_pred)) {
4557 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4559 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4560 case pn_Load_X_regular:
4561 return new_rd_Jmp(dbgi, block);
4562 case pn_Load_X_except:
4563 /* This Load might raise an exception. Mark it. */
4564 set_ia32_exc_label(new_pred, 1);
4565 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4570 /* can happen for ProJMs when source address mode happened for the
4573 /* however it should not be the result proj, as that would mean the
4574 load had multiple users and should not have been used for
4576 if (proj != pn_Load_M) {
4577 panic("internal error: transformed node not a Load");
4579 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4582 panic("No idea how to transform proj");
4586 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod all lower to one ia32 Div/IDiv; only the result Proj
 * numbers differ (div_res vs. mod_res). Exception edges mark the Div. */
4588 static ir_node *gen_Proj_DivMod(ir_node *node)
4590 ir_node *block = be_transform_node(get_nodes_block(node));
4591 ir_node *pred = get_Proj_pred(node);
4592 ir_node *new_pred = be_transform_node(pred);
4593 dbg_info *dbgi = get_irn_dbg_info(node);
4594 long proj = get_Proj_proj(node);
4596 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4598 switch (get_irn_opcode(pred)) {
4602 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4604 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4605 case pn_Div_X_regular:
4606 return new_rd_Jmp(dbgi, block);
4607 case pn_Div_X_except:
4608 set_ia32_exc_label(new_pred, 1);
4609 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4617 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4619 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4620 case pn_Mod_X_except:
4621 set_ia32_exc_label(new_pred, 1);
4622 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4630 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4631 case pn_DivMod_res_div:
4632 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4633 case pn_DivMod_res_mod:
4634 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4635 case pn_DivMod_X_regular:
4636 return new_rd_Jmp(dbgi, block);
4637 case pn_DivMod_X_except:
4638 set_ia32_exc_label(new_pred, 1);
4639 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4648 panic("No idea how to transform proj->DivMod");
4652 * Transform and renumber the Projs from a CopyB.
/* CopyB lowers to either CopyB_i (immediate size) or CopyB (rep movs);
 * only the memory Proj is handled here. */
4654 static ir_node *gen_Proj_CopyB(ir_node *node)
4656 ir_node *pred = get_Proj_pred(node);
4657 ir_node *new_pred = be_transform_node(pred);
4658 dbg_info *dbgi = get_irn_dbg_info(node);
4659 long proj = get_Proj_proj(node);
4663 if (is_ia32_CopyB_i(new_pred)) {
4664 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4665 } else if (is_ia32_CopyB(new_pred)) {
4666 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4673 panic("No idea how to transform proj->CopyB");
4677 * Transform and renumber the Projs from a Quot.
/* Quot (float division) may have become an SSE xDiv or an x87 vfdiv; pick
 * the matching Proj numbers and result mode (xmm vs. vfp). */
4679 static ir_node *gen_Proj_Quot(ir_node *node)
4681 ir_node *pred = get_Proj_pred(node);
4682 ir_node *new_pred = be_transform_node(pred);
4683 dbg_info *dbgi = get_irn_dbg_info(node);
4684 long proj = get_Proj_proj(node);
4688 if (is_ia32_xDiv(new_pred)) {
4689 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4690 } else if (is_ia32_vfdiv(new_pred)) {
4691 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4695 if (is_ia32_xDiv(new_pred)) {
4696 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4697 } else if (is_ia32_vfdiv(new_pred)) {
4698 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4701 case pn_Quot_X_regular:
4702 case pn_Quot_X_except:
4707 panic("No idea how to transform proj->Quot");
/* Transform an indirect be_Call into an ia32 Call. The call target may be
 * folded into an address mode or immediate; register parameters are sorted
 * into their fixed eax/ecx/edx slots; SSE2 calls returning floats are
 * remembered for x87 post-processing. */
4710 static ir_node *gen_be_Call(ir_node *node)
4712 dbg_info *const dbgi = get_irn_dbg_info(node);
4713 ir_node *const src_block = get_nodes_block(node);
4714 ir_node *const block = be_transform_node(src_block);
4715 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4716 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4717 ir_node *const sp = be_transform_node(src_sp);
4718 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4719 ia32_address_mode_t am;
4720 ia32_address_t *const addr = &am.addr;
4725 ir_node * eax = noreg_GP;
4726 ir_node * ecx = noreg_GP;
4727 ir_node * edx = noreg_GP;
4728 unsigned const pop = be_Call_get_pop(node);
4729 ir_type *const call_tp = be_Call_get_type(node);
4730 int old_no_pic_adjust;
4732 /* Run the x87 simulator if the call returns a float value */
4733 if (get_method_n_ress(call_tp) > 0) {
4734 ir_type *const res_type = get_method_res_type(call_tp, 0);
4735 ir_mode *const res_mode = get_type_mode(res_type);
4737 if (res_mode != NULL && mode_is_float(res_mode)) {
4738 ir_graph *irg = current_ir_graph;
4739 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4740 irg_data->do_x87_sim = 1;
4744 /* We do not want be_Call direct calls */
4745 assert(be_Call_get_entity(node) == NULL);
4747 /* special case for PIC trampoline calls */
4748 old_no_pic_adjust = ia32_no_pic_adjust;
4749 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4751 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4752 match_am | match_immediate);
/* Restore the global flag saved above. */
4754 ia32_no_pic_adjust = old_no_pic_adjust;
/* The last input is the fpcw; the preceding ones (down to the first call
 * argument) are GP register parameters constrained to eax/ecx/edx. */
4756 i = get_irn_arity(node) - 1;
4757 fpcw = be_transform_node(get_irn_n(node, i--));
4758 for (; i >= be_pos_Call_first_arg; --i) {
4759 arch_register_req_t const *const req = arch_get_register_req(node, i);
4760 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4762 assert(req->type == arch_register_req_type_limited);
4763 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4765 switch (*req->limited) {
4766 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4767 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4768 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4769 default: panic("Invalid GP register for register parameter");
4773 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4774 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4775 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4776 set_am_attributes(call, &am);
4777 call = fix_mem_proj(call, &am);
4779 if (get_irn_pinned(node) == op_pin_state_pinned)
4780 set_irn_pinned(call, op_pin_state_pinned);
4782 SET_IA32_ORIG_NODE(call, node);
4784 if (ia32_cg_config.use_sse2) {
4785 /* remember this call for post-processing */
4786 ARR_APP1(ir_node *, call_list, call);
4787 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4794 * Transform Builtin trap
/* __builtin_trap -> ud2 instruction. */
4796 static ir_node *gen_trap(ir_node *node)
4798 dbg_info *dbgi = get_irn_dbg_info(node);
4799 ir_node *block = be_transform_node(get_nodes_block(node));
4800 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4802 return new_bd_ia32_UD2(dbgi, block, mem);
4806 * Transform Builtin debugbreak
/* __builtin_debugbreak -> int3 breakpoint instruction. */
4808 static ir_node *gen_debugbreak(ir_node *node)
4810 dbg_info *dbgi = get_irn_dbg_info(node);
4811 ir_node *block = be_transform_node(get_nodes_block(node));
4812 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4814 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4818 * Transform Builtin return_address
/* __builtin_return_address(level): for level > 0 a ClimbFrame walks up the
 * frame chain first, then the return address slot of the reached frame is
 * loaded. ProduceVal creates undefined inputs for the climb. */
4820 static ir_node *gen_return_address(ir_node *node)
4822 ir_node *param = get_Builtin_param(node, 0);
4823 ir_node *frame = get_Builtin_param(node, 1);
4824 dbg_info *dbgi = get_irn_dbg_info(node);
4825 ir_tarval *tv = get_Const_tarval(param);
4826 unsigned long value = get_tarval_long(tv);
4828 ir_node *block = be_transform_node(get_nodes_block(node));
4829 ir_node *ptr = be_transform_node(frame);
4833 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4834 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4835 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4838 /* load the return address from this frame */
4839 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4841 set_irn_pinned(load, get_irn_pinned(node));
4842 set_ia32_op_type(load, ia32_AddrModeS);
4843 set_ia32_ls_mode(load, mode_Iu);
4845 set_ia32_am_offs_int(load, 0);
4846 set_ia32_use_frame(load);
4847 set_ia32_frame_ent(load, ia32_get_return_address_entity());
/* Unpinned loads may be rematerialized; the result Proj numbers of all
 * load variants must coincide for that to be safe. */
4849 if (get_irn_pinned(node) == op_pin_state_floats) {
4850 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4851 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4852 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4853 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4856 SET_IA32_ORIG_NODE(load, node);
4857 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4861 * Transform Builtin frame_address
/* __builtin_frame_address(level): analogous to gen_return_address, but
 * loads the saved frame-pointer slot of the reached frame. */
4863 static ir_node *gen_frame_address(ir_node *node)
4865 ir_node *param = get_Builtin_param(node, 0);
4866 ir_node *frame = get_Builtin_param(node, 1);
4867 dbg_info *dbgi = get_irn_dbg_info(node);
4868 ir_tarval *tv = get_Const_tarval(param);
4869 unsigned long value = get_tarval_long(tv);
4871 ir_node *block = be_transform_node(get_nodes_block(node));
4872 ir_node *ptr = be_transform_node(frame);
4877 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4878 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4879 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4882 /* load the frame address from this frame */
4883 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4885 set_irn_pinned(load, get_irn_pinned(node));
4886 set_ia32_op_type(load, ia32_AddrModeS);
4887 set_ia32_ls_mode(load, mode_Iu);
4889 ent = ia32_get_frame_address_entity();
4891 set_ia32_am_offs_int(load, 0);
4892 set_ia32_use_frame(load);
4893 set_ia32_frame_ent(load, ent);
4895 /* will fail anyway, but gcc does this: */
4896 set_ia32_am_offs_int(load, 0);
/* Same rematerialization handling as gen_return_address. */
4899 if (get_irn_pinned(node) == op_pin_state_floats) {
4900 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4901 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4902 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4903 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4906 SET_IA32_ORIG_NODE(load, node);
4907 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4911 * Transform Builtin frame_address
/* NOTE(review): the doc comment above is a copy-paste leftover — this
 * transforms the prefetch Builtin. Emits an SSE prefetch{NTA,0,1,2},
 * a 3DNow! prefetch(w), or routes memory through when no prefetch
 * instruction is available. Param 1 is the read/write flag, param 2 the
 * SSE locality hint. */
4913 static ir_node *gen_prefetch(ir_node *node)
4916 ir_node *ptr, *block, *mem, *base, *index;
4917 ir_node *param, *new_node;
4920 ia32_address_t addr;
4922 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4923 /* no prefetch at all, route memory */
4924 return be_transform_node(get_Builtin_mem(node));
4927 param = get_Builtin_param(node, 1);
4928 tv = get_Const_tarval(param);
4929 rw = get_tarval_long(tv);
4931 /* construct load address */
4932 memset(&addr, 0, sizeof(addr));
4933 ptr = get_Builtin_param(node, 0);
4934 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4941 base = be_transform_node(base);
4944 if (index == NULL) {
4947 index = be_transform_node(index);
4950 dbgi = get_irn_dbg_info(node);
4951 block = be_transform_node(get_nodes_block(node));
4952 mem = be_transform_node(get_Builtin_mem(node));
4954 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4955 /* we have 3DNow!, this was already checked above */
4956 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4957 } else if (ia32_cg_config.use_sse_prefetch) {
4958 /* note: rw == 1 is IGNORED in that case */
4959 param = get_Builtin_param(node, 2);
4960 tv = get_Const_tarval(param);
4961 locality = get_tarval_long(tv);
4963 /* SSE style prefetch */
4966 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4969 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4972 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4975 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4979 assert(ia32_cg_config.use_3dnow_prefetch);
4980 /* 3DNow! style prefetch */
4981 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4984 set_irn_pinned(new_node, get_irn_pinned(node));
4985 set_ia32_op_type(new_node, ia32_AddrModeS);
4986 set_ia32_ls_mode(new_node, mode_Bu);
4987 set_address(new_node, &addr);
4989 SET_IA32_ORIG_NODE(new_node, node);
4991 be_dep_on_frame(new_node);
4992 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4996 * Transform bsf like node
/* Helper: transform a one-operand Builtin (bsf/bsr style) into the ia32 node
 * built by 'func', folding the operand into an address mode where possible.
 * Returns the (possibly mem-proj-fixed) result node. */
4998 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5000 ir_node *param = get_Builtin_param(node, 0);
5001 dbg_info *dbgi = get_irn_dbg_info(node);
5003 ir_node *block = get_nodes_block(node);
5004 ir_node *new_block = be_transform_node(block);
5006 ia32_address_mode_t am;
5007 ia32_address_t *addr = &am.addr;
/* try to match the operand as a memory operand (match_am) */
5010 match_arguments(&am, block, NULL, param, NULL, match_am);
5012 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5013 set_am_attributes(cnt, &am);
5014 set_ia32_ls_mode(cnt, get_irn_mode(param));
5016 SET_IA32_ORIG_NODE(cnt, node);
5017 return fix_mem_proj(cnt, &am);
5021 * Transform builtin ffs.
/* Transform builtin ffs (find first set).
 * ffs(x) = bsf(x) + 1 for x != 0 and 0 for x == 0. The zero case is handled
 * branch-free: if bsf set ZF (input was 0), OR the bsf result with -1 so the
 * final +1 yields 0. */
5023 static ir_node *gen_ffs(ir_node *node)
5025 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5026 ir_node *real = skip_Proj(bsf);
5027 dbg_info *dbgi = get_irn_dbg_info(real);
5028 ir_node *block = get_nodes_block(real);
5029 ir_node *flag, *set, *conv, *neg, *orn;
/* force the Bsf into mode_T so we can also grab its flags output */
5032 if (get_irn_mode(real) != mode_T) {
5033 set_irn_mode(real, mode_T);
5034 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5037 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = 1 iff the input was zero (ZF set) */
5040 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
5041 SET_IA32_ORIG_NODE(set, node);
/* widen the 8-bit Setcc result to 32 bit */
5044 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5045 SET_IA32_ORIG_NODE(conv, node);
/* neg = 0, or -1 (all-ones) when the input was zero */
5048 neg = new_bd_ia32_Neg(dbgi, block, conv);
5051 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5052 set_ia32_commutative(orn);
/* +1 maps -1 -> 0 (zero input) and bsf -> bsf+1 otherwise */
5055 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, orn, ia32_create_Immediate(NULL, 0, 1));
5059 * Transform builtin clz.
/* Transform builtin clz (count leading zeros).
 * Bsr yields the index of the highest set bit; for 32-bit operands
 * clz(x) = 31 - bsr(x) = bsr(x) XOR 31.
 * NOTE(review): like bsr itself this is undefined for x == 0 -- matches the
 * usual __builtin_clz contract, confirm callers rely on that. */
5061 static ir_node *gen_clz(ir_node *node)
5063 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5064 ir_node *real = skip_Proj(bsr);
5065 dbg_info *dbgi = get_irn_dbg_info(real);
5066 ir_node *block = get_nodes_block(real);
5067 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5069 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5073 * Transform builtin ctz.
/* Transform builtin ctz (count trailing zeros): exactly bsf. */
5075 static ir_node *gen_ctz(ir_node *node)
5077 return gen_unop_AM(node, new_bd_ia32_Bsf);
5081 * Transform builtin parity.
/* Transform builtin parity: Cmp the operand against 0 and materialise the
 * CPU parity flag via Setcc, widened to 32 bit.
 * NOTE(review): on x86 the parity flag only reflects the least-significant
 * byte of a result, so for operands wider than 8 bit this appears to compute
 * the parity of the low byte only -- TODO confirm and fix if so. */
5083 static ir_node *gen_parity(ir_node *node)
5085 ir_node *param = get_Builtin_param(node, 0);
5086 dbg_info *dbgi = get_irn_dbg_info(node);
5088 ir_node *block = get_nodes_block(node);
5090 ir_node *new_block = be_transform_node(block);
5091 ir_node *imm, *cmp, *new_node;
5093 ia32_address_mode_t am;
5094 ia32_address_t *addr = &am.addr;
/* cmp param, 0 -- sets PF according to the (low byte of the) operand */
5098 match_arguments(&am, block, NULL, param, NULL, match_am);
5099 imm = ia32_create_Immediate(NULL, 0, 0);
5100 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
5101 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
5102 set_am_attributes(cmp, &am);
5103 set_ia32_ls_mode(cmp, mode_Iu);
5105 SET_IA32_ORIG_NODE(cmp, node);
5107 cmp = fix_mem_proj(cmp, &am);
/* setp: 1 iff the parity flag is set */
5110 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
5111 SET_IA32_ORIG_NODE(new_node, node);
/* widen the 8-bit Setcc result to 32 bit */
5114 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5115 nomem, new_node, mode_Bu);
5116 SET_IA32_ORIG_NODE(new_node, node);
5121 * Transform builtin popcount
5123 static ir_node *gen_popcount(ir_node *node)
5125 ir_node *param = get_Builtin_param(node, 0);
5126 dbg_info *dbgi = get_irn_dbg_info(node);
5128 ir_node *block = get_nodes_block(node);
5129 ir_node *new_block = be_transform_node(block);
5132 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5134 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5135 if (ia32_cg_config.use_popcnt) {
5136 ia32_address_mode_t am;
5137 ia32_address_t *addr = &am.addr;
5140 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5142 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5143 set_am_attributes(cnt, &am);
5144 set_ia32_ls_mode(cnt, get_irn_mode(param));
5146 SET_IA32_ORIG_NODE(cnt, node);
5147 return fix_mem_proj(cnt, &am);
5150 new_param = be_transform_node(param);
5152 /* do the standard popcount algo */
5154 /* m1 = x & 0x55555555 */
5155 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5156 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5159 simm = ia32_create_Immediate(NULL, 0, 1);
5160 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5162 /* m2 = s1 & 0x55555555 */
5163 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5166 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5168 /* m4 = m3 & 0x33333333 */
5169 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5170 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5173 simm = ia32_create_Immediate(NULL, 0, 2);
5174 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5176 /* m5 = s2 & 0x33333333 */
5177 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5180 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5182 /* m7 = m6 & 0x0F0F0F0F */
5183 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5184 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5187 simm = ia32_create_Immediate(NULL, 0, 4);
5188 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5190 /* m8 = s3 & 0x0F0F0F0F */
5191 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5194 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5196 /* m10 = m9 & 0x00FF00FF */
5197 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5198 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5201 simm = ia32_create_Immediate(NULL, 0, 8);
5202 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5204 /* m11 = s4 & 0x00FF00FF */
5205 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5207 /* m12 = m10 + m11 */
5208 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5210 /* m13 = m12 & 0x0000FFFF */
5211 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5212 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5214 /* s5 = m12 >> 16 */
5215 simm = ia32_create_Immediate(NULL, 0, 16);
5216 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5218 /* res = m13 + s5 */
5219 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5223 * Transform builtin byte swap.
5225 static ir_node *gen_bswap(ir_node *node)
5227 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5228 dbg_info *dbgi = get_irn_dbg_info(node);
5230 ir_node *block = get_nodes_block(node);
5231 ir_node *new_block = be_transform_node(block);
5232 ir_mode *mode = get_irn_mode(param);
5233 unsigned size = get_mode_size_bits(mode);
5234 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5238 if (ia32_cg_config.use_i486) {
5239 /* swap available */
5240 return new_bd_ia32_Bswap(dbgi, new_block, param);
5242 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5243 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5245 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5246 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5248 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5250 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5251 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5253 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5254 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5257 /* swap16 always available */
5258 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5261 panic("Invalid bswap size (%d)", size);
5266 * Transform builtin outport.
/* Transform builtin outport: write 'value' to I/O port 'port' (the port may
 * fold into an immediate). The ls_mode records the width of the write. */
5268 static ir_node *gen_outport(ir_node *node)
5270 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5271 ir_node *oldv = get_Builtin_param(node, 1);
5272 ir_mode *mode = get_irn_mode(oldv);
5273 ir_node *value = be_transform_node(oldv);
5274 ir_node *block = be_transform_node(get_nodes_block(node));
5275 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5276 dbg_info *dbgi = get_irn_dbg_info(node);
5278 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5279 set_ia32_ls_mode(res, mode);
5284 * Transform builtin inport.
/* Transform builtin inport: read from I/O port 'port'; the result width is
 * taken from the builtin's method result type. */
5288 static ir_node *gen_inport(ir_node *node)
5289 ir_type *tp = get_Builtin_type(node);
5290 ir_type *rstp = get_method_res_type(tp, 0);
5291 ir_mode *mode = get_type_mode(rstp);
5292 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5293 ir_node *block = be_transform_node(get_nodes_block(node));
5294 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5295 dbg_info *dbgi = get_irn_dbg_info(node);
5296 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5297 set_ia32_ls_mode(res, mode);
5299 /* check for missing Result Proj */
5304 * Transform a builtin inner trampoline
/* Transform a builtin inner trampoline: writes a 10-byte i386 trampoline to
 * *ptr consisting of "mov ecx, <env>" (opcode 0xB9 + imm32) followed by
 * "jmp rel32 <callee>" (opcode 0xE9 + imm32). Returns a (mem, ptr) tuple. */
5306 static ir_node *gen_inner_trampoline(ir_node *node)
5308 ir_node *ptr = get_Builtin_param(node, 0);
5309 ir_node *callee = get_Builtin_param(node, 1);
5310 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5311 ir_node *mem = get_Builtin_mem(node);
5312 ir_node *block = get_nodes_block(node);
5313 ir_node *new_block = be_transform_node(block);
5317 ir_node *trampoline;
5319 dbg_info *dbgi = get_irn_dbg_info(node);
5320 ia32_address_t addr;
5322 /* construct store address */
5323 memset(&addr, 0, sizeof(addr));
5324 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5326 if (addr.base == NULL) {
5327 addr.base = noreg_GP;
5329 addr.base = be_transform_node(addr.base);
5332 if (addr.index == NULL) {
5333 addr.index = noreg_GP;
5335 addr.index = be_transform_node(addr.index);
5337 addr.mem = be_transform_node(mem);
5339 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5340 val = ia32_create_Immediate(NULL, 0, 0xB9);
5341 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5342 addr.index, addr.mem, val);
5343 set_irn_pinned(store, get_irn_pinned(node));
5344 set_ia32_op_type(store, ia32_AddrModeD);
5345 set_ia32_ls_mode(store, mode_Bu);
5346 set_address(store, &addr);
/* store the 4-byte environment pointer operand */
5350 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5351 addr.index, addr.mem, env);
5352 set_irn_pinned(store, get_irn_pinned(node));
5353 set_ia32_op_type(store, ia32_AddrModeD);
5354 set_ia32_ls_mode(store, mode_Iu);
5355 set_address(store, &addr);
5359 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5360 val = ia32_create_Immediate(NULL, 0, 0xE9);
5361 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5362 addr.index, addr.mem, val);
5363 set_irn_pinned(store, get_irn_pinned(node));
5364 set_ia32_op_type(store, ia32_AddrModeD);
5365 set_ia32_ls_mode(store, mode_Bu);
5366 set_address(store, &addr);
5370 trampoline = be_transform_node(ptr);
5372 /* the callee is typically an immediate */
/* rel = callee - 10 - trampoline; the jmp displacement is relative to the
 * end of the 10-byte trampoline, hence the constant -10 offset */
5373 if (is_SymConst(callee)) {
5374 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5376 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
5378 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5380 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5381 addr.index, addr.mem, rel);
5382 set_irn_pinned(store, get_irn_pinned(node));
5383 set_ia32_op_type(store, ia32_AddrModeD);
5384 set_ia32_ls_mode(store, mode_Iu);
5385 set_address(store, &addr);
/* result tuple: [0] = memory, [1] = trampoline address */
5390 return new_r_Tuple(new_block, 2, in);
5394 * Transform Builtin node.
/* Transform a Builtin node: dispatch on the builtin kind to the specific
 * gen_* routine above; panics on kinds the ia32 backend does not support. */
5396 static ir_node *gen_Builtin(ir_node *node)
5398 ir_builtin_kind kind = get_Builtin_kind(node);
5402 return gen_trap(node);
5403 case ir_bk_debugbreak:
5404 return gen_debugbreak(node);
5405 case ir_bk_return_address:
5406 return gen_return_address(node);
5407 case ir_bk_frame_address:
5408 return gen_frame_address(node);
5409 case ir_bk_prefetch:
5410 return gen_prefetch(node);
5412 return gen_ffs(node);
5414 return gen_clz(node);
5416 return gen_ctz(node);
5418 return gen_parity(node);
5419 case ir_bk_popcount:
5420 return gen_popcount(node);
5422 return gen_bswap(node);
5424 return gen_outport(node);
5426 return gen_inport(node);
5427 case ir_bk_inner_trampoline:
5428 return gen_inner_trampoline(node);
5430 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5434 * Transform Proj(Builtin) node.
/* Transform a Proj(Builtin): map the generic Builtin proj numbers onto the
 * projs/tuple slots of the already-transformed ia32 node. */
5436 static ir_node *gen_Proj_Builtin(ir_node *proj)
5438 ir_node *node = get_Proj_pred(proj);
5439 ir_node *new_node = be_transform_node(node);
5440 ir_builtin_kind kind = get_Builtin_kind(node);
5443 case ir_bk_return_address:
5444 case ir_bk_frame_address:
5449 case ir_bk_popcount:
/* these transforms return the result value directly */
5451 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5454 case ir_bk_debugbreak:
5455 case ir_bk_prefetch:
/* these transforms return the memory value directly */
5457 assert(get_Proj_proj(proj) == pn_Builtin_M);
5460 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5461 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5463 assert(get_Proj_proj(proj) == pn_Builtin_M);
5464 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5466 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned a Tuple: [0] = mem, [1] = address */
5467 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5468 return get_Tuple_pred(new_node, 1);
5470 assert(get_Proj_proj(proj) == pn_Builtin_M);
5471 return get_Tuple_pred(new_node, 0);
5474 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Transform be_IncSP: duplicate it, but mark that the add/sub on esp
 * clobbers the flags register. */
5477 static ir_node *gen_be_IncSP(ir_node *node)
5479 ir_node *res = be_duplicate_node(node);
5480 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5486 * Transform the Projs from a be_Call.
/* Transform a Proj of a be_Call: renumber the proj onto the ia32_Call
 * outputs and pin the fixed registers (esp, fpcw) on the new Proj. */
5488 static ir_node *gen_Proj_be_Call(ir_node *node)
5490 ir_node *call = get_Proj_pred(node);
5491 ir_node *new_call = be_transform_node(call);
5492 dbg_info *dbgi = get_irn_dbg_info(node);
5493 long proj = get_Proj_proj(node);
5494 ir_mode *mode = get_irn_mode(node);
5497 if (proj == pn_be_Call_M_regular) {
5498 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5500 /* transform call modes */
5501 if (mode_is_data(mode)) {
5502 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5506 /* Map from be_Call to ia32_Call proj number */
5507 if (proj == pn_be_Call_sp) {
5508 proj = pn_ia32_Call_stack;
5509 } else if (proj == pn_be_Call_M_regular) {
5510 proj = pn_ia32_Call_M;
/* result proj: search the ia32_Call output with the matching
 * limited register requirement */
5512 arch_register_req_t const *const req = arch_get_register_req_out(node);
5513 int const n_outs = arch_irn_get_n_outs(new_call);
5516 assert(proj >= pn_be_Call_first_res);
5517 assert(req->type & arch_register_req_type_limited);
5519 for (i = 0; i < n_outs; ++i) {
5520 arch_register_req_t const *const new_req
5521 = arch_get_out_register_req(new_call, i);
5523 if (!(new_req->type & arch_register_req_type_limited) ||
5524 new_req->cls != req->cls ||
5525 *new_req->limited != *req->limited)
5534 res = new_rd_Proj(dbgi, new_call, mode, proj);
5536 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5538 case pn_ia32_Call_stack:
5539 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5542 case pn_ia32_Call_fpcw:
5543 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5551 * Transform the Projs from a Cmp.
/* Proj(Cmp) must never reach the backend: mode_b lowering should have
 * removed them earlier, so this always panics. */
5553 static ir_node *gen_Proj_Cmp(ir_node *node)
5555 /* this probably means not all mode_b nodes were lowered... */
5556 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5561 * Transform the Projs from a Bound.
/* Transform a Proj of a Bound: the Bound became a Jcc, so map the regular/
 * exception X projs onto Jcc's true/false projs; memory and result route
 * through to the untransformed operands. */
5563 static ir_node *gen_Proj_Bound(ir_node *node)
5566 ir_node *pred = get_Proj_pred(node);
5568 switch (get_Proj_proj(node)) {
5570 return be_transform_node(get_Bound_mem(pred));
5571 case pn_Bound_X_regular:
5572 new_node = be_transform_node(pred);
5573 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
5574 case pn_Bound_X_except:
5575 new_node = be_transform_node(pred);
5576 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
/* the index value passes through the bounds check unchanged */
5578 return be_transform_node(get_Bound_index(pred));
5580 panic("unsupported Proj from Bound");
/* Transform a Proj of an ASM node: the memory proj is the last output of
 * the new ASM; data projs keep their position (renumbering elided here). */
5584 static ir_node *gen_Proj_ASM(ir_node *node)
5586 ir_mode *mode = get_irn_mode(node);
5587 ir_node *pred = get_Proj_pred(node);
5588 ir_node *new_pred = be_transform_node(pred);
5589 long pos = get_Proj_proj(node);
5591 if (mode == mode_M) {
/* memory is always the last output of the transformed ASM */
5592 pos = arch_irn_get_n_outs(new_pred)-1;
5593 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5595 } else if (mode_is_float(mode)) {
5598 panic("unexpected proj mode at ASM");
5601 return new_r_Proj(new_pred, mode, pos);
5605 * Transform and potentially renumber Proj nodes.
/* Transform and potentially renumber Proj nodes: dispatch on the opcode of
 * the Proj's predecessor to the specific Proj transformer. */
5607 static ir_node *gen_Proj(ir_node *node)
5609 ir_node *pred = get_Proj_pred(node);
5612 switch (get_irn_opcode(pred)) {
5614 proj = get_Proj_proj(node);
5615 if (proj == pn_Store_M) {
/* the transformed Store is itself the memory value */
5616 return be_transform_node(pred);
5618 panic("No idea how to transform proj->Store");
5621 return gen_Proj_Load(node);
5623 return gen_Proj_ASM(node);
5625 return gen_Proj_Builtin(node);
5629 return gen_Proj_DivMod(node);
5631 return gen_Proj_CopyB(node);
5633 return gen_Proj_Quot(node);
5635 return gen_Proj_be_SubSP(node);
5637 return gen_Proj_be_AddSP(node);
5639 return gen_Proj_be_Call(node);
5641 return gen_Proj_Cmp(node);
5643 return gen_Proj_Bound(node);
5645 proj = get_Proj_proj(node);
5647 case pn_Start_X_initial_exec: {
5648 ir_node *block = get_nodes_block(pred);
5649 ir_node *new_block = be_transform_node(block);
5650 dbg_info *dbgi = get_irn_dbg_info(node);
5651 /* we exchange the ProjX with a jump */
5652 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5657 case pn_Start_P_tls:
5658 return ia32_gen_Proj_tls(node);
5663 if (is_ia32_l_FloattoLL(pred)) {
5664 return gen_Proj_l_FloattoLL(node);
5666 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* re-create the Proj in mode_Iu if it needs a GP register */
5670 ir_mode *mode = get_irn_mode(node);
5671 if (ia32_mode_needs_gp_reg(mode)) {
5672 ir_node *new_pred = be_transform_node(pred);
5673 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5674 get_Proj_proj(node));
5675 new_proj->node_nr = node->node_nr;
5680 return be_duplicate_node(node);
5684 * Enters all transform functions into the generic pointer
5686 static void register_transformers(void)
5688 /* first clear the generic function pointer for all ops */
5689 be_start_transform_setup();
5691 be_set_transform_function(op_Add, gen_Add);
5692 be_set_transform_function(op_And, gen_And);
5693 be_set_transform_function(op_ASM, ia32_gen_ASM);
5694 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5695 be_set_transform_function(op_be_Call, gen_be_Call);
5696 be_set_transform_function(op_be_Copy, gen_be_Copy);
5697 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5698 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5699 be_set_transform_function(op_be_Return, gen_be_Return);
5700 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5701 be_set_transform_function(op_Bound, gen_Bound);
5702 be_set_transform_function(op_Builtin, gen_Builtin);
5703 be_set_transform_function(op_Cmp, gen_Cmp);
5704 be_set_transform_function(op_Cond, gen_Cond);
5705 be_set_transform_function(op_Const, gen_Const);
5706 be_set_transform_function(op_Conv, gen_Conv);
5707 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5708 be_set_transform_function(op_Div, gen_Div);
5709 be_set_transform_function(op_DivMod, gen_DivMod);
5710 be_set_transform_function(op_Eor, gen_Eor);
5711 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5712 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5713 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5714 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5715 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5716 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5717 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5718 be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
5719 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5720 be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
5721 be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
5722 be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
5723 be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
5724 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5725 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5726 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5727 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5728 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5729 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5730 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5731 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5732 be_set_transform_function(op_IJmp, gen_IJmp);
5733 be_set_transform_function(op_Jmp, gen_Jmp);
5734 be_set_transform_function(op_Load, gen_Load);
5735 be_set_transform_function(op_Minus, gen_Minus);
5736 be_set_transform_function(op_Mod, gen_Mod);
5737 be_set_transform_function(op_Mul, gen_Mul);
5738 be_set_transform_function(op_Mulh, gen_Mulh);
5739 be_set_transform_function(op_Mux, gen_Mux);
5740 be_set_transform_function(op_Not, gen_Not);
5741 be_set_transform_function(op_Or, gen_Or);
5742 be_set_transform_function(op_Phi, gen_Phi);
5743 be_set_transform_function(op_Proj, gen_Proj);
5744 be_set_transform_function(op_Quot, gen_Quot);
5745 be_set_transform_function(op_Rotl, gen_Rotl);
5746 be_set_transform_function(op_Shl, gen_Shl);
5747 be_set_transform_function(op_Shr, gen_Shr);
5748 be_set_transform_function(op_Shrs, gen_Shrs);
5749 be_set_transform_function(op_Store, gen_Store);
5750 be_set_transform_function(op_Sub, gen_Sub);
5751 be_set_transform_function(op_SymConst, gen_SymConst);
5752 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5756 * Pre-transform all unknown and noreg nodes.
/* Pre-transform the per-irg noreg helper nodes and cache the nomem/noreg_GP
 * globals so the transformers above can reference them directly. */
5758 static void ia32_pretransform_node(void)
5760 ir_graph *irg = current_ir_graph;
5761 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5763 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5764 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5765 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5767 nomem = get_irg_no_mem(irg);
5768 noreg_GP = ia32_new_NoReg_gp(irg);
5774 * Post-process all calls if we are in SSE mode.
5775 * The ABI requires that the results are in st0, copy them
5776 * to a xmm register.
/* Post-process calls in SSE mode: the x87 ABI returns floats in st0, so for
 * each float result either patch the consuming xStore into a vfst, or spill
 * st0 to the frame and reload it into an xmm register via xLoad. */
5778 static void postprocess_fp_call_results(void)
5782 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5783 ir_node *call = call_list[i];
5784 ir_type *mtp = call_types[i];
5787 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5788 ir_type *res_tp = get_method_res_type(mtp, j);
5789 ir_node *res, *new_res;
5790 const ir_edge_t *edge, *next;
5793 if (! is_atomic_type(res_tp)) {
5794 /* no floating point return */
5797 mode = get_type_mode(res_tp);
5798 if (! mode_is_float(mode)) {
5799 /* no floating point return */
5803 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5806 /* now patch the users */
5807 foreach_out_edge_safe(res, edge, next) {
5808 ir_node *succ = get_edge_src_irn(edge);
/* Keeps need no rewiring */
5811 if (be_is_Keep(succ))
5814 if (is_ia32_xStore(succ)) {
5815 /* an xStore can be patched into an vfst */
5816 dbg_info *db = get_irn_dbg_info(succ);
5817 ir_node *block = get_nodes_block(succ);
5818 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5819 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5820 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5821 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5822 ir_mode *mode = get_ia32_ls_mode(succ);
5824 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5825 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5826 if (is_ia32_use_frame(succ))
5827 set_ia32_use_frame(st);
5828 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5829 set_irn_pinned(st, get_irn_pinned(succ));
5830 set_ia32_op_type(st, ia32_AddrModeD);
/* lazily build the st0 -> frame -> xmm shuffle once per result */
5834 if (new_res == NULL) {
5835 dbg_info *db = get_irn_dbg_info(call);
5836 ir_node *block = get_nodes_block(call);
5837 ir_node *frame = get_irg_frame(current_ir_graph);
5838 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5839 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5840 ir_node *vfst, *xld, *new_mem;
5842 /* store st(0) on stack */
5843 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5844 set_ia32_op_type(vfst, ia32_AddrModeD);
5845 set_ia32_use_frame(vfst);
5847 /* load into SSE register */
5848 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5849 set_ia32_op_type(xld, ia32_AddrModeS);
5850 set_ia32_use_frame(xld);
5852 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5853 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5855 if (old_mem != NULL) {
5856 edges_reroute(old_mem, new_mem, current_ir_graph);
5860 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5867 /* do the transformation */
/* do the transformation */
/* Entry point: transform 'irg' from firm IR into ia32 nodes. Sets up the
 * height analysis and address-mode bookkeeping, runs be_transform_graph with
 * the transformers registered above, then post-processes SSE call results
 * and tears the auxiliary data down again. */
5868 void ia32_transform_graph(ir_graph *irg)
5872 register_transformers();
5873 initial_fpcw = NULL;
5874 ia32_no_pic_adjust = 0;
5876 be_timer_push(T_HEIGHTS);
5877 ia32_heights = heights_new(irg);
5878 be_timer_pop(T_HEIGHTS);
5879 ia32_calculate_non_address_mode_nodes(irg);
5881 /* the transform phase is not safe for CSE (yet) because several nodes get
5882 * attributes set after their creation */
5883 cse_last = get_opt_cse();
5886 call_list = NEW_ARR_F(ir_node *, 0);
5887 call_types = NEW_ARR_F(ir_type *, 0);
5888 be_transform_graph(irg, ia32_pretransform_node);
5890 if (ia32_cg_config.use_sse2)
5891 postprocess_fp_call_results();
5892 DEL_ARR_F(call_types);
5893 DEL_ARR_F(call_list);
/* restore the CSE setting we disabled for the transform phase */
5895 set_opt_cse(cse_last);
5897 ia32_free_non_address_mode_nodes();
5898 heights_free(ia32_heights);
5899 ia32_heights = NULL;
5902 void ia32_init_transform(void)
5904 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");