2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
50 #include "../benode.h"
51 #include "../besched.h"
53 #include "../beutil.h"
55 #include "../betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_util.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
71 /* define this to construct SSE constants instead of load them */
72 #undef CONSTRUCT_SSE_CONST
/* Bit-pattern strings for the constant-pool masks used to implement float
 * Neg (xor with sign bit) and Abs (and with ~sign bit) on SSE/x87. */
75 #define SFP_SIGN "0x80000000"
76 #define DFP_SIGN "0x8000000000000000"
77 #define SFP_ABS "0x7FFFFFFF"
78 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
79 #define DFP_INTMAX "9223372036854775807"
/* 2^64 as decimal: bias added when converting unsigned 64bit ints to float */
80 #define ULL_BIAS "18446744073709551616"
/* Linker-visible names of the entities created in ia32_gen_fp_known_const(). */
82 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
83 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
84 #define ENT_SFP_ABS "C_ia32_sfp_abs"
85 #define ENT_DFP_ABS "C_ia32_dfp_abs"
86 #define ENT_ULL_BIAS "C_ia32_ull_bias"
/* Shorthand for the register-class modes of the x87 and SSE register files. */
88 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
89 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
91 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* cached transformed node of the initial x87 control word (see get_fpcw()) */
93 static ir_node *initial_fpcw = NULL;
94 int ia32_no_pic_adjust;
/* Signatures of the generated node-constructor functions (new_bd_ia32_*)
 * passed to the generic gen_binop/gen_unop/gen_shift_binop helpers below.
 * NOTE(review): some parameter lines are missing from this extract. */
96 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
100 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
101 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
104 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
108 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
110 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
111 ir_node *base, ir_node *index, ir_node *mem);
113 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
114 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
117 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* forward declarations of helpers defined later in this file */
119 static ir_node *create_immediate_or_transform(ir_node *node,
120 char immediate_constraint_type);
122 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
123 dbg_info *dbgi, ir_node *block,
124 ir_node *op, ir_node *orig_node);
126 /* its enough to have those once */
127 static ir_node *nomem, *noreg_GP;
129 /** a list to postprocess all calls */
130 static ir_node **call_list;
131 static ir_type **call_types;
133 /** Return non-zero is a node represents the 0 constant. */
134 static bool is_Const_0(ir_node *node)
136 return is_Const(node) && is_Const_null(node);
139 /** Return non-zero is a node represents the 1 constant. */
140 static bool is_Const_1(ir_node *node)
142 return is_Const(node) && is_Const_one(node);
145 /** Return non-zero is a node represents the -1 constant. */
146 static bool is_Const_Minus_1(ir_node *node)
148 return is_Const(node) && is_Const_all_one(node);
152 * returns true if constant can be created with a simple float command
/* Returns true iff the float Const @p node can be materialized with a plain
 * x87 instruction (fldz/fld1) instead of a constant-pool load.
 * NOTE(review): return statements are missing from this extract; tokens
 * below are kept verbatim. */
154 static bool is_simple_x87_Const(ir_node *node)
156 	ir_tarval *tv = get_Const_tarval(node);
	/* 0.0 and 1.0 have dedicated x87 load instructions */
157 	if (tarval_is_null(tv) || tarval_is_one(tv))
160 	/* TODO: match all the other float constants */
165 * returns true if constant can be created with a simple float command
/* Returns true iff the float Const @p node can be built with simple SSE
 * instructions (at least 0.0 via xorps); with CONSTRUCT_SSE_CONST more
 * patterns (1.0, doubles whose low 32 bits are zero) are matched.
 * NOTE(review): several condition/return lines are missing from this
 * extract; tokens below are kept verbatim. */
167 static bool is_simple_sse_Const(ir_node *node)
169 	ir_tarval *tv = get_Const_tarval(node);
170 	ir_mode *mode = get_tarval_mode(tv);
175 	if (tarval_is_null(tv)
176 #ifdef CONSTRUCT_SSE_CONST
181 #ifdef CONSTRUCT_SSE_CONST
182 	if (mode == mode_D) {
	/* assemble the low 32 bits of the double from its byte representation */
183 		unsigned val = get_tarval_sub_bits(tv, 0) |
184 			(get_tarval_sub_bits(tv, 1) << 8) |
185 			(get_tarval_sub_bits(tv, 2) << 16) |
186 			(get_tarval_sub_bits(tv, 3) << 24);
188 		/* lower 32bit are zero, really a 32bit constant */
191 #endif /* CONSTRUCT_SSE_CONST */
192 	/* TODO: match all the other float constants */
197 * return NoREG or pic_base in case of PIC.
198 * This is necessary as base address for newly created symbols
/* Returns the base register node to use for newly created symbolic
 * addresses: the PIC base when position-independent code is requested,
 * otherwise (presumably noreg_GP — the non-PIC return path is missing
 * from this extract). */
200 static ir_node *get_symconst_base(void)
202 	ir_graph *irg = current_ir_graph;
204 	if (be_get_irg_options(irg)->pic) {
205 		const arch_env_t *arch_env = be_get_irg_arch_env(irg);
206 		return arch_env->impl->get_pic_base(irg);
213 * Transforms a Const.
/* Transforms a firm Const into ia32 code.
 * Float constants become xZero/xMovd/xLoad (SSE) or vfldz/vfld1/vfld (x87);
 * integer constants become an ia32_Const immediate.
 * NOTE(review): many lines (declarations, else-branches, braces, returns)
 * are missing from this extract; tokens below are kept verbatim. */
215 static ir_node *gen_Const(ir_node *node)
217 	ir_node *old_block = get_nodes_block(node);
218 	ir_node *block = be_transform_node(old_block);
219 	dbg_info *dbgi = get_irn_dbg_info(node);
220 	ir_mode *mode = get_irn_mode(node);
222 	assert(is_Const(node));
224 	if (mode_is_float(mode)) {
230 		if (ia32_cg_config.use_sse2) {
231 			ir_tarval *tv = get_Const_tarval(node);
			/* 0.0: cheap xorps/xorpd, no memory access needed */
232 			if (tarval_is_null(tv)) {
233 				load = new_bd_ia32_xZero(dbgi, block);
234 				set_ia32_ls_mode(load, mode);
236 #ifdef CONSTRUCT_SSE_CONST
			/* 1.0: build from all-ones via shift left then logical
			 * shift right (clears sign + low mantissa bits) */
237 			} else if (tarval_is_one(tv)) {
238 				int cnst = mode == mode_F ? 26 : 55;
239 				ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
240 				ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
241 				ir_node *pslld, *psrld;
243 				load = new_bd_ia32_xAllOnes(dbgi, block);
244 				set_ia32_ls_mode(load, mode);
245 				pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
246 				set_ia32_ls_mode(pslld, mode);
247 				psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
248 				set_ia32_ls_mode(psrld, mode);
250 #endif /* CONSTRUCT_SSE_CONST */
251 			} else if (mode == mode_F) {
252 				/* we can place any 32bit constant by using a movd gp, sse */
253 				unsigned val = get_tarval_sub_bits(tv, 0) |
254 					(get_tarval_sub_bits(tv, 1) << 8) |
255 					(get_tarval_sub_bits(tv, 2) << 16) |
256 					(get_tarval_sub_bits(tv, 3) << 24);
257 				ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
258 				load = new_bd_ia32_xMovd(dbgi, block, cnst);
259 				set_ia32_ls_mode(load, mode);
262 #ifdef CONSTRUCT_SSE_CONST
263 				if (mode == mode_D) {
264 					unsigned val = get_tarval_sub_bits(tv, 0) |
265 						(get_tarval_sub_bits(tv, 1) << 8) |
266 						(get_tarval_sub_bits(tv, 2) << 16) |
267 						(get_tarval_sub_bits(tv, 3) << 24);
269 					ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
270 					ir_node *cnst, *psllq;
272 					/* fine, lower 32bit are zero, produce 32bit value */
273 					val = get_tarval_sub_bits(tv, 4) |
274 						(get_tarval_sub_bits(tv, 5) << 8) |
275 						(get_tarval_sub_bits(tv, 6) << 16) |
276 						(get_tarval_sub_bits(tv, 7) << 24);
277 					cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
278 					load = new_bd_ia32_xMovd(dbgi, block, cnst);
279 					set_ia32_ls_mode(load, mode);
					/* move the 32 significant bits into the high half */
280 					psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
281 					set_ia32_ls_mode(psllq, mode);
286 #endif /* CONSTRUCT_SSE_CONST */
			/* general case: load the value from a constant-pool entity */
287 				floatent = ia32_create_float_const_entity(node);
289 				base = get_symconst_base();
290 				load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
292 				set_ia32_op_type(load, ia32_AddrModeS);
293 				set_ia32_am_sc(load, floatent);
			/* the load reads immutable data, so it may be rematerialized */
294 				arch_irn_add_flags(load, arch_irn_flags_rematerializable);
295 				res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
		/* x87 path: fldz/fld1 for 0.0 and 1.0, otherwise pool load */
298 			if (is_Const_null(node)) {
299 				load = new_bd_ia32_vfldz(dbgi, block);
301 				set_ia32_ls_mode(load, mode);
302 			} else if (is_Const_one(node)) {
303 				load = new_bd_ia32_vfld1(dbgi, block);
305 				set_ia32_ls_mode(load, mode);
310 				floatent = ia32_create_float_const_entity(node);
311 				/* create_float_const_ent is smart and sometimes creates
313 				ls_mode = get_type_mode(get_entity_type(floatent));
314 				base = get_symconst_base();
315 				load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
317 				set_ia32_op_type(load, ia32_AddrModeS);
318 				set_ia32_am_sc(load, floatent);
319 				arch_irn_add_flags(load, arch_irn_flags_rematerializable);
320 				res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
323 #ifdef CONSTRUCT_SSE_CONST
325 #endif /* CONSTRUCT_SSE_CONST */
326 		SET_IA32_ORIG_NODE(load, node);
328 		be_dep_on_frame(load);
330 	} else { /* non-float mode */
332 		ir_tarval *tv = get_Const_tarval(node);
		/* normalize pointer/small-int tarvals to 32bit unsigned */
335 			tv = tarval_convert_to(tv, mode_Iu);
337 		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
339 			panic("couldn't convert constant tarval (%+F)", node);
341 		val = get_tarval_long(tv);
343 		cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
344 		SET_IA32_ORIG_NODE(cnst, node);
346 		be_dep_on_frame(cnst);
352 * Transforms a SymConst.
/* Transforms a SymConst (address of an entity) into an ia32_Const with the
 * entity as symbolic immediate; float-mode SymConsts become loads.
 * NOTE(review): lines are missing from this extract (declarations, else,
 * returns); tokens below are kept verbatim. */
354 static ir_node *gen_SymConst(ir_node *node)
356 	ir_node *old_block = get_nodes_block(node);
357 	ir_node *block = be_transform_node(old_block);
358 	dbg_info *dbgi = get_irn_dbg_info(node);
359 	ir_mode *mode = get_irn_mode(node);
362 	if (mode_is_float(mode)) {
363 		if (ia32_cg_config.use_sse2)
364 			cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
366 			cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
367 		set_ia32_am_sc(cnst, get_SymConst_entity(node));
368 		set_ia32_use_frame(cnst);
	/* only entity addresses are supported; other SymConst kinds must have
	 * been lowered before the backend runs */
372 		if (get_SymConst_kind(node) != symconst_addr_ent) {
373 			panic("backend only support symconst_addr_ent (at %+F)", node);
375 		entity = get_SymConst_entity(node);
376 		cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
379 	SET_IA32_ORIG_NODE(cnst, node);
381 	be_dep_on_frame(cnst);
386 * Create a float type for the given mode and cache it.
388 * @param mode the mode for the float type (might be integer mode for SSE2 types)
389 * @param align alignment
/* Creates (and caches per mode+alignment) a primitive type for @p mode with
 * the given byte alignment. Used for the constant-pool entities.
 * @param mode   the mode (may be an integer mode for SSE2 bit-pattern data)
 * @param align  alignment in bytes; also the cache index (assumed < 16 —
 *               TODO confirm, the caches are fixed-size arrays of 16)
 * NOTE(review): braces/declarations are missing from this extract; tokens
 * below are kept verbatim. */
391 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
397 	if (mode == mode_Iu) {
398 		static ir_type *int_Iu[16] = {NULL, };
400 		if (int_Iu[align] == NULL) {
401 			int_Iu[align] = tp = new_type_primitive(mode);
402 			/* set the specified alignment */
403 			set_type_alignment_bytes(tp, align);
405 		return int_Iu[align];
406 	} else if (mode == mode_Lu) {
407 		static ir_type *int_Lu[16] = {NULL, };
409 		if (int_Lu[align] == NULL) {
410 			int_Lu[align] = tp = new_type_primitive(mode);
411 			/* set the specified alignment */
412 			set_type_alignment_bytes(tp, align);
414 		return int_Lu[align];
415 	} else if (mode == mode_F) {
416 		static ir_type *float_F[16] = {NULL, };
418 		if (float_F[align] == NULL) {
419 			float_F[align] = tp = new_type_primitive(mode);
420 			/* set the specified alignment */
421 			set_type_alignment_bytes(tp, align);
423 		return float_F[align];
424 	} else if (mode == mode_D) {
425 		static ir_type *float_D[16] = {NULL, };
427 		if (float_D[align] == NULL) {
428 			float_D[align] = tp = new_type_primitive(mode);
429 			/* set the specified alignment */
430 			set_type_alignment_bytes(tp, align);
432 		return float_D[align];
	/* fallback: extended precision (mode_E) */
434 		static ir_type *float_E[16] = {NULL, };
436 		if (float_E[align] == NULL) {
437 			float_E[align] = tp = new_type_primitive(mode);
438 			/* set the specified alignment */
439 			set_type_alignment_bytes(tp, align);
441 		return float_E[align];
446 * Create a float[2] array type for the given atomic type.
448 * @param tp the atomic type
/* Creates (and caches per element-mode+alignment) a 2-element array type of
 * the atomic type @p tp; used for the ULL_BIAS constant (pair {0, 2^64}).
 * NOTE(review): the final return is missing from this extract; tokens below
 * are kept verbatim. */
450 static ir_type *ia32_create_float_array(ir_type *tp)
452 	ir_mode *mode = get_type_mode(tp);
453 	unsigned align = get_type_alignment_bytes(tp);
458 	if (mode == mode_F) {
459 		static ir_type *float_F[16] = {NULL, };
461 		if (float_F[align] != NULL)
462 			return float_F[align];
463 		arr = float_F[align] = new_type_array(1, tp);
464 	} else if (mode == mode_D) {
465 		static ir_type *float_D[16] = {NULL, };
467 		if (float_D[align] != NULL)
468 			return float_D[align];
469 		arr = float_D[align] = new_type_array(1, tp);
	/* fallback: extended precision element type */
471 		static ir_type *float_E[16] = {NULL, };
473 		if (float_E[align] != NULL)
474 			return float_E[align];
475 		arr = float_E[align] = new_type_array(1, tp);
/* the array holds exactly two elements with fixed layout */
477 	set_type_alignment_bytes(arr, align);
478 	set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
479 	set_type_state(arr, layout_fixed);
483 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Generates an entity for a known FP const (used for FP Neg + Abs).
 * Creates a private, constant, cached global entity holding the requested
 * bit pattern (sign/abs masks or the ULL bias pair).
 * NOTE(review): declarations and some closing braces are missing from this
 * extract; tokens below are kept verbatim. */
484 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
486 	static const struct {
487 		const char *ent_name;
488 		const char *cnst_str;
	/* table indexed by ia32_known_const_t: name, value string, mode tag,
	 * alignment in bytes */
491 	} names [ia32_known_const_max] = {
492 		{ ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
493 		{ ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
494 		{ ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
495 		{ ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
496 		{ ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
498 	static ir_entity *ent_cache[ia32_known_const_max];
500 	const char *ent_name, *cnst_str;
506 	ent_name = names[kct].ent_name;
507 	if (! ent_cache[kct]) {
508 		cnst_str = names[kct].cnst_str;
		/* mode tag: 0 = 32bit, 1 = 64bit bit pattern, 2 = float value */
510 		switch (names[kct].mode) {
511 		case 0: mode = mode_Iu; break;
512 		case 1: mode = mode_Lu; break;
513 		default: mode = mode_F; break;
515 		tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
516 		tp = ia32_create_float_type(mode, names[kct].align);
		/* the bias constant is a {0, 2^64} pair, so it needs an array type */
518 		if (kct == ia32_ULLBIAS)
519 			tp = ia32_create_float_array(tp);
520 		ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
522 		set_entity_ld_ident(ent, get_entity_ident(ent));
523 		add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
524 		set_entity_visibility(ent, ir_visibility_private);
526 		if (kct == ia32_ULLBIAS) {
527 			ir_initializer_t *initializer = create_initializer_compound(2);
529 			set_initializer_compound_value(initializer, 0,
530 				create_initializer_tarval(get_mode_null(mode)));
531 			set_initializer_compound_value(initializer, 1,
532 				create_initializer_tarval(tv));
534 			set_entity_initializer(ent, initializer);
536 			set_entity_initializer(ent, create_initializer_tarval(tv));
539 		/* cache the entry */
540 		ent_cache[kct] = ent;
543 	return ent_cache[kct];
547 * return true if the node is a Proj(Load) and could be used in source address
548 * mode for another node. Will return only true if the @p other node is not
549 * dependent on the memory of the Load (for binary operations use the other
550 * input here, for unary operations use NULL).
/* return true if the node is a Proj(Load) and could be used in source address
 * mode for another node. Will return only true if the @p other node is not
 * dependent on the memory of the Load (for binary operations use the other
 * input here, for unary operations use NULL).
 * NOTE(review): several return statements/braces are missing from this
 * extract; tokens below are kept verbatim. */
552 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
553 		ir_node *other, ir_node *other2, match_flags_t flags)
558 	/* float constants are always available */
559 	if (is_Const(node)) {
560 		ir_mode *mode = get_irn_mode(node);
561 		if (mode_is_float(mode)) {
562 			if (ia32_cg_config.use_sse2) {
563 				if (is_simple_sse_Const(node))
566 				if (is_simple_x87_Const(node))
			/* constants with multiple users are not folded into AM */
569 			if (get_irn_n_edges(node) > 1)
577 	load = get_Proj_pred(node);
578 	pn = get_Proj_proj(node);
579 	if (!is_Load(load) || pn != pn_Load_res)
	/* the load must be in the same block, otherwise AM would move it */
581 	if (get_nodes_block(load) != block)
583 	/* we only use address mode if we're the only user of the load */
584 	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
586 	/* in some edge cases with address mode we might reach the load normally
587 	 * and through some AM sequence, if it is already materialized then we
588 	 * can't create an AM node from it */
589 	if (be_is_transformed(node))
592 	/* don't do AM if other node inputs depend on the load (via mem-proj) */
593 	if (other != NULL && ia32_prevents_AM(block, load, other))
596 	if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Result of match_arguments(): the matched address (am.addr), transformed
 * operands, operand mode and bookkeeping flags.
 * NOTE(review): several member lines are missing from this extract. */
602 typedef struct ia32_address_mode_t ia32_address_mode_t;
603 struct ia32_address_mode_t {
	/* whether the node is ia32_Normal or uses source address mode */
608 	ia32_op_type_t op_type;
612 	unsigned commutative : 1;
	/* set when op1/op2 were swapped during matching */
613 	unsigned ins_permuted : 1;
616 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
618 /* construct load address */
619 memset(addr, 0, sizeof(addr[0]));
620 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
622 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
623 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
624 addr->mem = be_transform_node(mem);
/* Fill am->addr for source address mode on @p node: either a float Const
 * (turned into a constant-pool reference) or a Proj(Load) whose address is
 * decomposed into base/index/offset.
 * NOTE(review): declarations/returns are missing from this extract; tokens
 * below are kept verbatim. */
627 static void build_address(ia32_address_mode_t *am, ir_node *node,
628 		ia32_create_am_flags_t flags)
630 	ia32_address_t *addr = &am->addr;
636 	/* floating point immediates */
637 	if (is_Const(node)) {
638 		ir_entity *entity = ia32_create_float_const_entity(node);
639 		addr->base = get_symconst_base();
640 		addr->index = noreg_GP;
642 		addr->symconst_ent = entity;
		/* the pool entity's type mode may differ from the node's mode
		 * (the entity creator can narrow the representation) */
644 		am->ls_mode = get_type_mode(get_entity_type(entity));
645 		am->pinned = op_pin_state_floats;
	/* otherwise: node is a Proj(Load) — fold the load into the AM */
649 	load = get_Proj_pred(node);
650 	ptr = get_Load_ptr(load);
651 	mem = get_Load_mem(load);
652 	new_mem = be_transform_node(mem);
653 	am->pinned = get_irn_pinned(load);
654 	am->ls_mode = get_Load_mode(load);
655 	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
658 	/* construct load address */
659 	ia32_create_address_mode(addr, ptr, flags);
661 	addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
662 	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy the components of @p addr (scale, symconst, offset, frame info)
 * into the ia32 attributes of @p node.
 * NOTE(review): a guard line before set_ia32_use_frame (presumably
 * `if (addr->use_frame)`) appears to be missing from this extract —
 * confirm against the full file; tokens below are kept verbatim. */
666 static void set_address(ir_node *node, const ia32_address_t *addr)
668 	set_ia32_am_scale(node, addr->scale);
669 	set_ia32_am_sc(node, addr->symconst_ent);
670 	set_ia32_am_offs_int(node, addr->offset);
671 	if (addr->symconst_sign)
672 		set_ia32_am_sc_sign(node);
674 		set_ia32_use_frame(node);
675 	set_ia32_frame_ent(node, addr->frame_entity);
679 * Apply attributes of a given address mode to a node.
/* Apply a matched address mode @p am to @p node: address components,
 * operand type, load/store mode, pin state and commutativity.
 * NOTE(review): the guard before set_ia32_commutative (presumably
 * `if (am->commutative)`) is missing from this extract; tokens below are
 * kept verbatim. */
681 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
683 	set_address(node, &am->addr);
685 	set_ia32_op_type(node, am->op_type);
686 	set_ia32_ls_mode(node, am->ls_mode);
687 	if (am->pinned == op_pin_state_pinned) {
688 		/* beware: some nodes are already pinned and did not allow to change the state */
689 		if (get_irn_pinned(node) != op_pin_state_pinned)
690 			set_irn_pinned(node, op_pin_state_pinned);
693 		set_ia32_commutative(node);
697 * Check, if a given node is a Down-Conv, ie. a integer Conv
698 * from a mode with a mode with more bits to a mode with lesser bits.
699 * Moreover, we return only true if the node has not more than 1 user.
701 * @param node the node
702 * @return non-zero if node is a Down-Conv
/* Returns non-zero iff @p node is an integer Conv to a mode with fewer or
 * equal bits and has at most one user (so it may be skipped for AM).
 * NOTE(review): declarations and the is_Conv check are missing from this
 * extract; tokens below are kept verbatim. */
704 static int is_downconv(const ir_node *node)
712 	/* we only want to skip the conv when we're the only user
713 	 * (because this test is used in the context of address-mode selection
714 	 * and we don't want to use address mode for multiple users) */
715 	if (get_irn_n_edges(node) > 1)
718 	src_mode = get_irn_mode(get_Conv_op(node));
719 	dest_mode = get_irn_mode(node);
	/* both modes must live in GP registers and the target must not widen */
721 		ia32_mode_needs_gp_reg(src_mode) &&
722 		ia32_mode_needs_gp_reg(dest_mode) &&
723 		get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
726 /** Skip all Down-Conv's on a given node and return the resulting node. */
727 ir_node *ia32_skip_downconv(ir_node *node)
729 while (is_downconv(node))
730 node = get_Conv_op(node);
/* Returns true iff @p node is an integer Conv between modes of identical
 * bit size (pure signedness change) with at most one user.
 * NOTE(review): declarations and the is_Conv check are missing from this
 * extract; tokens below are kept verbatim. */
735 static bool is_sameconv(ir_node *node)
743 	/* we only want to skip the conv when we're the only user
744 	 * (because this test is used in the context of address-mode selection
745 	 * and we don't want to use address mode for multiple users) */
746 	if (get_irn_n_edges(node) > 1)
749 	src_mode = get_irn_mode(get_Conv_op(node));
750 	dest_mode = get_irn_mode(node);
	/* both modes must live in GP registers and have equal width */
752 		ia32_mode_needs_gp_reg(src_mode) &&
753 		ia32_mode_needs_gp_reg(dest_mode) &&
754 		get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
757 /** Skip all signedness convs */
758 static ir_node *ia32_skip_sameconv(ir_node *node)
760 while (is_sameconv(node))
761 node = get_Conv_op(node);
/* Widen @p node to a 32bit mode (sign- or zero-extending depending on the
 * signedness of its mode) via an I2I Conv.
 * NOTE(review): the tgt_mode selection lines are missing from this
 * extract; tokens below are kept verbatim. */
766 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
768 	ir_mode *mode = get_irn_mode(node);
773 	if (mode_is_signed(mode)) {
778 	block = get_nodes_block(node);
779 	dbgi = get_irn_dbg_info(node);
781 	return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
785 * matches operands of a node into ia32 addressing/operand modes. This covers
786 * usage of source address mode, immediates, operations with non 32-bit modes,
788 * The resulting data is filled into the @p am struct. block is the block
789 * of the node whose arguments are matched. op1, op2 are the first and second
790 * input that are matched (op1 may be NULL). other_op is another unrelated
791 * input that is not matched! but which is needed sometimes to check if AM
792 * for op1/op2 is legal.
793 * @p flags describes the supported modes of the operation in detail.
/* matches operands of a node into ia32 addressing/operand modes. This covers
 * usage of source address mode, immediates, operations with non 32-bit modes.
 * Fills @p am; block is the block of the node whose arguments are matched.
 * op1/op2 are the matched inputs (op1 may be NULL); other_op is an unrelated
 * input that is only used to check AM legality. @p flags describes the
 * supported modes of the operation in detail.
 * NOTE(review): many lines (declarations, else-branches, braces) are
 * missing from this extract; tokens below are kept verbatim. */
795 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
796 		ir_node *op1, ir_node *op2, ir_node *other_op,
799 	ia32_address_t *addr = &am->addr;
800 	ir_mode *mode = get_irn_mode(op2);
801 	int mode_bits = get_mode_size_bits(mode);
802 	ir_node *new_op1, *new_op2;
804 	unsigned commutative;
805 	int use_am_and_immediates;
808 	memset(am, 0, sizeof(am[0]));
810 	commutative = (flags & match_commutative) != 0;
811 	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
812 	use_am = (flags & match_am) != 0;
813 	use_immediate = (flags & match_immediate) != 0;
814 	assert(!use_am_and_immediates || use_immediate);
817 	assert(!commutative || op1 != NULL);
818 	assert(use_am || !(flags & match_8bit_am));
819 	assert(use_am || !(flags & match_16bit_am));
	/* narrow operations without the matching 8/16bit AM support cannot use
	 * address mode */
821 	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
822 		(mode_bits == 16 && !(flags & match_16bit_am))) {
826 	/* we can simply skip downconvs for mode neutral nodes: the upper bits
827 	 * can be random for these operations */
828 	if (flags & match_mode_neutral) {
829 		op2 = ia32_skip_downconv(op2);
831 			op1 = ia32_skip_downconv(op1);
	/* signedness-only convs never change the bits and can always go */
834 		op2 = ia32_skip_sameconv(op2);
836 			op1 = ia32_skip_sameconv(op1);
840 	/* match immediates. firm nodes are normalized: constants are always on the
843 	if (!(flags & match_try_am) && use_immediate) {
844 		new_op2 = ia32_try_create_Immediate(op2, 0);
	/* try source address mode for op2, then (if commutative) for op1 */
847 	if (new_op2 == NULL &&
848 		use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
849 		build_address(am, op2, ia32_create_am_normal);
850 		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
851 		if (mode_is_float(mode)) {
852 			new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
856 		am->op_type = ia32_AddrModeS;
857 	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
859 			ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
861 		build_address(am, op1, ia32_create_am_normal);
863 		if (mode_is_float(mode)) {
864 			noreg = ia32_new_NoReg_vfp(current_ir_graph);
869 		if (new_op2 != NULL) {
			/* op1 is folded into the AM, so op2 moves to the first slot */
872 			new_op1 = be_transform_node(op2);
874 			am->ins_permuted = 1;
876 		am->op_type = ia32_AddrModeS;
	/* neither immediate nor AM matched: plain register operation */
879 		am->op_type = ia32_Normal;
881 		if (flags & match_try_am) {
887 		mode = get_irn_mode(op2);
		/* operations that require 32bit operands get explicit upconvs */
888 		if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
889 			new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
891 			new_op2 = create_upconv(op2, NULL);
892 			am->ls_mode = mode_Iu;
894 			new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
896 			new_op2 = be_transform_node(op2);
897 			am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill in defaults for unused address components */
900 	if (addr->base == NULL)
901 		addr->base = noreg_GP;
902 	if (addr->index == NULL)
903 		addr->index = noreg_GP;
904 	if (addr->mem == NULL)
907 	am->new_op1 = new_op1;
908 	am->new_op2 = new_op2;
909 	am->commutative = commutative;
913 * "Fixes" a node that uses address mode by turning it into mode_T
914 * and returning a pn_ia32_res Proj.
916 * @param node the node
917 * @param am its address mode
919 * @return a Proj(pn_ia32_res) if a memory address mode is used,
/* "Fixes" a node that uses address mode by turning it into mode_T and
 * returning a pn_ia32_res Proj, so the original Load's mem Proj can attach.
 * Returns @p node unchanged when no memory AM was used.
 * NOTE(review): returns/braces are missing from this extract; tokens below
 * are kept verbatim. */
922 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
927 	if (am->mem_proj == NULL)
930 	/* we have to create a mode_T so the old MemProj can attach to us */
931 	mode = get_irn_mode(node);
932 	load = get_Proj_pred(am->mem_proj);
	/* redirect users of the old load to the new AM node */
934 	be_set_transformed_node(load, node);
936 	if (mode != mode_T) {
937 		set_irn_mode(node, mode_T);
938 		return new_rd_Proj(NULL, node, mode, pn_ia32_res);
945 * Construct a standard binary operation, set AM and immediate if required.
947 * @param node The original node for which the binop is created
948 * @param op1 The first operand
949 * @param op2 The second operand
950 * @param func The node constructor function
951 * @return The constructed ia32 node.
/* Construct a standard binary operation, set AM and immediate if required.
 * @param node  The original node for which the binop is created
 * @param op1   The first operand
 * @param op2   The second operand
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): declarations/return are missing from this extract; tokens
 * below are kept verbatim. */
953 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
954 		construct_binop_func *func, match_flags_t flags)
957 	ir_node *block, *new_block, *new_node;
958 	ia32_address_mode_t am;
959 	ia32_address_t *addr = &am.addr;
961 	block = get_nodes_block(node);
962 	match_arguments(&am, block, op1, op2, NULL, flags);
964 	dbgi = get_irn_dbg_info(node);
965 	new_block = be_transform_node(block);
966 	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
967 		am.new_op1, am.new_op2);
968 	set_am_attributes(new_node, &am);
969 	/* we can't use source address mode anymore when using immediates */
970 	if (!(flags & match_am_and_immediates) &&
971 		(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 		set_ia32_am_support(new_node, ia32_am_none);
973 	SET_IA32_ORIG_NODE(new_node, node);
975 	new_node = fix_mem_proj(new_node, &am);
981 * Generic names for the inputs of an ia32 binary op.
984 n_ia32_l_binop_left, /**< ia32 left input */
985 n_ia32_l_binop_right, /**< ia32 right input */
986 n_ia32_l_binop_eflags /**< ia32 eflags input */
988 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
989 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
990 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
991 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
992 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
993 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
996 * Construct a binary operation which also consumes the eflags.
998 * @param node The node to transform
999 * @param func The node constructor function
1000 * @param flags The match flags
1001 * @return The constructor ia32 node
/* Construct a binary operation which also consumes the eflags (Adc/Sbb).
 * @param node   The lowered node to transform
 * @param func   The node constructor function
 * @param flags  The match flags
 * @return The constructed ia32 node
 * NOTE(review): declarations/return are missing from this extract; tokens
 * below are kept verbatim. */
1003 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1004 		match_flags_t flags)
1006 	ir_node *src_block = get_nodes_block(node);
1007 	ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1008 	ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1009 	ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1011 	ir_node *block, *new_node, *new_eflags;
1012 	ia32_address_mode_t am;
1013 	ia32_address_t *addr = &am.addr;
	/* eflags is passed as other_op: AM must not reorder across it */
1015 	match_arguments(&am, src_block, op1, op2, eflags, flags);
1017 	dbgi = get_irn_dbg_info(node);
1018 	block = be_transform_node(src_block);
1019 	new_eflags = be_transform_node(eflags);
1020 	new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1021 		am.new_op1, am.new_op2, new_eflags);
1022 	set_am_attributes(new_node, &am);
1023 	/* we can't use source address mode anymore when using immediates */
1024 	if (!(flags & match_am_and_immediates) &&
1025 		(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1026 		set_ia32_am_support(new_node, ia32_am_none);
1027 	SET_IA32_ORIG_NODE(new_node, node);
1029 	new_node = fix_mem_proj(new_node, &am);
/* Returns the (lazily transformed and cached) node representing the initial
 * x87 floating-point control word; consumed by all x87 arithmetic nodes.
 * NOTE(review): the local declaration line is missing from this extract;
 * tokens below are kept verbatim. */
1034 static ir_node *get_fpcw(void)
1037 	if (initial_fpcw != NULL)
1038 		return initial_fpcw;
1040 	fpcw = be_abi_get_ignore_irn(be_get_irg_abi(current_ir_graph),
1041 		&ia32_registers[REG_FPCW]);
1042 	initial_fpcw = be_transform_node(fpcw);
1044 	return initial_fpcw;
1048 * Construct a standard binary operation, set AM and immediate if required.
1050 * @param op1 The first operand
1051 * @param op2 The second operand
1052 * @param func The node constructor function
1053 * @return The constructed ia32 node.
/* Construct a standard x87 float binary operation, set AM if possible.
 * @param op1   The first operand
 * @param op2   The second operand
 * @param func  The node constructor function (takes the fpcw as extra input)
 * @return The constructed ia32 node.
 * NOTE(review): some lines (flag updates, return) are missing from this
 * extract; tokens below are kept verbatim. */
1055 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1056 		construct_binop_float_func *func)
1058 	ir_mode *mode = get_irn_mode(node);
1060 	ir_node *block, *new_block, *new_node;
1061 	ia32_address_mode_t am;
1062 	ia32_address_t *addr = &am.addr;
1063 	ia32_x87_attr_t *attr;
1064 	/* All operations are considered commutative, because there are reverse
1066 	match_flags_t flags = match_commutative;
1068 	/* happens for div nodes... */
1069 	if (mode == mode_T) {
1071 			mode = get_Div_resmode(node);
1073 			panic("can't determine mode");
1076 	/* cannot use address mode with long double on x87 */
1077 	if (get_mode_size_bits(mode) <= 64)
1080 	block = get_nodes_block(node);
1081 	match_arguments(&am, block, op1, op2, NULL, flags);
1083 	dbgi = get_irn_dbg_info(node);
1084 	new_block = be_transform_node(block);
1085 	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1086 		am.new_op1, am.new_op2, get_fpcw());
1087 	set_am_attributes(new_node, &am);
	/* record operand swap so emission can pick the reversed opcode */
1089 	attr = get_ia32_x87_attr(new_node);
1090 	attr->attr.data.ins_permuted = am.ins_permuted;
1092 	SET_IA32_ORIG_NODE(new_node, node);
1094 	new_node = fix_mem_proj(new_node, &am);
1100 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1102 * @param op1 The first operand
1103 * @param op2 The second operand
1104 * @param func The node constructor function
1105 * @return The constructed ia32 node.
/* Construct a shift/rotate binary operation, sets AM and immediate if
 * required.
 * @param op1   The value to shift
 * @param op2   The shift amount
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): some lines (loop body, return) are missing from this
 * extract; tokens below are kept verbatim. */
1107 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1108 		construct_shift_func *func,
1109 		match_flags_t flags)
1112 	ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1114 	assert(! mode_is_float(get_irn_mode(node)));
1115 	assert(flags & match_immediate);
1116 	assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1118 	if (flags & match_mode_neutral) {
1119 		op1 = ia32_skip_downconv(op1);
1120 		new_op1 = be_transform_node(op1);
1121 	} else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
		/* narrow shifts must see a properly extended value */
1122 		new_op1 = create_upconv(op1, node);
1124 		new_op1 = be_transform_node(op1);
1127 	/* the shift amount can be any mode that is bigger than 5 bits, since all
1128 	 * other bits are ignored anyway */
1129 	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1130 		ir_node *const op = get_Conv_op(op2);
1131 		if (mode_is_float(get_irn_mode(op)))
1134 	assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1136 	new_op2 = create_immediate_or_transform(op2, 0);
1138 	dbgi = get_irn_dbg_info(node);
1139 	block = get_nodes_block(node);
1140 	new_block = be_transform_node(block);
1141 	new_node = func(dbgi, new_block, new_op1, new_op2);
1142 	SET_IA32_ORIG_NODE(new_node, node);
1144 	/* lowered shift instruction may have a dependency operand, handle it here */
1145 	if (get_irn_arity(node) == 3) {
1146 		/* we have a dependency */
1147 		ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1148 		add_irn_dep(new_node, new_dep);
1156 * Construct a standard unary operation, set AM and immediate if required.
1158 * @param op The operand
1159 * @param func The node constructor function
1160 * @return The constructed ia32 node.
/* Construct a standard unary operation.
 * @param op    The operand
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): declarations and the return are missing from this extract;
 * tokens below are kept verbatim. */
1162 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1163 		match_flags_t flags)
1166 	ir_node *block, *new_block, *new_op, *new_node;
1168 	assert(flags == 0 || flags == match_mode_neutral);
1169 	if (flags & match_mode_neutral) {
1170 		op = ia32_skip_downconv(op);
1173 	new_op = be_transform_node(op);
1174 	dbgi = get_irn_dbg_info(node);
1175 	block = get_nodes_block(node);
1176 	new_block = be_transform_node(block);
1177 	new_node = func(dbgi, new_block, new_op);
1179 	SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32_Lea from a decomposed address: transforms base/index
 * (substituting noreg where absent — those branches are missing from this
 * extract) and copies scale/offset/symconst via set_address().
 * NOTE(review): tokens below are kept verbatim. */
1184 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1185 		ia32_address_t *addr)
1187 	ir_node *base, *index, *res;
1193 		base = be_transform_node(base);
1196 	index = addr->index;
1197 	if (index == NULL) {
1200 		index = be_transform_node(index);
1203 	res = new_bd_ia32_Lea(dbgi, block, base, index);
1204 	set_address(res, addr);
1210 * Returns non-zero if a given address mode has a symbolic or
1211 * numerical offset != 0.
1213 static int am_has_immediates(const ia32_address_t *addr)
1215 return addr->offset != 0 || addr->symconst_ent != NULL
1216 || addr->frame_entity || addr->use_frame;
1220 * Creates an ia32 Add.
1222 * @return the created ia32 Add node
1224 static ir_node *gen_Add(ir_node *node)
1226 ir_mode *mode = get_irn_mode(node);
1227 ir_node *op1 = get_Add_left(node);
1228 ir_node *op2 = get_Add_right(node);
1230 ir_node *block, *new_block, *new_node, *add_immediate_op;
1231 ia32_address_t addr;
1232 ia32_address_mode_t am;
/* floating point add: SSE2 instruction if available, otherwise x87 */
1234 if (mode_is_float(mode)) {
1235 if (ia32_cg_config.use_sse2)
1236 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1237 match_commutative | match_am);
1239 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
/* integer Add is matched into address modes itself below, so mark it to not
 * be used as somebody else's address-mode input */
1242 ia32_mark_non_am(node);
1244 op2 = ia32_skip_downconv(op2);
1245 op1 = ia32_skip_downconv(op1);
1249 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1250 * 1. Add with immediate -> Lea
1251 * 2. Add with possible source address mode -> Add
1252 * 3. Otherwise -> Lea
/* try to fold the whole Add into a single address-mode expression */
1254 memset(&addr, 0, sizeof(addr));
1255 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1256 add_immediate_op = NULL;
1258 dbgi = get_irn_dbg_info(node);
1259 block = get_nodes_block(node);
1260 new_block = be_transform_node(block);
/* only symconst/offset components: the Add collapses to a Const */
1263 if (addr.base == NULL && addr.index == NULL) {
1264 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1265 addr.symconst_sign, 0, addr.offset);
1266 be_dep_on_frame(new_node);
1267 SET_IA32_ORIG_NODE(new_node, node);
1270 /* add with immediate? */
1271 if (addr.index == NULL) {
1272 add_immediate_op = addr.base;
1273 } else if (addr.base == NULL && addr.scale == 0) {
1274 add_immediate_op = addr.index;
1277 if (add_immediate_op != NULL) {
/* no immediates at all: the Add was x + 0, just use the operand */
1278 if (!am_has_immediates(&addr)) {
1279 #ifdef DEBUG_libfirm
1280 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1283 return be_transform_node(add_immediate_op);
1286 new_node = create_lea_from_address(dbgi, new_block, &addr);
1287 SET_IA32_ORIG_NODE(new_node, node);
1291 /* test if we can use source address mode */
1292 match_arguments(&am, block, op1, op2, NULL, match_commutative
1293 | match_mode_neutral | match_am | match_immediate | match_try_am);
1295 /* construct an Add with source address mode */
1296 if (am.op_type == ia32_AddrModeS) {
1297 ia32_address_t *am_addr = &am.addr;
1298 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1299 am_addr->index, am_addr->mem, am.new_op1,
1301 set_am_attributes(new_node, &am);
1302 SET_IA32_ORIG_NODE(new_node, node);
1304 new_node = fix_mem_proj(new_node, &am);
1309 /* otherwise construct a lea */
1310 new_node = create_lea_from_address(dbgi, new_block, &addr);
1311 SET_IA32_ORIG_NODE(new_node, node);
1316 * Creates an ia32 Mul.
1318 * @return the created ia32 Mul node
1320 static ir_node *gen_Mul(ir_node *node)
1322 ir_node *op1 = get_Mul_left(node);
1323 ir_node *op2 = get_Mul_right(node);
1324 ir_mode *mode = get_irn_mode(node);
1326 if (mode_is_float(mode)) {
1327 if (ia32_cg_config.use_sse2)
1328 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1329 match_commutative | match_am);
1331 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1333 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1334 match_commutative | match_am | match_mode_neutral |
1335 match_immediate | match_am_and_immediates);
1339 * Creates an ia32 Mulh.
1340 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1341 * this result while Mul returns the lower 32 bit.
1343 * @return the created ia32 Mulh node
1345 static ir_node *gen_Mulh(ir_node *node)
1347 dbg_info *dbgi = get_irn_dbg_info(node);
1348 ir_node *op1 = get_Mulh_left(node);
1349 ir_node *op2 = get_Mulh_right(node);
1350 ir_mode *mode = get_irn_mode(node);
1352 ir_node *proj_res_high;
1354 if (get_mode_size_bits(mode) != 32) {
1355 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1358 if (mode_is_signed(mode)) {
1359 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1360 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1362 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1363 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1365 return proj_res_high;
1369 * Creates an ia32 And.
1371 * @return The created ia32 And node
1373 static ir_node *gen_And(ir_node *node)
1375 ir_node *op1 = get_And_left(node);
1376 ir_node *op2 = get_And_right(node);
1377 assert(! mode_is_float(get_irn_mode(node)));
1379 /* is it a zero extension? */
1380 if (is_Const(op2)) {
1381 ir_tarval *tv = get_Const_tarval(op2);
1382 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF zero-extends from 8/16 bit: emit a Conv (movzx)
 * instead of an and instruction */
1384 if (v == 0xFF || v == 0xFFFF) {
1385 dbg_info *dbgi = get_irn_dbg_info(node);
1386 ir_node *block = get_nodes_block(node);
1393 assert(v == 0xFFFF);
1396 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: a plain And instruction */
1401 return gen_binop(node, op1, op2, new_bd_ia32_And,
1402 match_commutative | match_mode_neutral | match_am | match_immediate);
1408 * Creates an ia32 Or.
1410 * @return The created ia32 Or node
1412 static ir_node *gen_Or(ir_node *node)
1414 ir_node *op1 = get_Or_left(node);
1415 ir_node *op2 = get_Or_right(node);
1417 assert (! mode_is_float(get_irn_mode(node)));
1418 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1419 | match_mode_neutral | match_am | match_immediate);
1425 * Creates an ia32 Eor.
1427 * @return The created ia32 Eor node
1429 static ir_node *gen_Eor(ir_node *node)
1431 ir_node *op1 = get_Eor_left(node);
1432 ir_node *op2 = get_Eor_right(node);
1434 assert(! mode_is_float(get_irn_mode(node)));
1435 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1436 | match_mode_neutral | match_am | match_immediate);
1441 * Creates an ia32 Sub.
1443 * @return The created ia32 Sub node
1445 static ir_node *gen_Sub(ir_node *node)
1447 ir_node *op1 = get_Sub_left(node);
1448 ir_node *op2 = get_Sub_right(node);
1449 ir_mode *mode = get_irn_mode(node);
/* floating point subtract: SSE2 if available, otherwise x87 */
1451 if (mode_is_float(mode)) {
1452 if (ia32_cg_config.use_sse2)
1453 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1455 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with a constant right operand should have been normalized to
 * Add(x, -C) by the middleend — warn when it was not */
1458 if (is_Const(op2)) {
1459 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: no match_commutative here — Sub is not commutative */
1463 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1464 | match_am | match_immediate);
/* Merge the memory input of a node with the memory already consumed by a
 * folded address mode, skipping predecessors that would create a cycle
 * through the folded load's mem-Proj. */
1467 static ir_node *transform_AM_mem(ir_node *const block,
1468 ir_node *const src_val,
1469 ir_node *const src_mem,
1470 ir_node *const am_mem)
/* no address-mode memory: just use the node's own memory */
1472 if (is_NoMem(am_mem)) {
1473 return be_transform_node(src_mem);
1474 } else if (is_Proj(src_val) &&
1476 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1477 /* avoid memory loop */
/* value and memory come from the folded load itself */
1479 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1480 ir_node *const ptr_pred = get_Proj_pred(src_val);
1481 int const arity = get_Sync_n_preds(src_mem);
1486 NEW_ARR_A(ir_node*, ins, arity + 1);
1488 /* NOTE: This sometimes produces dead-code because the old sync in
1489 * src_mem might not be used anymore, we should detect this case
1490 * and kill the sync... */
1491 for (i = arity - 1; i >= 0; --i) {
1492 ir_node *const pred = get_Sync_pred(src_mem, i);
1494 /* avoid memory loop */
1495 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1498 ins[n++] = be_transform_node(pred);
/* rebuild the Sync from the remaining predecessors (plus am_mem) */
1503 return new_r_Sync(block, n, ins);
/* generic case: synchronize the node's memory with the AM memory */
1507 ins[0] = be_transform_node(src_mem);
1509 return new_r_Sync(block, 2, ins);
1514 * Create a 32bit to 64bit signed extension.
1516 * @param dbgi debug info
1517 * @param block the block where node nodes should be placed
1518 * @param val the value to extend
1519 * @param orig the original node
1521 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1522 ir_node *val, const ir_node *orig)
1527 if (ia32_cg_config.use_short_sex_eax) {
/* cltd sign-extends eax into edx; ProduceVal supplies the (unused) input */
1528 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1529 be_dep_on_frame(pval);
1530 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* otherwise replicate the sign bit with an arithmetic shift right by 31 */
1532 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1533 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1535 SET_IA32_ORIG_NODE(res, orig);
1540 * Generates an ia32 Div with additional infrastructure for the
1541 * register allocator if needed.
1543 static ir_node *create_Div(ir_node *node)
1545 dbg_info *dbgi = get_irn_dbg_info(node);
1546 ir_node *block = get_nodes_block(node);
1547 ir_node *new_block = be_transform_node(block);
1554 ir_node *sign_extension;
1555 ia32_address_mode_t am;
1556 ia32_address_t *addr = &am.addr;
1558 /* the upper bits have random contents for smaller modes */
/* handles both firm Div and Mod nodes: pick the accessors per opcode */
1559 switch (get_irn_opcode(node)) {
1561 op1 = get_Div_left(node);
1562 op2 = get_Div_right(node);
1563 mem = get_Div_mem(node);
1564 mode = get_Div_resmode(node);
1567 op1 = get_Mod_left(node);
1568 op2 = get_Mod_right(node);
1569 mem = get_Mod_mem(node);
1570 mode = get_Mod_resmode(node);
1573 panic("invalid divmod node %+F", node);
/* operands must be up-converted to full 32bit (match_upconv_32) */
1576 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1578 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1579 is the memory of the consumed address. We can have only the second op as address
1580 in Div nodes, so check only op2. */
1581 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* idiv needs edx:eax sign-extended, div needs edx zeroed */
1583 if (mode_is_signed(mode)) {
1584 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1585 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1586 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1588 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1589 be_dep_on_frame(sign_extension);
1591 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1592 addr->index, new_mem, am.new_op2,
1593 am.new_op1, sign_extension);
/* division can trap, keep the original pinned state */
1596 set_irn_pinned(new_node, get_irn_pinned(node));
1598 set_am_attributes(new_node, &am);
1599 SET_IA32_ORIG_NODE(new_node, node);
1601 new_node = fix_mem_proj(new_node, &am);
1607 * Generates an ia32 Mod.
1609 static ir_node *gen_Mod(ir_node *node)
1611 return create_Div(node);
1615 * Generates an ia32 Div.
1617 static ir_node *gen_Div(ir_node *node)
1619 ir_mode *mode = get_Div_resmode(node);
1620 if (mode_is_float(mode)) {
1621 ir_node *op1 = get_Div_left(node);
1622 ir_node *op2 = get_Div_right(node);
1624 if (ia32_cg_config.use_sse2) {
1625 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1627 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1631 return create_Div(node);
1635 * Creates an ia32 Shl.
1637 * @return The created ia32 Shl node
1639 static ir_node *gen_Shl(ir_node *node)
1641 ir_node *left = get_Shl_left(node);
1642 ir_node *right = get_Shl_right(node);
1644 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1645 match_mode_neutral | match_immediate);
1649 * Creates an ia32 Shr.
1651 * @return The created ia32 Shr node
1653 static ir_node *gen_Shr(ir_node *node)
1655 ir_node *left = get_Shr_left(node);
1656 ir_node *right = get_Shr_right(node);
1658 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1664 * Creates an ia32 Sar.
1666 * @return The created ia32 Shrs node
1668 static ir_node *gen_Shrs(ir_node *node)
1670 ir_node *left = get_Shrs_left(node);
1671 ir_node *right = get_Shrs_right(node);
/* Shrs by a constant: check for the full sign-extension idiom.
 * NOTE(review): the guard on val (presumably val == 31) is elided here —
 * confirm against full source */
1673 if (is_Const(right)) {
1674 ir_tarval *tv = get_Const_tarval(right);
1675 long val = get_tarval_long(tv);
1677 /* this is a sign extension */
1678 dbg_info *dbgi = get_irn_dbg_info(node);
1679 ir_node *block = be_transform_node(get_nodes_block(node));
1680 ir_node *new_op = be_transform_node(left);
1682 return create_sex_32_64(dbgi, block, new_op, node);
1686 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C in {16, 24} sign-extends the low 16/8 bits:
 * emit a Conv (movsx) instead of the shift pair */
1687 if (is_Const(right) && is_Shl(left)) {
1688 ir_node *shl_left = get_Shl_left(left);
1689 ir_node *shl_right = get_Shl_right(left);
1690 if (is_Const(shl_right)) {
1691 ir_tarval *tv1 = get_Const_tarval(right);
1692 ir_tarval *tv2 = get_Const_tarval(shl_right);
1693 if (tv1 == tv2 && tarval_is_long(tv1)) {
1694 long val = get_tarval_long(tv1);
1695 if (val == 16 || val == 24) {
1696 dbg_info *dbgi = get_irn_dbg_info(node);
1697 ir_node *block = get_nodes_block(node);
1707 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* default case: a plain arithmetic shift right */
1716 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1722 * Creates an ia32 Rol.
1724 * @param op1 The first operator
1725 * @param op2 The second operator
1726 * @return The created ia32 RotL node
1728 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1730 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1736 * Creates an ia32 Ror.
1737 * NOTE: There is no RotR with immediate because this would always be a RotL
1738 * "imm-mode_size_bits" which can be pre-calculated.
1740 * @param op1 The first operator
1741 * @param op2 The second operator
1742 * @return The created ia32 RotR node
1744 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1746 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1752 * Creates an ia32 RotR or RotL (depending on the found pattern).
1754 * @return The created ia32 RotL or RotR node
1756 static ir_node *gen_Rotl(ir_node *node)
1758 ir_node *op1 = get_Rotl_left(node);
1759 ir_node *op2 = get_Rotl_right(node);
1761 if (is_Minus(op2)) {
1762 return gen_Ror(node, op1, get_Minus_op(op2));
1765 return gen_Rol(node, op1, op2);
1771 * Transforms a Minus node.
1773 * @return The created ia32 Minus node
1775 static ir_node *gen_Minus(ir_node *node)
1777 ir_node *op = get_Minus_op(node);
1778 ir_node *block = be_transform_node(get_nodes_block(node));
1779 dbg_info *dbgi = get_irn_dbg_info(node);
1780 ir_mode *mode = get_irn_mode(node);
/* float negation: flip the sign bit via xor with a sign-mask constant
 * (SSE2) or use x87 fchs */
1785 if (mode_is_float(mode)) {
1786 ir_node *new_op = be_transform_node(op);
1787 if (ia32_cg_config.use_sse2) {
1788 /* TODO: non-optimal... if we have many xXors, then we should
1789 * rather create a load for the const and use that instead of
1790 * several AM nodes... */
1791 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1793 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1794 noreg_GP, nomem, new_op, noreg_xmm);
/* pick the single/double precision sign-mask constant entity */
1796 size = get_mode_size_bits(mode);
1797 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1799 set_ia32_am_sc(new_node, ent);
1800 set_ia32_op_type(new_node, ia32_AddrModeS);
1801 set_ia32_ls_mode(new_node, mode);
1803 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negation */
1806 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1809 SET_IA32_ORIG_NODE(new_node, node);
1815 * Transforms a Not node.
1817 * @return The created ia32 Not node
1819 static ir_node *gen_Not(ir_node *node)
1821 ir_node *op = get_Not_op(node);
1823 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1824 assert (! mode_is_float(get_irn_mode(node)));
1826 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Create abs(op), or -abs(op) when negate is set. */
1829 static ir_node *create_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1830 bool negate, ir_node *node)
1832 ir_node *new_block = be_transform_node(block);
1833 ir_mode *mode = get_irn_mode(op);
/* float abs: clear the sign bit via and with an abs-mask constant (SSE2)
 * or use x87 fabs */
1839 if (mode_is_float(mode)) {
1840 new_op = be_transform_node(op);
1842 if (ia32_cg_config.use_sse2) {
1843 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1844 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1845 noreg_GP, nomem, new_op, noreg_fp);
1847 size = get_mode_size_bits(mode);
1848 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1850 set_ia32_am_sc(new_node, ent);
1852 SET_IA32_ORIG_NODE(new_node, node);
1854 set_ia32_op_type(new_node, ia32_AddrModeS);
1855 set_ia32_ls_mode(new_node, mode);
1857 /* TODO, implement -Abs case */
1860 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1861 SET_IA32_ORIG_NODE(new_node, node);
/* the x87 negate-abs case chains fchs after fabs */
1863 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1864 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs: s = x >> 31 (sign mask); abs = (x ^ s) - s;
 * when negating the Sub operands are swapped */
1869 ir_node *sign_extension;
1871 if (get_mode_size_bits(mode) == 32) {
1872 new_op = be_transform_node(op);
/* smaller modes are first sign-extended to 32bit */
1874 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1877 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1879 xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1880 nomem, new_op, sign_extension);
1881 SET_IA32_ORIG_NODE(xorn, node);
1884 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1885 nomem, sign_extension, xorn);
1887 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1888 nomem, xorn, sign_extension);
1890 SET_IA32_ORIG_NODE(new_node, node);
1897 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1899 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1901 dbg_info *dbgi = get_irn_dbg_info(cmp);
1902 ir_node *block = get_nodes_block(cmp);
1903 ir_node *new_block = be_transform_node(block);
1904 ir_node *op1 = be_transform_node(x);
1905 ir_node *op2 = be_transform_node(n);
1907 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1911 * Transform a node returning a "flag" result.
1913 * @param node the node to transform
1914 * @param pnc_out the compare mode to use
1916 static ir_node *get_flags_node(ir_node *node, int *pnc_out)
1923 /* we have a Cmp as input */
1924 if (is_Proj(node)) {
1925 ir_node *pred = get_Proj_pred(node);
1927 int pnc = get_Proj_pn_cmp(node);
/* try to match Cmp(And(x, 1 << n), 0) and friends onto a bt instruction;
 * bt puts the tested bit into the carry flag */
1928 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1929 ir_node *l = get_Cmp_left(pred);
1930 ir_node *r = get_Cmp_right(pred);
1932 ir_node *la = get_And_left(l);
1933 ir_node *ra = get_And_right(l);
1935 ir_node *c = get_Shl_left(la);
1936 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1937 /* (1 << n) & ra) */
1938 ir_node *n = get_Shl_right(la);
1939 flags = gen_bt(pred, ra, n);
1940 /* we must generate a Jc/Jnc jump */
/* carry set/clear is tested as unsigned Lt/Ge */
1941 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1944 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* symmetric case: the shifted 1 is the right And operand */
1949 ir_node *c = get_Shl_left(ra);
1950 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1951 /* la & (1 << n)) */
1952 ir_node *n = get_Shl_right(ra);
1953 flags = gen_bt(pred, la, n);
1954 /* we must generate a Jc/Jnc jump */
1955 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1958 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1964 /* add ia32 compare flags */
1966 ir_node *l = get_Cmp_left(pred);
1967 ir_mode *mode = get_irn_mode(l);
1968 if (mode_is_float(mode))
1969 pnc |= ia32_pn_Cmp_float;
1970 else if (! mode_is_signed(mode))
1971 pnc |= ia32_pn_Cmp_unsigned;
1974 flags = be_transform_node(pred);
1979 /* a mode_b value, we have to compare it against 0 */
1980 dbgi = get_irn_dbg_info(node);
1981 new_block = be_transform_node(get_nodes_block(node));
1982 new_op = be_transform_node(node);
1983 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1984 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1985 *pnc_out = pn_Cmp_Lg;
1990 * Transforms a Load.
1992 * @return the created ia32 Load node
1994 static ir_node *gen_Load(ir_node *node)
1996 ir_node *old_block = get_nodes_block(node);
1997 ir_node *block = be_transform_node(old_block);
1998 ir_node *ptr = get_Load_ptr(node);
1999 ir_node *mem = get_Load_mem(node);
2000 ir_node *new_mem = be_transform_node(mem);
2003 dbg_info *dbgi = get_irn_dbg_info(node);
2004 ir_mode *mode = get_Load_mode(node);
2006 ia32_address_t addr;
2008 /* construct load address */
2009 memset(&addr, 0, sizeof(addr));
2010 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2017 base = be_transform_node(base);
2020 if (index == NULL) {
2023 index = be_transform_node(index);
/* float loads: xmm load (SSE2) or x87 fld */
2026 if (mode_is_float(mode)) {
2027 if (ia32_cg_config.use_sse2) {
2028 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2031 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2035 assert(mode != mode_b);
2037 /* create a conv node with address mode for smaller modes */
2038 if (get_mode_size_bits(mode) < 32) {
2039 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2040 new_mem, noreg_GP, mode);
2042 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2046 set_irn_pinned(new_node, get_irn_pinned(node));
2047 set_ia32_op_type(new_node, ia32_AddrModeS);
2048 set_ia32_ls_mode(new_node, mode);
2049 set_address(new_node, &addr);
/* unpinned loads may be rematerialized by the register allocator */
2051 if (get_irn_pinned(node) == op_pin_state_floats) {
2052 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2053 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2054 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2055 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2058 SET_IA32_ORIG_NODE(new_node, node);
2060 be_dep_on_frame(new_node);
/* Check whether node (the value Proj of a Load) may be folded as destination
 * address mode of a store to ptr in block.
 * NOTE(review): the early-return values are elided in this excerpt. */
2064 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2065 ir_node *ptr, ir_node *other)
2072 /* we only use address mode if we're the only user of the load */
2073 if (get_irn_n_edges(node) > 1)
2076 load = get_Proj_pred(node);
/* load and store must be in the same block for a memory operation */
2079 if (get_nodes_block(load) != block)
2082 /* store should have the same pointer as the load */
2083 if (get_Load_ptr(load) != ptr)
2086 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2087 if (other != NULL &&
2088 get_nodes_block(other) == block &&
2089 heights_reachable_in_block(ia32_heights, other, load)) {
2093 if (ia32_prevents_AM(block, load, mem))
2095 /* Store should be attached to the load via mem */
2096 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Try to build a destination-AM binop (e.g. AddMem) for Store(op1 OP op2)
 * at address ptr, i.e. an instruction that reads and writes memory directly.
 * Returns NULL when no destination address mode can be used. */
2101 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2102 ir_node *mem, ir_node *ptr, ir_mode *mode,
2103 construct_binop_dest_func *func,
2104 construct_binop_dest_func *func8bit,
2105 match_flags_t flags)
2107 ir_node *src_block = get_nodes_block(node);
2115 ia32_address_mode_t am;
2116 ia32_address_t *addr = &am.addr;
2117 memset(&am, 0, sizeof(am));
2119 assert(flags & match_immediate); /* there is no destam node without... */
2120 commutative = (flags & match_commutative) != 0;
/* either op1 is the loaded value, or (for commutative ops) op2 */
2122 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2123 build_address(&am, op1, ia32_create_am_double_use);
2124 new_op = create_immediate_or_transform(op2, 0);
2125 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2126 build_address(&am, op2, ia32_create_am_double_use);
2127 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address parts with the no-register placeholder */
2132 if (addr->base == NULL)
2133 addr->base = noreg_GP;
2134 if (addr->index == NULL)
2135 addr->index = noreg_GP;
2136 if (addr->mem == NULL)
2139 dbgi = get_irn_dbg_info(node);
2140 block = be_transform_node(src_block);
2141 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8bit operations use a separate constructor (different register limits) */
2143 if (get_mode_size_bits(mode) == 8) {
2144 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2146 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2148 set_address(new_node, addr);
2149 set_ia32_op_type(new_node, ia32_AddrModeD);
2150 set_ia32_ls_mode(new_node, mode);
2151 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory Proj to the new memory op */
2153 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2154 mem_proj = be_transform_node(am.mem_proj);
2155 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to build a destination-AM unop (e.g. NotMem) for Store(OP op) at
 * address ptr. Returns NULL when no destination address mode can be used. */
2160 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2161 ir_node *ptr, ir_mode *mode,
2162 construct_unop_dest_func *func)
2164 ir_node *src_block = get_nodes_block(node);
2170 ia32_address_mode_t am;
2171 ia32_address_t *addr = &am.addr;
2173 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2176 memset(&am, 0, sizeof(am));
2177 build_address(&am, op, ia32_create_am_double_use);
2179 dbgi = get_irn_dbg_info(node);
2180 block = be_transform_node(src_block);
2181 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem)
;
2182 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2183 set_address(new_node, addr);
2184 set_ia32_op_type(new_node, ia32_AddrModeD);
2185 set_ia32_ls_mode(new_node, mode);
2186 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory Proj to the new memory op */
2188 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2189 mem_proj = be_transform_node(am.mem_proj);
2190 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
2195 static int ia32_get_negated_pnc(int pnc)
2197 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2198 return get_negated_pnc(pnc, mode);
/* Try to turn Store(Mux(cond, 0/1, 1/0)) into a SetccMem writing the flag
 * byte straight to memory. Returns NULL when the pattern does not match. */
2201 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2203 ir_mode *mode = get_irn_mode(node);
2204 ir_node *mux_true = get_Mux_true(node);
2205 ir_node *mux_false = get_Mux_false(node);
2214 ia32_address_t addr;
/* setcc only produces a byte */
2216 if (get_mode_size_bits(mode) != 8)
/* Mux(c, 1, 0) uses the condition directly; Mux(c, 0, 1) the negation */
2219 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2221 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2227 cond = get_Mux_sel(node);
2228 flags = get_flags_node(cond, &pnc);
2229 /* we can't handle the float special cases with SetM */
2230 if (pnc & ia32_pn_Cmp_float)
2233 pnc = ia32_get_negated_pnc(pnc);
2235 build_address_ptr(&addr, ptr, mem);
2237 dbgi = get_irn_dbg_info(node);
2238 block = get_nodes_block(node);
2239 new_block = be_transform_node(block);
2240 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2241 addr.index, addr.mem, flags, pnc);
2242 set_address(new_node, &addr);
2243 set_ia32_op_type(new_node, ia32_AddrModeD);
2244 set_ia32_ls_mode(new_node, mode);
2245 SET_IA32_ORIG_NODE(new_node, node);
/* Try to transform Store(binop(Load(ptr), x), ptr) into an instruction that
 * operates directly on memory (destination address mode). Returns NULL when
 * no such transformation is possible. */
2250 static ir_node *try_create_dest_am(ir_node *node)
2252 ir_node *val = get_Store_value(node);
2253 ir_node *mem = get_Store_mem(node);
2254 ir_node *ptr = get_Store_ptr(node);
2255 ir_mode *mode = get_irn_mode(val);
2256 unsigned bits = get_mode_size_bits(mode);
2261 /* handle only GP modes for now... */
2262 if (!ia32_mode_needs_gp_reg(mode))
2266 /* store must be the only user of the val node */
2267 if (get_irn_n_edges(val) > 1)
2269 /* skip pointless convs */
2271 ir_node *conv_op = get_Conv_op(val);
2272 ir_mode *pred_mode = get_irn_mode(conv_op);
2273 if (!ia32_mode_needs_gp_reg(pred_mode))
2275 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2283 /* value must be in the same block */
2284 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation computing the stored value */
2287 switch (get_irn_opcode(val)) {
2289 op1 = get_Add_left(val);
2290 op2 = get_Add_right(val);
/* Add +1/-1 can use the shorter inc/dec memory forms */
2291 if (ia32_cg_config.use_incdec) {
2292 if (is_Const_1(op2)) {
2293 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2295 } else if (is_Const_Minus_1(op2)) {
2296 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2300 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2301 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2302 match_commutative | match_immediate);
2305 op1 = get_Sub_left(val);
2306 op2 = get_Sub_right(val);
2307 if (is_Const(op2)) {
2308 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2310 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2311 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2315 op1 = get_And_left(val);
2316 op2 = get_And_right(val);
2317 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2318 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2319 match_commutative | match_immediate);
2322 op1 = get_Or_left(val);
2323 op2 = get_Or_right(val);
2324 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2325 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2326 match_commutative | match_immediate);
2329 op1 = get_Eor_left(val);
2330 op2 = get_Eor_right(val);
2331 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2332 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2333 match_commutative | match_immediate);
/* shifts have no 8bit-specific variant: same constructor twice */
2336 op1 = get_Shl_left(val);
2337 op2 = get_Shl_right(val);
2338 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2339 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2343 op1 = get_Shr_left(val);
2344 op2 = get_Shr_right(val);
2345 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2346 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2350 op1 = get_Shrs_left(val);
2351 op2 = get_Shrs_right(val);
2352 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2353 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2357 op1 = get_Rotl_left(val);
2358 op2 = get_Rotl_right(val);
2359 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2360 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2363 /* TODO: match ROR patterns... */
2365 new_node = try_create_SetMem(val, ptr, mem);
2369 op1 = get_Minus_op(val);
2370 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2373 /* should be lowered already */
2374 assert(mode != mode_b);
2375 op1 = get_Not_op(val);
2376 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* keep the store's pinned state on the replacement memory op */
2382 if (new_node != NULL) {
2383 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2384 get_irn_pinned(node) == op_pin_state_pinned) {
2385 set_irn_pinned(new_node, op_pin_state_pinned);
/* Check whether mode is an integer mode that a float->int conversion store
 * can target directly: signed and 16 or 32 bits wide.
 * NOTE(review): the return statements are elided in this excerpt. */
2392 static bool possible_int_mode_for_fp(ir_mode *mode)
2396 if (!mode_is_signed(mode))
2398 size = get_mode_size_bits(mode);
2399 if (size != 16 && size != 32)
/* Returns non-zero if node is a Conv from a float mode to an integer mode
 * that a fist-style store can handle (see possible_int_mode_for_fp). */
2404 static int is_float_to_int_conv(const ir_node *node)
2406 ir_mode *mode = get_irn_mode(node);
2410 if (!possible_int_mode_for_fp(mode))
2415 conv_op = get_Conv_op(node);
2416 conv_mode = get_irn_mode(conv_op);
2418 if (!mode_is_float(conv_mode))
2425 * Transform a Store(floatConst) into a sequence of
2428 * @return the created ia32 Store node
2430 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2432 ir_mode *mode = get_irn_mode(cns);
2433 unsigned size = get_mode_size_bytes(mode);
2434 ir_tarval *tv = get_Const_tarval(cns);
2435 ir_node *block = get_nodes_block(node);
2436 ir_node *new_block = be_transform_node(block);
2437 ir_node *ptr = get_Store_ptr(node);
2438 ir_node *mem = get_Store_mem(node);
2439 dbg_info *dbgi = get_irn_dbg_info(node);
2443 ia32_address_t addr;
/* the constant is written in 32bit chunks, so it must be 4-byte sized */
2445 assert(size % 4 == 0);
2448 build_address_ptr(&addr, ptr, mem);
/* pack the next 4 constant bytes into a 32bit immediate (little endian) */
2452 get_tarval_sub_bits(tv, ofs) |
2453 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2454 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2455 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2456 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2458 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2459 addr.index, addr.mem, imm);
2461 set_irn_pinned(new_node, get_irn_pinned(node));
2462 set_ia32_op_type(new_node, ia32_AddrModeD);
2463 set_ia32_ls_mode(new_node, mode_Iu);
2464 set_address(new_node, &addr);
2465 SET_IA32_ORIG_NODE(new_node, node);
2468 ins[i++] = new_node;
2473 } while (size != 0);
/* combine the partial stores' memories with a Sync */
2476 return new_rd_Sync(dbgi, new_block, i, ins);
2483 * Generate a vfist or vfisttp instruction.
2485 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2486 ir_node *mem, ir_node *val, ir_node **fist)
2490 if (ia32_cg_config.use_fisttp) {
2491 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2492 if other users exists */
2493 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2494 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* keep the popped value alive for potential other users */
2495 be_new_Keep(block, 1, &value);
2497 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* classic fist requires the FPU control word switched to truncation mode */
2500 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2503 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2509 * Transforms a general (no special case) Store.
2511 * @return the created ia32 Store node
2513 static ir_node *gen_general_Store(ir_node *node)
2515 ir_node *val = get_Store_value(node);
2516 ir_mode *mode = get_irn_mode(val);
2517 ir_node *block = get_nodes_block(node);
2518 ir_node *new_block = be_transform_node(block);
2519 ir_node *ptr = get_Store_ptr(node);
2520 ir_node *mem = get_Store_mem(node);
2521 dbg_info *dbgi = get_irn_dbg_info(node);
2522 ir_node *new_val, *new_node, *store;
2523 ia32_address_t addr;
2525 /* check for destination address mode */
2526 new_node = try_create_dest_am(node);
2527 if (new_node != NULL)
2530 /* construct store address */
2531 memset(&addr, 0, sizeof(addr));
2532 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2534 if (addr.base == NULL) {
2535 addr.base = noreg_GP;
2537 addr.base = be_transform_node(addr.base);
2540 if (addr.index == NULL) {
2541 addr.index = noreg_GP;
2543 addr.index = be_transform_node(addr.index);
2545 addr.mem = be_transform_node(mem);
/* float store: xmm store (SSE2) or x87 fst */
2547 if (mode_is_float(mode)) {
2548 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2550 while (is_Conv(val) && mode == get_irn_mode(val)) {
2551 ir_node *op = get_Conv_op(val);
2552 if (!mode_is_float(get_irn_mode(op)))
2556 new_val = be_transform_node(val);
2557 if (ia32_cg_config.use_sse2) {
2558 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2559 addr.index, addr.mem, new_val);
2561 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2562 addr.index, addr.mem, new_val, mode);
/* float->int conversion store: use fist(tp) to store directly */
2565 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2566 val = get_Conv_op(val);
2568 /* TODO: is this optimisation still necessary at all (middleend)? */
2569 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2570 while (is_Conv(val)) {
2571 ir_node *op = get_Conv_op(val);
2572 if (!mode_is_float(get_irn_mode(op)))
2574 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2578 new_val = be_transform_node(val);
2579 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store: value may also be an immediate */
2581 new_val = create_immediate_or_transform(val, 0);
2582 assert(mode != mode_b);
/* 8bit stores use a separate constructor (byte register constraints) */
2584 if (get_mode_size_bits(mode) == 8) {
2585 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2586 addr.index, addr.mem, new_val);
2588 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2589 addr.index, addr.mem, new_val);
2594 set_irn_pinned(store, get_irn_pinned(node));
2595 set_ia32_op_type(store, ia32_AddrModeD);
2596 set_ia32_ls_mode(store, mode);
2598 set_address(store, &addr);
2599 SET_IA32_ORIG_NODE(store, node);
2605 * Transforms a Store.
2607 * @return the created ia32 Store node
2609 static ir_node *gen_Store(ir_node *node)
2611 ir_node *val = get_Store_value(node);
2612 ir_mode *mode = get_irn_mode(val);
	/* A Store of a float constant is lowered to integer stores of the
	 * constant's bit pattern; everything else goes the generic route. */
2614 if (mode_is_float(mode) && is_Const(val)) {
2615 /* We can transform every floating const store
2616 into a sequence of integer stores.
2617 If the constant is already in a register,
2618 it would be better to use it, but we don't
2619 have this information here. */
2620 return gen_float_const_Store(node, val);
2622 return gen_general_Store(node);
2626 * Transforms a Switch.
2628 * @return the created ia32 SwitchJmp node
2630 static ir_node *create_Switch(ir_node *node)
2632 dbg_info *dbgi = get_irn_dbg_info(node);
2633 ir_node *block = be_transform_node(get_nodes_block(node));
2634 ir_node *sel = get_Cond_selector(node);
2635 ir_node *new_sel = be_transform_node(sel);
2636 long switch_min = LONG_MAX;
2637 long switch_max = LONG_MIN;
2638 long default_pn = get_Cond_default_proj(node);
2640 const ir_edge_t *edge;
2642 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2644 /* determine the smallest switch case value */
2645 foreach_out_edge(node, edge) {
2646 ir_node *proj = get_edge_src_irn(edge);
2647 long pn = get_Proj_proj(proj);
		/* the default proj does not constrain the case-value range */
2648 if (pn == default_pn)
2651 if (pn < switch_min)
2653 if (pn > switch_max)
	/* Bail out on degenerate jump tables; the table would be huge. */
2657 if ((unsigned long) (switch_max - switch_min) > 128000) {
2658 panic("Size of switch %+F bigger than 128000", node);
2661 if (switch_min != 0) {
2662 /* if smallest switch case is not 0 we need an additional sub */
		/* realised as a Lea with a negative offset so no flags are clobbered */
2663 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2664 add_ia32_am_offs_int(new_sel, -switch_min);
2665 set_ia32_op_type(new_sel, ia32_AddrModeS);
2667 SET_IA32_ORIG_NODE(new_sel, node);
2670 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2671 SET_IA32_ORIG_NODE(new_node, node);
2677 * Transform a Cond node.
2679 static ir_node *gen_Cond(ir_node *node)
2681 ir_node *block = get_nodes_block(node);
2682 ir_node *new_block = be_transform_node(block);
2683 dbg_info *dbgi = get_irn_dbg_info(node);
2684 ir_node *sel = get_Cond_selector(node);
2685 ir_mode *sel_mode = get_irn_mode(sel);
2686 ir_node *flags = NULL;
	/* a non-mode_b selector means this Cond is really a switch */
2690 if (sel_mode != mode_b) {
2691 return create_Switch(node);
2694 /* we get flags from a Cmp */
2695 flags = get_flags_node(sel, &pnc);
	/* conditional jump consuming the flags produced above */
2697 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2698 SET_IA32_ORIG_NODE(new_node, node);
2704 * Transform a be_Copy.
2706 static ir_node *gen_be_Copy(ir_node *node)
2708 ir_node *new_node = be_duplicate_node(node);
2709 ir_mode *mode = get_irn_mode(new_node);
	/* normalise every GP-register value to mode_Iu so register classes match */
2711 if (ia32_mode_needs_gp_reg(mode)) {
2712 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare producing flags (fucomi if available,
 * otherwise fucom/ftst + fnstsw + sahf to move FPU status into EFLAGS). */
2718 static ir_node *create_Fucom(ir_node *node)
2720 dbg_info *dbgi = get_irn_dbg_info(node);
2721 ir_node *block = get_nodes_block(node);
2722 ir_node *new_block = be_transform_node(block);
2723 ir_node *left = get_Cmp_left(node);
2724 ir_node *new_left = be_transform_node(left);
2725 ir_node *right = get_Cmp_right(node);
	/* fucomi writes EFLAGS directly — no status-word shuffle needed */
2729 if (ia32_cg_config.use_fucomi) {
2730 new_right = be_transform_node(right);
2731 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2733 set_ia32_commutative(new_node);
2734 SET_IA32_ORIG_NODE(new_node, node);
		/* comparison against 0.0 can use the shorter ftst */
2736 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2737 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2739 new_right = be_transform_node(right);
2740 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2743 set_ia32_commutative(new_node);
2745 SET_IA32_ORIG_NODE(new_node, node);
		/* sahf copies AH (the fnstsw result) into the EFLAGS register */
2747 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2748 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE2 ucomis[sd] compare producing EFLAGS. */
2754 static ir_node *create_Ucomi(ir_node *node)
2756 dbg_info *dbgi = get_irn_dbg_info(node);
2757 ir_node *src_block = get_nodes_block(node);
2758 ir_node *new_block = be_transform_node(src_block);
2759 ir_node *left = get_Cmp_left(node);
2760 ir_node *right = get_Cmp_right(node);
2762 ia32_address_mode_t am;
2763 ia32_address_t *addr = &am.addr;
	/* one operand may be folded into a memory operand (address mode) */
2765 match_arguments(&am, src_block, left, right, NULL,
2766 match_commutative | match_am);
2768 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2769 addr->mem, am.new_op1, am.new_op2,
2771 set_am_attributes(new_node, &am);
2773 SET_IA32_ORIG_NODE(new_node, node);
	/* reroute a possible memory Proj if address mode was used */
2775 new_node = fix_mem_proj(new_node, &am);
2781 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2782 * to fold an and into a test node
2784 static bool can_fold_test_and(ir_node *node)
2786 const ir_edge_t *edge;
2788 /* we can only have eq and lg projs */
2789 foreach_out_edge(node, edge) {
2790 ir_node *proj = get_edge_src_irn(edge);
2791 pn_Cmp pnc = get_Proj_pn_cmp(proj);
		/* Test only sets flags for ==/!= style checks; any other relation
		 * makes the And->Test folding invalid. */
2792 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2800 * returns true if it is assured, that the upper bits of a node are "clean"
2801 * which means for a 16 or 8 bit value, that the upper bits in the register
2802 * are 0 for unsigned and a copy of the most significant bit for signed
2805 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2807 assert(ia32_mode_needs_gp_reg(mode));
	/* 32 bit values fill the whole register — nothing above them to check */
2808 if (get_mode_size_bits(mode) >= 32)
	/* look through Projs at the producing node */
2811 if (is_Proj(transformed_node))
2812 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2814 switch (get_ia32_irn_opcode(transformed_node)) {
2815 case iro_ia32_Conv_I2I:
2816 case iro_ia32_Conv_I2I8Bit: {
		/* A conversion cleans the upper bits iff it extends with the same
		 * signedness from a mode no wider than the queried one. */
2817 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2818 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2820 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2827 if (mode_is_signed(mode)) {
2828 return false; /* TODO handle signed modes */
		/* Shr by a known-large constant zeroes enough upper bits itself. */
2830 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2831 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2832 const ia32_immediate_attr_t *attr
2833 = get_ia32_immediate_attr_const(right);
2834 if (attr->symconst == 0 &&
2835 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2839 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2843 /* TODO too conservative if shift amount is constant */
2844 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
	/* unsigned And: one clean operand suffices, since And can only clear bits */
2847 if (!mode_is_signed(mode)) {
2849 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2850 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2852 /* TODO if one is known to be zero extended, then || is sufficient */
	/* generic binary op: both operands must be clean */
2857 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2858 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2860 case iro_ia32_Const:
2861 case iro_ia32_Immediate: {
2862 const ia32_immediate_attr_t *attr =
2863 get_ia32_immediate_attr_const(transformed_node);
		/* constants are clean when the bits above the mode's width are all
		 * zero (unsigned) or a pure sign extension (signed) */
2864 if (mode_is_signed(mode)) {
2865 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2866 return shifted == 0 || shifted == -1;
2868 unsigned long shifted = (unsigned long)attr->offset;
2869 shifted >>= get_mode_size_bits(mode);
2870 return shifted == 0;
2880 * Generate code for a Cmp.
2882 static ir_node *gen_Cmp(ir_node *node)
2884 dbg_info *dbgi = get_irn_dbg_info(node);
2885 ir_node *block = get_nodes_block(node);
2886 ir_node *new_block = be_transform_node(block);
2887 ir_node *left = get_Cmp_left(node);
2888 ir_node *right = get_Cmp_right(node);
2889 ir_mode *cmp_mode = get_irn_mode(left);
2891 ia32_address_mode_t am;
2892 ia32_address_t *addr = &am.addr;
	/* float compares are delegated to the SSE2 or x87 helpers */
2895 if (mode_is_float(cmp_mode)) {
2896 if (ia32_cg_config.use_sse2) {
2897 return create_Ucomi(node);
2899 return create_Fucom(node);
2903 assert(ia32_mode_needs_gp_reg(cmp_mode));
2905 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2906 cmp_unsigned = !mode_is_signed(cmp_mode);
2907 if (is_Const_0(right) &&
2909 get_irn_n_edges(left) == 1 &&
2910 can_fold_test_and(node)) {
2911 /* Test(and_left, and_right) */
2912 ir_node *and_left = get_And_left(left);
2913 ir_node *and_right = get_And_right(left);
2915 /* matze: code here used mode instead of cmp_mode, I think it is always
2916 * the same as cmp_mode, but I leave this here to see if this is really
2919 assert(get_irn_mode(and_left) == cmp_mode);
2921 match_arguments(&am, block, and_left, and_right, NULL,
2923 match_am | match_8bit_am | match_16bit_am |
2924 match_am_and_immediates | match_immediate);
2926 /* use 32bit compare mode if possible since the opcode is smaller */
2927 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2928 upper_bits_clean(am.new_op2, cmp_mode)) {
2929 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2932 if (get_mode_size_bits(cmp_mode) == 8) {
2933 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2934 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2937 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2938 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2941 /* Cmp(left, right) */
2942 match_arguments(&am, block, left, right, NULL,
2943 match_commutative | match_am | match_8bit_am |
2944 match_16bit_am | match_am_and_immediates |
2946 /* use 32bit compare mode if possible since the opcode is smaller */
2947 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2948 upper_bits_clean(am.new_op2, cmp_mode)) {
2949 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2952 if (get_mode_size_bits(cmp_mode) == 8) {
2953 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2954 addr->index, addr->mem, am.new_op1,
2955 am.new_op2, am.ins_permuted,
2958 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2959 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
	/* finalise address-mode attributes and reroute a possible mem Proj */
2962 set_am_attributes(new_node, &am);
2963 set_ia32_ls_mode(new_node, cmp_mode);
2965 SET_IA32_ORIG_NODE(new_node, node);
2967 new_node = fix_mem_proj(new_node, &am);
/* Create a CMovcc selecting between Mux true/false values based on flags. */
2972 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2975 dbg_info *dbgi = get_irn_dbg_info(node);
2976 ir_node *block = get_nodes_block(node);
2977 ir_node *new_block = be_transform_node(block);
2978 ir_node *val_true = get_Mux_true(node);
2979 ir_node *val_false = get_Mux_false(node);
2981 ia32_address_mode_t am;
2982 ia32_address_t *addr;
2984 assert(ia32_cg_config.use_cmov);
2985 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2989 match_arguments(&am, block, val_false, val_true, flags,
2990 match_commutative | match_am | match_16bit_am | match_mode_neutral);
	/* if matching swapped the operands, the condition must be negated too */
2992 if (am.ins_permuted)
2993 pnc = ia32_get_negated_pnc(pnc);
2995 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
2996 addr->mem, am.new_op1, am.new_op2, new_flags,
2998 set_am_attributes(new_node, &am);
3000 SET_IA32_ORIG_NODE(new_node, node);
3002 new_node = fix_mem_proj(new_node, &am);
3008 * Creates a ia32 Setcc instruction.
3010 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3011 ir_node *flags, int pnc,
3014 ir_mode *mode = get_irn_mode(orig_node);
	/* setcc only writes an 8 bit register */
3017 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3018 SET_IA32_ORIG_NODE(new_node, orig_node);
3020 /* we might need to conv the result up */
3021 if (get_mode_size_bits(mode) > 8) {
		/* zero-extend the 8 bit setcc result to the requested width */
3022 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3023 nomem, new_node, mode_Bu);
3024 SET_IA32_ORIG_NODE(new_node, orig_node);
3031 * Create instruction for an unsigned Difference or Zero.
/* Difference-or-zero: computes (a - b) masked to 0 when the subtraction
 * borrows, using the sub's carry via sbb/not/and instead of a branch. */
3033 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3035 ir_mode *mode = get_irn_mode(psi);
3045 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3046 match_mode_neutral | match_am | match_immediate | match_two_users);
3048 block = get_nodes_block(new_node);
3050 if (is_Proj(new_node)) {
3051 sub = get_Proj_pred(new_node);
3052 assert(is_ia32_Sub(sub));
	/* turn the Sub into mode_T so both the result and the flags can be used */
3055 set_irn_mode(sub, mode_T);
3056 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3058 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3060 dbgi = get_irn_dbg_info(psi);
	/* sbb0 yields -1 on borrow, 0 otherwise; not inverts that into the mask */
3061 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3062 notn = new_bd_ia32_Not(dbgi, block, sbb);
3064 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3065 set_ia32_commutative(new_node);
3070 * Create an const array of two float consts.
3072 * @param c0 the first constant
3073 * @param c1 the second constant
3074 * @param new_mode IN/OUT for the mode of the constants, if NULL
3075 * smallest possible mode will be used
3077 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3080 ir_mode *mode = *new_mode;
3082 ir_initializer_t *initializer;
3083 ir_tarval *tv0 = get_Const_tarval(c0);
3084 ir_tarval *tv1 = get_Const_tarval(c1);
3087 /* detect the best mode for the constants */
3088 mode = get_tarval_mode(tv0);
	/* try to shrink to float, then to double, if both values convert
	 * without losing precision — smaller constants mean a smaller table */
3090 if (mode != mode_F) {
3091 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3092 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3094 tv0 = tarval_convert_to(tv0, mode);
3095 tv1 = tarval_convert_to(tv1, mode);
3096 } else if (mode != mode_D) {
3097 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3098 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3100 tv0 = tarval_convert_to(tv0, mode);
3101 tv1 = tarval_convert_to(tv1, mode);
	/* build a private, constant global entity holding the two values */
3108 tp = ia32_create_float_type(mode, 4);
3109 tp = ia32_create_float_array(tp);
3111 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3113 set_entity_ld_ident(ent, get_entity_ident(ent));
3114 set_entity_visibility(ent, ir_visibility_private);
3115 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3117 initializer = create_initializer_compound(2);
3119 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3120 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3122 set_entity_initializer(ent, initializer);
3129 * Possible transformations for creating a Setcc.
/* Instruction kinds a Setcc result can be post-processed with (add, lea,
 * shl, neg, not, and, ...). NOTE(review): enumerator list is elided here. */
3131 enum setcc_transform_insn {
/* Recipe describing how to turn a 0/1 setcc result into the requested
 * true/false constant pair; filled in by find_const_transform(). */
3144 typedef struct setcc_transform {
3148 enum setcc_transform_insn transform;
3152 } setcc_transform_t;
3155 * Setcc can only handle 0 and 1 result.
3156 * Find a transformation that creates 0 and 1 from
3159 static void find_const_transform(int pnc, ir_tarval *t, ir_tarval *f,
3160 setcc_transform_t *res)
	/* Normalise so that t is non-zero and the bigger of the two values,
	 * negating the condition whenever true/false are swapped. */
3166 if (tarval_is_null(t)) {
3170 pnc = ia32_get_negated_pnc(pnc);
3171 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3172 // now, t is the bigger one
3176 pnc = ia32_get_negated_pnc(pnc);
	/* Reduce to the (t-f, 0) pair: emit a final ADD of f and work with
	 * the difference from here on. */
3180 if (! tarval_is_null(f)) {
3181 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3184 res->steps[step].transform = SETCC_TR_ADD;
3186 if (t == tarval_bad)
3187 panic("constant subtract failed");
3188 if (! tarval_is_long(f))
3189 panic("tarval is not long");
3191 res->steps[step].val = get_tarval_long(f);
3193 f = tarval_sub(f, f, NULL);
3194 assert(tarval_is_null(f));
	/* (1, 0): a plain setcc is enough */
3197 if (tarval_is_one(t)) {
3198 res->steps[step].transform = SETCC_TR_SET;
3199 res->num_steps = ++step;
	/* (-1, 0): setcc then negate */
3203 if (tarval_is_minus_one(t)) {
3204 res->steps[step].transform = SETCC_TR_NEG;
3206 res->steps[step].transform = SETCC_TR_SET;
3207 res->num_steps = ++step;
3210 if (tarval_is_long(t)) {
3211 long v = get_tarval_long(t);
3213 res->steps[step].val = 0;
		/* small multipliers map onto lea/shl addressing tricks:
		 * scale k encodes (a << k), LEAxx additionally adds a */
3216 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3218 res->steps[step].transform = SETCC_TR_LEAxx;
3219 res->steps[step].scale = 3; /* (a << 3) + a */
3222 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3224 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3225 res->steps[step].scale = 3; /* (a << 3) */
3228 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3230 res->steps[step].transform = SETCC_TR_LEAxx;
3231 res->steps[step].scale = 2; /* (a << 2) + a */
3234 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3236 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3237 res->steps[step].scale = 2; /* (a << 2) */
3240 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3242 res->steps[step].transform = SETCC_TR_LEAxx;
3243 res->steps[step].scale = 1; /* (a << 1) + a */
3246 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3248 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3249 res->steps[step].scale = 1; /* (a << 1) */
3252 res->num_steps = step;
		/* fall back to masking with AND, or shifting when t is a single bit;
		 * NOTE(review): the surrounding switch/case structure is elided here */
3255 if (! tarval_is_single_bit(t)) {
3256 res->steps[step].transform = SETCC_TR_AND;
3257 res->steps[step].val = v;
3259 res->steps[step].transform = SETCC_TR_NEG;
3261 int v = get_tarval_lowest_bit(t);
3264 res->steps[step].transform = SETCC_TR_SHL;
3265 res->steps[step].scale = v;
3269 res->steps[step].transform = SETCC_TR_SET;
3270 res->num_steps = ++step;
3273 panic("tarval is not long");
3277 * Transforms a Mux node into some code sequence.
3279 * @return The transformed node.
3281 static ir_node *gen_Mux(ir_node *node)
3283 dbg_info *dbgi = get_irn_dbg_info(node);
3284 ir_node *block = get_nodes_block(node);
3285 ir_node *new_block = be_transform_node(block);
3286 ir_node *mux_true = get_Mux_true(node);
3287 ir_node *mux_false = get_Mux_false(node);
3288 ir_node *cond = get_Mux_sel(node);
3289 ir_mode *mode = get_irn_mode(node);
3295 assert(get_irn_mode(cond) == mode_b);
	/* Mux patterns that compute abs(x) are handled specially */
3297 is_abs = be_mux_is_abs(cond, mux_true, mux_false);
3299 return create_abs(dbgi, block, be_get_abs_op(cond), is_abs < 0, node);
3302 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3303 if (mode_is_float(mode)) {
3304 ir_node *cmp = get_Proj_pred(cond);
3305 ir_node *cmp_left = get_Cmp_left(cmp);
3306 ir_node *cmp_right = get_Cmp_right(cmp);
3307 int pnc = get_Proj_proj(cond);
		/* SSE2 min/max patterns: Mux over the compared operands themselves */
3309 if (ia32_cg_config.use_sse2) {
3310 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3311 if (cmp_left == mux_true && cmp_right == mux_false) {
3312 /* Mux(a <= b, a, b) => MIN */
3313 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3314 match_commutative | match_am | match_two_users);
3315 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3316 /* Mux(a <= b, b, a) => MAX */
3317 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3318 match_commutative | match_am | match_two_users);
3320 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3321 if (cmp_left == mux_true && cmp_right == mux_false) {
3322 /* Mux(a >= b, a, b) => MAX */
3323 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3324 match_commutative | match_am | match_two_users);
3325 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3326 /* Mux(a >= b, b, a) => MIN */
3327 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3328 match_commutative | match_am | match_two_users);
		/* float Mux of two constants: build a 2-element constant table and
		 * load entry [setcc-result], avoiding a branch */
3333 if (is_Const(mux_true) && is_Const(mux_false)) {
3334 ia32_address_mode_t am;
3339 flags = get_flags_node(cond, &pnc);
3340 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3342 if (ia32_cg_config.use_sse2) {
3343 /* cannot load from different mode on SSE */
3346 /* x87 can load any mode */
3350 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
		/* scale the 0/1 index by the element size; sizes without a native
		 * scale (e.g. 12/16 bytes) are built from lea/add combinations */
3352 switch (get_mode_size_bytes(new_mode)) {
3362 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3363 set_ia32_am_scale(new_node, 2);
3368 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3369 set_ia32_am_scale(new_node, 1);
3372 /* arg, shift 16 NOT supported */
3374 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3377 panic("Unsupported constant size");
		/* assemble the source address mode for the table load */
3380 am.ls_mode = new_mode;
3381 am.addr.base = get_symconst_base();
3382 am.addr.index = new_node;
3383 am.addr.mem = nomem;
3385 am.addr.scale = scale;
3386 am.addr.use_frame = 0;
3387 am.addr.frame_entity = NULL;
3388 am.addr.symconst_sign = 0;
3389 am.mem_proj = am.addr.mem;
3390 am.op_type = ia32_AddrModeS;
3393 am.pinned = op_pin_state_floats;
3395 am.ins_permuted = 0;
3397 if (ia32_cg_config.use_sse2)
3398 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3400 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3401 set_am_attributes(load, &am);
3403 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3405 panic("cannot transform floating point Mux");
3408 assert(ia32_mode_needs_gp_reg(mode));
3410 if (is_Proj(cond)) {
3411 ir_node *cmp = get_Proj_pred(cond);
3413 ir_node *cmp_left = get_Cmp_left(cmp);
3414 ir_node *cmp_right = get_Cmp_right(cmp);
3415 ir_node *val_true = mux_true;
3416 ir_node *val_false = mux_false;
3417 int pnc = get_Proj_proj(cond);
		/* canonicalise: keep the zero constant on the false side */
3419 if (is_Const(val_true) && is_Const_null(val_true)) {
3420 ir_node *tmp = val_false;
3421 val_false = val_true;
3423 pnc = ia32_get_negated_pnc(pnc);
		/* Mux(a >= b, a - b, 0) and mirrored forms become a branch-free
		 * difference-or-zero sequence */
3425 if (is_Const_0(val_false) && is_Sub(val_true)) {
3426 if ((pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge)
3427 && get_Sub_left(val_true) == cmp_left
3428 && get_Sub_right(val_true) == cmp_right) {
3429 return create_doz(node, cmp_left, cmp_right);
3431 if ((pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le)
3432 && get_Sub_left(val_true) == cmp_right
3433 && get_Sub_right(val_true) == cmp_left) {
3434 return create_doz(node, cmp_right, cmp_left);
3440 flags = get_flags_node(cond, &pnc);
3442 if (is_Const(mux_true) && is_Const(mux_false)) {
3443 /* both are const, good */
3444 ir_tarval *tv_true = get_Const_tarval(mux_true);
3445 ir_tarval *tv_false = get_Const_tarval(mux_false);
3446 setcc_transform_t res;
		/* compute a setcc + arithmetic recipe, then emit its steps in
		 * reverse order (the recipe is stored outermost-first) */
3449 find_const_transform(pnc, tv_true, tv_false, &res);
3451 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3454 switch (res.steps[step].transform) {
3456 imm = ia32_immediate_from_long(res.steps[step].val);
3457 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3459 case SETCC_TR_ADDxx:
3460 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3463 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3464 set_ia32_am_scale(new_node, res.steps[step].scale);
3465 set_ia32_am_offs_int(new_node, res.steps[step].val);
3467 case SETCC_TR_LEAxx:
3468 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3469 set_ia32_am_scale(new_node, res.steps[step].scale);
3470 set_ia32_am_offs_int(new_node, res.steps[step].val);
3473 imm = ia32_immediate_from_long(res.steps[step].scale);
3474 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3477 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3480 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3483 imm = ia32_immediate_from_long(res.steps[step].val);
3484 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3487 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, node);
3490 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3493 panic("unknown setcc transform");
	/* general case: conditional move */
3497 new_node = create_CMov(node, cond, flags, pnc);
3505 * Create a conversion from x87 state register to general purpose.
3507 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3509 ir_node *block = be_transform_node(get_nodes_block(node));
3510 ir_node *op = get_Conv_op(node);
3511 ir_node *new_op = be_transform_node(op);
3512 ir_graph *irg = current_ir_graph;
3513 dbg_info *dbgi = get_irn_dbg_info(node);
3514 ir_mode *mode = get_irn_mode(node);
3515 ir_node *fist, *load, *mem;
	/* spill the x87 value to the stack frame with a fist... */
3517 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3518 set_irn_pinned(fist, op_pin_state_floats);
3519 set_ia32_use_frame(fist);
3520 set_ia32_op_type(fist, ia32_AddrModeD);
3522 assert(get_mode_size_bits(mode) <= 32);
3523 /* exception we can only store signed 32 bit integers, so for unsigned
3524 we store a 64bit (signed) integer and load the lower bits */
3525 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3526 set_ia32_ls_mode(fist, mode_Ls);
3528 set_ia32_ls_mode(fist, mode_Is);
3530 SET_IA32_ORIG_NODE(fist, node);
	/* ...and reload the (lower) 32 bits into a GP register */
3533 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3535 set_irn_pinned(load, op_pin_state_floats);
3536 set_ia32_use_frame(load);
3537 set_ia32_op_type(load, ia32_AddrModeS);
3538 set_ia32_ls_mode(load, mode_Is);
	/* tell frame allocation how big a spill slot this pair needs */
3539 if (get_ia32_ls_mode(fist) == mode_Ls) {
3540 ia32_attr_t *attr = get_ia32_attr(load);
3541 attr->data.need_64bit_stackent = 1;
3543 ia32_attr_t *attr = get_ia32_attr(load);
3544 attr->data.need_32bit_stackent = 1;
3546 SET_IA32_ORIG_NODE(load, node);
3548 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3552 * Creates a x87 strict Conv by placing a Store and a Load
3554 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3556 ir_node *block = get_nodes_block(node);
3557 ir_graph *irg = get_Block_irg(block);
3558 dbg_info *dbgi = get_irn_dbg_info(node);
3559 ir_node *frame = get_irg_frame(irg);
3560 ir_node *store, *load;
	/* forcing the value through memory rounds it to tgt_mode precision,
	 * which the 80 bit x87 registers would otherwise not do */
3563 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3564 set_ia32_use_frame(store);
3565 set_ia32_op_type(store, ia32_AddrModeD);
3566 SET_IA32_ORIG_NODE(store, node);
3568 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3569 set_ia32_use_frame(load);
3570 set_ia32_op_type(load, ia32_AddrModeS);
3571 SET_IA32_ORIG_NODE(load, node);
3573 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/* Create an integer-to-integer conversion node, picking the 8 bit variant
 * when the load/store mode is 8 bits wide. */
3577 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3578 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3580 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3582 func = get_mode_size_bits(mode) == 8 ?
3583 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3584 return func(dbgi, block, base, index, mem, val, mode);
3588 * Create a conversion from general purpose to x87 register
3590 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3592 ir_node *src_block = get_nodes_block(node);
3593 ir_node *block = be_transform_node(src_block);
3594 ir_graph *irg = get_Block_irg(block);
3595 dbg_info *dbgi = get_irn_dbg_info(node);
3596 ir_node *op = get_Conv_op(node);
3597 ir_node *new_op = NULL;
3599 ir_mode *store_mode;
3604 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3605 if (possible_int_mode_for_fp(src_mode)) {
3606 ia32_address_mode_t am;
3608 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3609 if (am.op_type == ia32_AddrModeS) {
3610 ia32_address_t *addr = &am.addr;
			/* load directly from memory into the x87 stack */
3612 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3613 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3615 set_am_attributes(fild, &am);
3616 SET_IA32_ORIG_NODE(fild, node);
3618 fix_mem_proj(fild, &am);
3623 if (new_op == NULL) {
3624 new_op = be_transform_node(op);
3627 mode = get_irn_mode(op);
3629 /* first convert to 32 bit signed if necessary */
3630 if (get_mode_size_bits(src_mode) < 32) {
3631 if (!upper_bits_clean(new_op, src_mode)) {
3632 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3633 SET_IA32_ORIG_NODE(new_op, node);
3638 assert(get_mode_size_bits(mode) == 32);
	/* spill the GP value to the frame so fild can pick it up */
3641 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3643 set_ia32_use_frame(store);
3644 set_ia32_op_type(store, ia32_AddrModeD);
3645 set_ia32_ls_mode(store, mode_Iu);
3647 /* exception for 32bit unsigned, do a 64bit spill+load */
3648 if (!mode_is_signed(mode)) {
		/* write a zero high word so the 64 bit value is non-negative and
		 * fild (which only knows signed loads) interprets it correctly */
3651 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3653 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3654 noreg_GP, nomem, zero_const);
3656 set_ia32_use_frame(zero_store);
3657 set_ia32_op_type(zero_store, ia32_AddrModeD);
3658 add_ia32_am_offs_int(zero_store, 4);
3659 set_ia32_ls_mode(zero_store, mode_Iu);
		/* the fild must see both stores — join them with a Sync */
3664 store = new_rd_Sync(dbgi, block, 2, in);
3665 store_mode = mode_Ls;
3667 store_mode = mode_Is;
3671 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3673 set_ia32_use_frame(fild);
3674 set_ia32_op_type(fild, ia32_AddrModeS);
3675 set_ia32_ls_mode(fild, store_mode);
3677 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3683 * Create a conversion from one integer mode into another one
3685 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3686 dbg_info *dbgi, ir_node *block, ir_node *op,
3689 ir_node *new_block = be_transform_node(block);
3691 ir_mode *smaller_mode;
3692 ia32_address_mode_t am;
3693 ia32_address_t *addr = &am.addr;
	/* the conversion only has to preserve the narrower of the two modes */
3696 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3697 smaller_mode = src_mode;
3699 smaller_mode = tgt_mode;
3702 #ifdef DEBUG_libfirm
3704 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3709 match_arguments(&am, block, NULL, op, NULL,
3710 match_am | match_8bit_am | match_16bit_am);
	/* if the operand's upper bits are already clean the conv is a no-op */
3712 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3713 /* unnecessary conv. in theory it shouldn't have been AM */
3714 assert(is_ia32_NoReg_GP(addr->base));
3715 assert(is_ia32_NoReg_GP(addr->index));
3716 assert(is_NoMem(addr->mem));
3717 assert(am.addr.offset == 0);
3718 assert(am.addr.symconst_ent == NULL);
3722 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3723 addr->mem, am.new_op2, smaller_mode);
3724 set_am_attributes(new_node, &am);
3725 /* match_arguments assume that out-mode = in-mode, this isn't true here
3727 set_ia32_ls_mode(new_node, smaller_mode);
3728 SET_IA32_ORIG_NODE(new_node, node);
3729 new_node = fix_mem_proj(new_node, &am);
3734 * Transforms a Conv node.
3736 * @return The created ia32 Conv node
3738 static ir_node *gen_Conv(ir_node *node)
/* Transforms a Conv node into the matching ia32 conversion (or removes it
 * when no machine instruction is required). Handles float<->float,
 * float<->int and int<->int conversions; dispatches between SSE2 and x87
 * code depending on ia32_cg_config.use_sse2.
 * NOTE(review): several lines of this body (closing braces / else arms) are
 * not visible in this excerpt. */
3740 ir_node *block = get_nodes_block(node);
3741 ir_node *new_block = be_transform_node(block);
3742 ir_node *op = get_Conv_op(node);
3743 ir_node *new_op = NULL;
3744 dbg_info *dbgi = get_irn_dbg_info(node);
3745 ir_mode *src_mode = get_irn_mode(op);
3746 ir_mode *tgt_mode = get_irn_mode(node);
3747 int src_bits = get_mode_size_bits(src_mode);
3748 int tgt_bits = get_mode_size_bits(tgt_mode);
3749 ir_node *res = NULL;
/* 64bit integer arithmetic must have been lowered before this phase. */
3751 assert(!mode_is_int(src_mode) || src_bits <= 32);
3752 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3754 /* modeB -> X should already be lowered by the lower_mode_b pass */
3755 if (src_mode == mode_b) {
3756 panic("ConvB not lowered %+F", node);
/* Same-mode Conv: only a strict x87 conversion has an effect; otherwise
 * it is a no-op and we simply transform the operand. */
3759 if (src_mode == tgt_mode) {
3760 if (get_Conv_strict(node)) {
3761 if (ia32_cg_config.use_sse2) {
3762 /* when we are in SSE mode, we can kill all strict no-op conversion */
3763 return be_transform_node(op);
3766 /* this should be optimized already, but who knows... */
3767 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3768 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3769 return be_transform_node(op);
3773 if (mode_is_float(src_mode)) {
3774 new_op = be_transform_node(op);
3775 /* we convert from float ... */
3776 if (mode_is_float(tgt_mode)) {
/* float -> float: Conv_FP2FP with SSE2, x87 strict conversion otherwise. */
3778 if (ia32_cg_config.use_sse2) {
3779 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3780 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3782 set_ia32_ls_mode(res, tgt_mode);
3784 if (get_Conv_strict(node)) {
3785 /* if fp_no_float_fold is not set then we assume that we
3786 * don't have any float operations in a non
3787 * mode_float_arithmetic mode and can skip strict upconvs */
3788 if (src_bits < tgt_bits) {
3789 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3792 res = gen_x87_strict_conv(tgt_mode, new_op);
3793 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3797 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* float -> int: Conv_FP2I with SSE2, x87 fist sequence otherwise. */
3802 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3803 if (ia32_cg_config.use_sse2) {
3804 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3806 set_ia32_ls_mode(res, src_mode);
3808 return gen_x87_fp_to_gp(node);
3812 /* we convert from int ... */
3813 if (mode_is_float(tgt_mode)) {
/* int -> float: Conv_I2FP with SSE2, x87 fild sequence otherwise. */
3815 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3816 if (ia32_cg_config.use_sse2) {
3817 new_op = be_transform_node(op);
3818 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3820 set_ia32_ls_mode(res, tgt_mode);
3822 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3823 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3824 res = gen_x87_gp_to_fp(node, src_mode);
3826 /* we need a strict-Conv, if the int mode has more bits than the
3828 if (float_mantissa < int_mantissa) {
3829 res = gen_x87_strict_conv(tgt_mode, res);
3830 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3834 } else if (tgt_mode == mode_b) {
3835 /* mode_b lowering already took care that we only have 0/1 values */
3836 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3837 src_mode, tgt_mode));
3838 return be_transform_node(op);
/* int -> int: same width needs no instruction, otherwise emit an I2I Conv. */
3841 if (src_bits == tgt_bits) {
3842 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3843 src_mode, tgt_mode));
3844 return be_transform_node(op);
3847 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3855 static ir_node *create_immediate_or_transform(ir_node *node,
3856 char immediate_constraint_type)
/* Tries to express @p node as an ia32 Immediate (honouring the given
 * asm-style immediate constraint); if that is not possible, transforms it
 * normally. Returns the immediate or the transformed node. */
3858 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3859 if (new_node == NULL) {
3860 new_node = be_transform_node(node);
3866 * Transforms a FrameAddr into an ia32 Add.
3868 static ir_node *gen_be_FrameAddr(ir_node *node)
/* Implemented as a Lea over the frame pointer; the frame entity is attached
 * so the final offset can be filled in after stack-frame layout. */
3870 ir_node *block = be_transform_node(get_nodes_block(node));
3871 ir_node *op = be_get_FrameAddr_frame(node);
3872 ir_node *new_op = be_transform_node(op);
3873 dbg_info *dbgi = get_irn_dbg_info(node);
3876 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3877 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3878 set_ia32_use_frame(new_node);
3880 SET_IA32_ORIG_NODE(new_node, node);
3886 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3888 static ir_node *gen_be_Return(ir_node *node)
/* With SSE2 the float return value lives in an XMM register, but the ia32
 * calling convention returns floats on the x87 stack. This routine spills
 * the XMM value to the frame, re-loads it with an x87 vfld, and rebuilds
 * the Barrier in front of the Return with the new value/memory inputs.
 * Falls back to be_duplicate_node() when nothing needs to be done. */
3890 ir_graph *irg = current_ir_graph;
3891 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3892 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3893 ir_entity *ent = get_irg_entity(irg);
3894 ir_type *tp = get_entity_type(ent);
3899 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3900 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3902 int pn_ret_val, pn_ret_mem, arity, i;
3904 assert(ret_val != NULL);
/* Only relevant for SSE2 code with at least one return value. */
3905 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3906 return be_duplicate_node(node);
3909 res_type = get_method_res_type(tp, 0);
3911 if (! is_Primitive_type(res_type)) {
3912 return be_duplicate_node(node);
3915 mode = get_type_mode(res_type);
/* Only float results need the XMM -> x87 copy. */
3916 if (! mode_is_float(mode)) {
3917 return be_duplicate_node(node);
3920 assert(get_method_n_ress(tp) == 1);
3922 pn_ret_val = get_Proj_proj(ret_val);
3923 pn_ret_mem = get_Proj_proj(ret_mem);
3925 /* get the Barrier */
3926 barrier = get_Proj_pred(ret_val);
3928 /* get result input of the Barrier */
3929 ret_val = get_irn_n(barrier, pn_ret_val);
3930 new_ret_val = be_transform_node(ret_val);
3932 /* get memory input of the Barrier */
3933 ret_mem = get_irn_n(barrier, pn_ret_mem);
3934 new_ret_mem = be_transform_node(ret_mem);
3936 frame = get_irg_frame(irg);
3938 dbgi = get_irn_dbg_info(barrier);
3939 block = be_transform_node(get_nodes_block(barrier));
3941 /* store xmm0 onto stack */
3942 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3943 new_ret_mem, new_ret_val);
3944 set_ia32_ls_mode(sse_store, mode);
3945 set_ia32_op_type(sse_store, ia32_AddrModeD);
3946 set_ia32_use_frame(sse_store);
3948 /* load into x87 register */
3949 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3950 set_ia32_op_type(fld, ia32_AddrModeS);
3951 set_ia32_use_frame(fld);
3953 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3954 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3956 /* create a new barrier */
3957 arity = get_irn_arity(barrier);
3958 in = ALLOCAN(ir_node*, arity);
3959 for (i = 0; i < arity; ++i) {
/* Replace the value/memory inputs with the x87 load and its memory Proj;
 * all other inputs are transformed normally. */
3962 if (i == pn_ret_val) {
3964 } else if (i == pn_ret_mem) {
3967 ir_node *in = get_irn_n(barrier, i);
3968 new_in = be_transform_node(in);
3973 new_barrier = new_ir_node(dbgi, irg, block,
3974 get_irn_op(barrier), get_irn_mode(barrier),
3976 copy_node_attr(irg, barrier, new_barrier);
3977 be_duplicate_deps(barrier, new_barrier);
3978 be_set_transformed_node(barrier, new_barrier);
3980 /* transform normally */
3981 return be_duplicate_node(node);
3985 * Transform a be_AddSP into an ia32_SubSP.
3987 static ir_node *gen_be_AddSP(ir_node *node)
/* The ia32 stack grows downwards, so enlarging the stack (be_AddSP) is a
 * subtraction from ESP — hence SubSP. */
3989 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3990 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3992 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3993 match_am | match_immediate);
3997 * Transform a be_SubSP into an ia32_AddSP
3999 static ir_node *gen_be_SubSP(ir_node *node)
/* Counterpart of gen_be_AddSP: shrinking the downwards-growing stack is an
 * addition to ESP — hence AddSP. */
4001 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4002 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4004 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4005 match_am | match_immediate);
4009 * Change some phi modes
4011 static ir_node *gen_Phi(ir_node *node)
/* Duplicates a Phi and attaches the register-class requirement matching its
 * mode (gp / xmm / vfp). The Phi keeps its untransformed inputs for now,
 * because Phis may sit on loops; predecessors are enqueued and the inputs
 * are fixed up later by the transform driver. */
4013 const arch_register_req_t *req;
4014 ir_node *block = be_transform_node(get_nodes_block(node));
4015 ir_graph *irg = current_ir_graph;
4016 dbg_info *dbgi = get_irn_dbg_info(node);
4017 ir_mode *mode = get_irn_mode(node);
4020 if (ia32_mode_needs_gp_reg(mode)) {
4021 /* we shouldn't have any 64bit stuff around anymore */
4022 assert(get_mode_size_bits(mode) <= 32);
4023 /* all integer operations are on 32bit registers now */
4025 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4026 } else if (mode_is_float(mode)) {
4027 if (ia32_cg_config.use_sse2) {
4029 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4032 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4035 req = arch_no_register_req;
4038 /* phi nodes allow loops, so we use the old arguments for now
4039 * and fix this later */
4040 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4041 get_irn_in(node) + 1);
4042 copy_node_attr(irg, node, phi);
4043 be_duplicate_deps(node, phi);
4045 arch_set_out_register_req(phi, 0, req);
4047 be_enqueue_preds(node);
4052 static ir_node *gen_Jmp(ir_node *node)
/* Transforms an unconditional Jmp into an ia32 Jmp in the transformed block. */
4054 ir_node *block = get_nodes_block(node);
4055 ir_node *new_block = be_transform_node(block);
4056 dbg_info *dbgi = get_irn_dbg_info(node);
4059 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4060 SET_IA32_ORIG_NODE(new_node, node);
4068 static ir_node *gen_IJmp(ir_node *node)
/* Transforms an indirect jump. The target may be folded into an address
 * mode or an immediate via match_arguments. */
4070 ir_node *block = get_nodes_block(node);
4071 ir_node *new_block = be_transform_node(block);
4072 dbg_info *dbgi = get_irn_dbg_info(node);
4073 ir_node *op = get_IJmp_target(node);
4075 ia32_address_mode_t am;
4076 ia32_address_t *addr = &am.addr;
/* Jump targets are pointers. */
4078 assert(get_irn_mode(op) == mode_P);
4080 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4082 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4083 addr->mem, am.new_op2);
4084 set_am_attributes(new_node, &am);
4085 SET_IA32_ORIG_NODE(new_node, node);
4087 new_node = fix_mem_proj(new_node, &am);
4093 * Transform a Bound node.
4095 static ir_node *gen_Bound(ir_node *node)
/* Only the common lower-bound-zero case is supported: index < upper is
 * checked with an unsigned compare (Sub + Jcc below-unsigned), which also
 * catches negative indices. The general case panics. */
4098 ir_node *lower = get_Bound_lower(node);
4099 dbg_info *dbgi = get_irn_dbg_info(node);
4101 if (is_Const_0(lower)) {
4102 /* typical case for Java */
4103 ir_node *sub, *res, *flags, *block;
4105 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4107 match_mode_neutral | match_am | match_immediate);
4109 block = get_nodes_block(res);
4110 if (! is_Proj(res)) {
/* gen_binop returned the Sub itself; turn it into mode_T so we can
 * take the result and flags Projs from it. */
4112 set_irn_mode(sub, mode_T);
4113 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4115 sub = get_Proj_pred(res);
4117 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
4118 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4119 SET_IA32_ORIG_NODE(new_node, node);
4121 panic("generic Bound not supported in ia32 Backend");
4127 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
/* Lowered left shift (with extra scheduling dependency) -> ia32 Shl. */
4129 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4130 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4132 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4133 match_immediate | match_mode_neutral);
4136 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
/* Lowered logical right shift (with extra dependency) -> ia32 Shr. */
4138 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4139 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4140 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
4144 static ir_node *gen_ia32_l_SarDep(ir_node *node)
/* Lowered arithmetic right shift (with extra dependency) -> ia32 Sar. */
4146 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4147 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4148 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
4152 static ir_node *gen_ia32_l_Add(ir_node *node)
/* Lowered 64bit Add low word -> ia32 Add in mode_T, so the carry flag Proj
 * is available for the matching l_Adc of the high word. */
4154 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4155 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4156 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4157 match_commutative | match_am | match_immediate |
4158 match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Add itself. */
4160 if (is_Proj(lowered)) {
4161 lowered = get_Proj_pred(lowered);
4163 assert(is_ia32_Add(lowered));
4164 set_irn_mode(lowered, mode_T);
4170 static ir_node *gen_ia32_l_Adc(ir_node *node)
/* Lowered 64bit Add high word -> ia32 Adc (add with carry flag input). */
4172 return gen_binop_flags(node, new_bd_ia32_Adc,
4173 match_commutative | match_am | match_immediate |
4174 match_mode_neutral);
4178 * Transforms a l_MulS into a "real" MulS node.
4180 * @return the created ia32 Mul node
4182 static ir_node *gen_ia32_l_Mul(ir_node *node)
/* Lowered widening unsigned multiply -> ia32 Mul (EDX:EAX result). */
4184 ir_node *left = get_binop_left(node);
4185 ir_node *right = get_binop_right(node);
4187 return gen_binop(node, left, right, new_bd_ia32_Mul,
4188 match_commutative | match_am | match_mode_neutral);
4192 * Transforms a l_IMulS into a "real" IMul1OPS node.
4194 * @return the created ia32 IMul1OP node
4196 static ir_node *gen_ia32_l_IMul(ir_node *node)
/* Lowered widening signed multiply -> ia32 one-operand IMul. */
4198 ir_node *left = get_binop_left(node);
4199 ir_node *right = get_binop_right(node);
4201 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4202 match_commutative | match_am | match_mode_neutral);
4205 static ir_node *gen_ia32_l_Sub(ir_node *node)
/* Lowered 64bit Sub low word -> ia32 Sub in mode_T, so the borrow flag
 * Proj is available for the matching l_Sbb of the high word. Mirrors
 * gen_ia32_l_Add (Sub is not commutative, so no match_commutative). */
4207 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4208 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4209 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4210 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Sub itself. */
4212 if (is_Proj(lowered)) {
4213 lowered = get_Proj_pred(lowered);
4215 assert(is_ia32_Sub(lowered));
4216 set_irn_mode(lowered, mode_T);
4222 static ir_node *gen_ia32_l_Sbb(ir_node *node)
/* Lowered 64bit Sub high word -> ia32 Sbb (subtract with borrow input). */
4224 return gen_binop_flags(node, new_bd_ia32_Sbb,
4225 match_am | match_immediate | match_mode_neutral);
4229 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4230 * op1 - target to be shifted
4231 * op2 - contains bits to be shifted into target
4233 * Only op3 can be an immediate.
4235 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4236 ir_node *low, ir_node *count)
/* Common helper for l_ShlD/l_ShrD: builds the double-precision shift with
 * the shift count either as immediate or register. */
4238 ir_node *block = get_nodes_block(node);
4239 ir_node *new_block = be_transform_node(block);
4240 dbg_info *dbgi = get_irn_dbg_info(node);
4241 ir_node *new_high = be_transform_node(high);
4242 ir_node *new_low = be_transform_node(low);
4246 /* the shift amount can be any mode that is bigger than 5 bits, since all
4247 * other bits are ignored anyway */
4248 while (is_Conv(count) &&
4249 get_irn_n_edges(count) == 1 &&
4250 mode_is_int(get_irn_mode(count))) {
4251 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4252 count = get_Conv_op(count);
4254 new_count = create_immediate_or_transform(count, 0);
4256 if (is_ia32_l_ShlD(node)) {
4257 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4260 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4263 SET_IA32_ORIG_NODE(new_node, node);
4268 static ir_node *gen_ia32_l_ShlD(ir_node *node)
/* Lowered 64bit shift-left-double -> ia32 ShlD via the common helper. */
4270 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4271 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4272 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4273 return gen_lowered_64bit_shifts(node, high, low, count);
4276 static ir_node *gen_ia32_l_ShrD(ir_node *node)
/* Lowered 64bit shift-right-double -> ia32 ShrD via the common helper. */
4278 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4279 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4280 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4281 return gen_lowered_64bit_shifts(node, high, low, count);
4284 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
/* Converts a lowered 64bit integer (low/high word pair) to float using the
 * x87 fild instruction: both words are stored to a frame slot, loaded back
 * as a 64bit integer with fild, and — for unsigned sources — corrected by
 * adding 2^64 when the (interpreted-as-signed) high word was negative. */
4286 ir_node *src_block = get_nodes_block(node);
4287 ir_node *block = be_transform_node(src_block);
4288 ir_graph *irg = current_ir_graph;
4289 dbg_info *dbgi = get_irn_dbg_info(node);
4290 ir_node *frame = get_irg_frame(irg);
4291 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4292 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4293 ir_node *new_val_low = be_transform_node(val_low);
4294 ir_node *new_val_high = be_transform_node(val_high);
4296 ir_node *sync, *fild, *res;
4297 ir_node *store_low, *store_high;
4299 if (ia32_cg_config.use_sse2) {
4300 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* Spill both 32bit halves to adjacent frame slots (high word at offset 4). */
4304 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4306 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4308 SET_IA32_ORIG_NODE(store_low, node);
4309 SET_IA32_ORIG_NODE(store_high, node);
4311 set_ia32_use_frame(store_low);
4312 set_ia32_use_frame(store_high);
4313 set_ia32_op_type(store_low, ia32_AddrModeD);
4314 set_ia32_op_type(store_high, ia32_AddrModeD);
4315 set_ia32_ls_mode(store_low, mode_Iu);
4316 set_ia32_ls_mode(store_high, mode_Is);
4317 add_ia32_am_offs_int(store_high, 4);
/* Both stores must complete before the fild reads the 64bit slot. */
4321 sync = new_rd_Sync(dbgi, block, 2, in);
4324 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4326 set_ia32_use_frame(fild);
4327 set_ia32_op_type(fild, ia32_AddrModeS);
4328 set_ia32_ls_mode(fild, mode_Ls);
4330 SET_IA32_ORIG_NODE(fild, node);
4332 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4334 if (! mode_is_signed(get_irn_mode(val_high))) {
/* Unsigned source: fild interpreted the value as signed. If the high
 * word's sign bit was set, add the ULL bias constant (2^64); the bias
 * table is indexed via (high >> 31) so the signed case adds 0. */
4335 ia32_address_mode_t am;
4337 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4340 am.addr.base = get_symconst_base();
4341 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4342 am.addr.mem = nomem;
4345 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4346 am.addr.use_frame = 0;
4347 am.addr.frame_entity = NULL;
4348 am.addr.symconst_sign = 0;
4349 am.ls_mode = mode_F;
4350 am.mem_proj = nomem;
4351 am.op_type = ia32_AddrModeS;
4353 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4354 am.pinned = op_pin_state_floats;
4356 am.ins_permuted = 0;
4358 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4359 am.new_op1, am.new_op2, get_fpcw());
4360 set_am_attributes(fadd, &am);
4362 set_irn_mode(fadd, mode_T);
4363 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
4368 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
/* Converts a float to a lowered 64bit integer by storing it with x87 fist
 * into a 64bit frame slot; the two 32bit halves are read back later by
 * gen_Proj_l_FloattoLL. */
4370 ir_node *src_block = get_nodes_block(node);
4371 ir_node *block = be_transform_node(src_block);
4372 ir_graph *irg = get_Block_irg(block);
4373 dbg_info *dbgi = get_irn_dbg_info(node);
4374 ir_node *frame = get_irg_frame(irg);
4375 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4376 ir_node *new_val = be_transform_node(val);
4377 ir_node *fist, *mem;
4379 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4380 SET_IA32_ORIG_NODE(fist, node);
4381 set_ia32_use_frame(fist);
4382 set_ia32_op_type(fist, ia32_AddrModeD);
4383 set_ia32_ls_mode(fist, mode_Ls);
4388 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
/* Reads one 32bit half of the 64bit fist result back from the frame slot:
 * a 32bit Load at offset 0 for the low word, offset 4 for the high word. */
4390 ir_node *block = be_transform_node(get_nodes_block(node));
4391 ir_graph *irg = get_Block_irg(block);
4392 ir_node *pred = get_Proj_pred(node);
4393 ir_node *new_pred = be_transform_node(pred);
4394 ir_node *frame = get_irg_frame(irg);
4395 dbg_info *dbgi = get_irn_dbg_info(node);
4396 long pn = get_Proj_proj(node);
4401 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4402 SET_IA32_ORIG_NODE(load, node);
4403 set_ia32_use_frame(load);
4404 set_ia32_op_type(load, ia32_AddrModeS);
4405 set_ia32_ls_mode(load, mode_Iu);
4406 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4407 * 32 bit from it with this particular load */
4408 attr = get_ia32_attr(load);
4409 attr->data.need_64bit_stackent = 1;
4411 if (pn == pn_ia32_l_FloattoLL_res_high) {
4412 add_ia32_am_offs_int(load, 4);
4414 assert(pn == pn_ia32_l_FloattoLL_res_low);
4417 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4423 * Transform the Projs of an AddSP.
4425 static ir_node *gen_Proj_be_AddSP(ir_node *node)
/* be_AddSP was transformed into an ia32 SubSP (stack grows downwards), so
 * the Proj numbers are renumbered to the SubSP outputs here. */
4427 ir_node *pred = get_Proj_pred(node);
4428 ir_node *new_pred = be_transform_node(pred);
4429 dbg_info *dbgi = get_irn_dbg_info(node);
4430 long proj = get_Proj_proj(node);
4432 if (proj == pn_be_AddSP_sp) {
4433 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4434 pn_ia32_SubSP_stack);
/* The new stack pointer must live in ESP. */
4435 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4437 } else if (proj == pn_be_AddSP_res) {
4438 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4439 pn_ia32_SubSP_addr);
4440 } else if (proj == pn_be_AddSP_M) {
4441 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4444 panic("No idea how to transform proj->AddSP");
4448 * Transform the Projs of a SubSP.
4450 static ir_node *gen_Proj_be_SubSP(ir_node *node)
/* be_SubSP was transformed into an ia32 AddSP; renumber the Projs to the
 * AddSP outputs (mirror of gen_Proj_be_AddSP). */
4452 ir_node *pred = get_Proj_pred(node);
4453 ir_node *new_pred = be_transform_node(pred);
4454 dbg_info *dbgi = get_irn_dbg_info(node);
4455 long proj = get_Proj_proj(node);
4457 if (proj == pn_be_SubSP_sp) {
4458 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4459 pn_ia32_AddSP_stack);
/* The new stack pointer must live in ESP. */
4460 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4462 } else if (proj == pn_be_SubSP_M) {
4463 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4466 panic("No idea how to transform proj->SubSP");
4470 * Transform and renumber the Projs from a Load.
4472 static ir_node *gen_Proj_Load(ir_node *node)
/* Maps Load Projs (res/M/X_regular/X_except) to the Proj numbers of
 * whatever ia32 node the Load became: Load, Conv_I2I (source address mode
 * folding), xLoad (SSE) or vfld (x87). */
4475 ir_node *block = be_transform_node(get_nodes_block(node));
4476 ir_node *pred = get_Proj_pred(node);
4477 dbg_info *dbgi = get_irn_dbg_info(node);
4478 long proj = get_Proj_proj(node);
4480 /* loads might be part of source address mode matches, so we don't
4481 * transform the ProjMs yet (with the exception of loads whose result is
4484 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4487 /* this is needed, because sometimes we have loops that are only
4488 reachable through the ProjM */
4489 be_enqueue_preds(node);
4490 /* do it in 2 steps, to silence firm verifier */
4491 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4492 set_Proj_proj(res, pn_ia32_mem);
4496 /* renumber the proj */
4497 new_pred = be_transform_node(pred);
4498 if (is_ia32_Load(new_pred)) {
/* plain integer load */
4501 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4503 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4504 case pn_Load_X_regular:
4505 return new_rd_Jmp(dbgi, block);
4506 case pn_Load_X_except:
4507 /* This Load might raise an exception. Mark it. */
4508 set_ia32_exc_label(new_pred, 1);
4509 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
4513 } else if (is_ia32_Conv_I2I(new_pred) ||
4514 is_ia32_Conv_I2I8Bit(new_pred)) {
/* the Load was folded into a converting load */
4515 set_irn_mode(new_pred, mode_T);
4516 if (proj == pn_Load_res) {
4517 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4518 } else if (proj == pn_Load_M) {
4519 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4521 } else if (is_ia32_xLoad(new_pred)) {
/* SSE float load */
4524 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4526 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4527 case pn_Load_X_regular:
4528 return new_rd_Jmp(dbgi, block);
4529 case pn_Load_X_except:
4530 /* This Load might raise an exception. Mark it. */
4531 set_ia32_exc_label(new_pred, 1);
4532 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4536 } else if (is_ia32_vfld(new_pred)) {
/* x87 float load */
4539 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4541 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4542 case pn_Load_X_regular:
4543 return new_rd_Jmp(dbgi, block);
4544 case pn_Load_X_except:
4545 /* This Load might raise an exception. Mark it. */
4546 set_ia32_exc_label(new_pred, 1);
4547 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4552 /* can happen for ProJMs when source address mode happened for the
4555 /* however it should not be the result proj, as that would mean the
4556 load had multiple users and should not have been used for
4558 if (proj != pn_Load_M) {
4559 panic("internal error: transformed node not a Load");
4561 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4564 panic("No idea how to transform proj");
4568 * Transform and renumber the Projs from a Div or Mod instruction.
4570 static ir_node *gen_Proj_Div(ir_node *node)
/* The Div may have become an integer Div/IDiv, an SSE xDiv or an x87 vfdiv;
 * pick the matching memory/result Proj number for each case. */
4572 ir_node *block = be_transform_node(get_nodes_block(node));
4573 ir_node *pred = get_Proj_pred(node);
4574 ir_node *new_pred = be_transform_node(pred);
4575 dbg_info *dbgi = get_irn_dbg_info(node);
4576 long proj = get_Proj_proj(node);
/* Div and IDiv share their Proj numbering, so one case handles both. */
4578 assert(pn_ia32_Div_M == pn_ia32_IDiv_M);
4579 assert(pn_ia32_Div_div_res == pn_ia32_IDiv_div_res);
4583 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4584 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4585 } else if (is_ia32_xDiv(new_pred)) {
4586 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4587 } else if (is_ia32_vfdiv(new_pred)) {
4588 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4590 panic("Div transformed to unexpected thing %+F", new_pred);
4593 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4594 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4595 } else if (is_ia32_xDiv(new_pred)) {
4596 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4597 } else if (is_ia32_vfdiv(new_pred)) {
4598 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4600 panic("Div transformed to unexpected thing %+F", new_pred);
4602 case pn_Div_X_regular:
4603 return new_rd_Jmp(dbgi, block);
4604 case pn_Div_X_except:
4605 set_ia32_exc_label(new_pred, 1);
4606 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4611 panic("No idea how to transform proj->Div");
4615 * Transform and renumber the Projs from a Div or Mod instruction.
4617 static ir_node *gen_Proj_Mod(ir_node *node)
/* Mod always becomes an integer Div/IDiv (the remainder output); renumber
 * the Projs accordingly. */
4619 ir_node *pred = get_Proj_pred(node);
4620 ir_node *new_pred = be_transform_node(pred);
4621 dbg_info *dbgi = get_irn_dbg_info(node);
4622 long proj = get_Proj_proj(node);
4624 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* Div and IDiv share their Proj numbering. */
4625 assert(pn_ia32_Div_M == pn_ia32_IDiv_M);
4626 assert(pn_ia32_Div_mod_res == pn_ia32_IDiv_mod_res);
4630 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4632 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4633 case pn_Mod_X_except:
4634 set_ia32_exc_label(new_pred, 1);
4635 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4639 panic("No idea how to transform proj->Mod");
4643 * Transform and renumber the Projs from a CopyB.
4645 static ir_node *gen_Proj_CopyB(ir_node *node)
/* CopyB becomes either CopyB_i (immediate size) or CopyB; map the memory
 * Proj to the matching output number. */
4647 ir_node *pred = get_Proj_pred(node);
4648 ir_node *new_pred = be_transform_node(pred);
4649 dbg_info *dbgi = get_irn_dbg_info(node);
4650 long proj = get_Proj_proj(node);
4654 if (is_ia32_CopyB_i(new_pred)) {
4655 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4656 } else if (is_ia32_CopyB(new_pred)) {
4657 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4664 panic("No idea how to transform proj->CopyB");
4667 static ir_node *gen_be_Call(ir_node *node)
/* Transforms an indirect be_Call into an ia32 Call: the call target may be
 * folded into an address mode or immediate, register parameters are routed
 * to their fixed EAX/ECX/EDX slots, and SSE2 calls are remembered for the
 * later XMM-return post-processing pass. */
4669 dbg_info *const dbgi = get_irn_dbg_info(node);
4670 ir_node *const src_block = get_nodes_block(node);
4671 ir_node *const block = be_transform_node(src_block);
4672 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4673 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4674 ir_node *const sp = be_transform_node(src_sp);
4675 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4676 ia32_address_mode_t am;
4677 ia32_address_t *const addr = &am.addr;
4682 ir_node * eax = noreg_GP;
4683 ir_node * ecx = noreg_GP;
4684 ir_node * edx = noreg_GP;
4685 unsigned const pop = be_Call_get_pop(node);
4686 ir_type *const call_tp = be_Call_get_type(node);
4687 int old_no_pic_adjust;
4689 /* Run the x87 simulator if the call returns a float value */
4690 if (get_method_n_ress(call_tp) > 0) {
4691 ir_type *const res_type = get_method_res_type(call_tp, 0);
4692 ir_mode *const res_mode = get_type_mode(res_type);
4694 if (res_mode != NULL && mode_is_float(res_mode)) {
4695 ir_graph *irg = current_ir_graph;
4696 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4697 irg_data->do_x87_sim = 1;
4701 /* We do not want be_Call direct calls */
4702 assert(be_Call_get_entity(node) == NULL);
4704 /* special case for PIC trampoline calls */
4705 old_no_pic_adjust = ia32_no_pic_adjust;
4706 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4708 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4709 match_am | match_immediate);
4711 ia32_no_pic_adjust = old_no_pic_adjust;
/* Transform the register parameters: last input is the fpcw, the ones
 * before it (down to be_pos_Call_first_arg) are GP register arguments
 * constrained to EAX/ECX/EDX. */
4713 i = get_irn_arity(node) - 1;
4714 fpcw = be_transform_node(get_irn_n(node, i--));
4715 for (; i >= be_pos_Call_first_arg; --i) {
4716 arch_register_req_t const *const req = arch_get_register_req(node, i);
4717 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4719 assert(req->type == arch_register_req_type_limited);
4720 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4722 switch (*req->limited) {
4723 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4724 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4725 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4726 default: panic("Invalid GP register for register parameter");
4730 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4731 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4732 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4733 set_am_attributes(call, &am);
4734 call = fix_mem_proj(call, &am);
4736 if (get_irn_pinned(node) == op_pin_state_pinned)
4737 set_irn_pinned(call, op_pin_state_pinned);
4739 SET_IA32_ORIG_NODE(call, node);
4741 if (ia32_cg_config.use_sse2) {
4742 /* remember this call for post-processing */
4743 ARR_APP1(ir_node *, call_list, call);
4744 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4751 * Transform Builtin trap
4753 static ir_node *gen_trap(ir_node *node)
/* __builtin_trap -> ud2 instruction. */
4755 dbg_info *dbgi = get_irn_dbg_info(node);
4756 ir_node *block = be_transform_node(get_nodes_block(node));
4757 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4759 return new_bd_ia32_UD2(dbgi, block, mem);
4763 * Transform Builtin debugbreak
4765 static ir_node *gen_debugbreak(ir_node *node)
/* __builtin_debugbreak -> int3 breakpoint instruction. */
4767 dbg_info *dbgi = get_irn_dbg_info(node);
4768 ir_node *block = be_transform_node(get_nodes_block(node));
4769 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4771 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4775 * Transform Builtin return_address
4777 static ir_node *gen_return_address(ir_node *node)
/* __builtin_return_address(level): for level > 0 a ClimbFrame walks up the
 * frame-pointer chain first, then the return address is loaded relative to
 * the (reached) frame via the special return-address frame entity. */
4779 ir_node *param = get_Builtin_param(node, 0);
4780 ir_node *frame = get_Builtin_param(node, 1);
4781 dbg_info *dbgi = get_irn_dbg_info(node);
4782 ir_tarval *tv = get_Const_tarval(param);
4783 unsigned long value = get_tarval_long(tv);
4785 ir_node *block = be_transform_node(get_nodes_block(node));
4786 ir_node *ptr = be_transform_node(frame);
/* walk `value` frames up the call chain */
4790 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4791 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4792 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4795 /* load the return address from this frame */
4796 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4798 set_irn_pinned(load, get_irn_pinned(node));
4799 set_ia32_op_type(load, ia32_AddrModeS);
4800 set_ia32_ls_mode(load, mode_Iu);
4802 set_ia32_am_offs_int(load, 0);
4803 set_ia32_use_frame(load);
4804 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4806 if (get_irn_pinned(node) == op_pin_state_floats) {
/* unpinned loads must be rematerializable; all load variants share the
 * same result Proj number, which the asserts verify */
4807 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4808 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4809 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4810 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4813 SET_IA32_ORIG_NODE(load, node);
4814 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4818 * Transform Builtin frame_address
4820 static ir_node *gen_frame_address(ir_node *node)
/* __builtin_frame_address(level): analogous to gen_return_address, but
 * loads via the frame-address entity instead of the return-address entity. */
4822 ir_node *param = get_Builtin_param(node, 0);
4823 ir_node *frame = get_Builtin_param(node, 1);
4824 dbg_info *dbgi = get_irn_dbg_info(node);
4825 ir_tarval *tv = get_Const_tarval(param);
4826 unsigned long value = get_tarval_long(tv);
4828 ir_node *block = be_transform_node(get_nodes_block(node));
4829 ir_node *ptr = be_transform_node(frame);
/* walk `value` frames up the call chain */
4834 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4835 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4836 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4839 /* load the frame address from this frame */
4840 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4842 set_irn_pinned(load, get_irn_pinned(node));
4843 set_ia32_op_type(load, ia32_AddrModeS);
4844 set_ia32_ls_mode(load, mode_Iu);
4846 ent = ia32_get_frame_address_entity();
4848 set_ia32_am_offs_int(load, 0);
4849 set_ia32_use_frame(load);
4850 set_ia32_frame_ent(load, ent);
4852 /* will fail anyway, but gcc does this: */
4853 set_ia32_am_offs_int(load, 0);
4856 if (get_irn_pinned(node) == op_pin_state_floats) {
/* unpinned loads must be rematerializable; all load variants share the
 * same result Proj number, which the asserts verify */
4857 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4858 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4859 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4860 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4863 SET_IA32_ORIG_NODE(load, node);
4864 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4868 * Transform Builtin frame_address
4870 static ir_node *gen_prefetch(ir_node *node)
/* __builtin_prefetch(ptr, rw, locality): emits an SSE prefetchNTA/0/1/2 or
 * a 3DNow! prefetch/prefetchw depending on CPU features. Without any
 * prefetch support the builtin degenerates to its memory input. */
4873 ir_node *ptr, *block, *mem, *base, *index;
4874 ir_node *param, *new_node;
4877 ia32_address_t addr;
4879 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4880 /* no prefetch at all, route memory */
4881 return be_transform_node(get_Builtin_mem(node));
/* second builtin parameter: rw flag (1 = prefetch for write) */
4884 param = get_Builtin_param(node, 1);
4885 tv = get_Const_tarval(param);
4886 rw = get_tarval_long(tv);
4888 /* construct load address */
4889 memset(&addr, 0, sizeof(addr));
4890 ptr = get_Builtin_param(node, 0);
4891 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4898 base = be_transform_node(base);
4901 if (index == NULL) {
4904 index = be_transform_node(index);
4907 dbgi = get_irn_dbg_info(node);
4908 block = be_transform_node(get_nodes_block(node));
4909 mem = be_transform_node(get_Builtin_mem(node));
4911 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4912 /* we have 3DNow!, this was already checked above */
4913 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4914 } else if (ia32_cg_config.use_sse_prefetch) {
4915 /* note: rw == 1 is IGNORED in that case */
4916 param = get_Builtin_param(node, 2);
4917 tv = get_Const_tarval(param);
4918 locality = get_tarval_long(tv);
4920 /* SSE style prefetch */
/* locality selects the temporal-locality hint: NTA, then T2/T1/T0 */
4923 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4926 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4929 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4932 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4936 assert(ia32_cg_config.use_3dnow_prefetch);
4937 /* 3DNow! style prefetch */
4938 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4941 set_irn_pinned(new_node, get_irn_pinned(node));
4942 set_ia32_op_type(new_node, ia32_AddrModeS);
4943 set_ia32_ls_mode(new_node, mode_Bu);
4944 set_address(new_node, &addr);
4946 SET_IA32_ORIG_NODE(new_node, node);
4948 be_dep_on_frame(new_node);
4949 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4953 * Transform bsf like node
/**
 * Transform a bsf-like builtin: build the given destination-style ia32 node
 * from the builtin's single operand, folding the operand as a memory operand
 * (address mode) when possible.
 *
 * @param node  the Builtin node
 * @param func  node constructor (e.g. new_bd_ia32_Bsf / new_bd_ia32_Bsr)
 */
static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
	ir_node *param = get_Builtin_param(node, 0);
	dbg_info *dbgi = get_irn_dbg_info(node);

	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);

	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* try to match the operand as an address-mode memory operand */
	match_arguments(&am, block, NULL, param, NULL, match_am);

	cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
	set_am_attributes(cnt, &am);
	set_ia32_ls_mode(cnt, get_irn_mode(param));

	SET_IA32_ORIG_NODE(cnt, node);
	/* if a memory operand was folded, reroute the mem Proj accordingly */
	return fix_mem_proj(cnt, &am);
4978 * Transform builtin ffs.
/* Transform builtin ffs: ffs(x) = index of lowest set bit + 1, or 0 when
 * x == 0.  bsf delivers the bit index but is undefined for input 0, so the
 * zero case is patched in via the flags result. */
static ir_node *gen_ffs(ir_node *node)
	ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
	ir_node *real = skip_Proj(bsf);
	dbg_info *dbgi = get_irn_dbg_info(real);
	ir_node *block = get_nodes_block(real);
	ir_node *flag, *set, *conv, *neg, *orn;

	/* make sure we can also read the flags produced by the bsf */
	if (get_irn_mode(real) != mode_T) {
		set_irn_mode(real, mode_T);
		bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);

	flag = new_r_Proj(real, mode_b, pn_ia32_flags);

	/* sete: ZF set by bsf iff the input was zero */
	set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
	SET_IA32_ORIG_NODE(set, node);

	/* zero-extend the 8bit setcc result */
	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
	SET_IA32_ORIG_NODE(conv, node);

	/* neg: 0 -> 0, 1 -> 0xFFFFFFFF */
	neg = new_bd_ia32_Neg(dbgi, block, conv);

	/* or: bsf result, forced to all-ones when the input was zero */
	orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
	set_ia32_commutative(orn);

	/* add 1: all-ones + 1 wraps to 0, otherwise bit index + 1 */
	return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, orn, ia32_create_Immediate(NULL, 0, 1));
5016 * Transform builtin clz.
/* Transform builtin clz: bsr yields the index of the highest set bit, so
 * for 32bit operands clz(x) = 31 - bsr(x) = bsr(x) ^ 31. */
static ir_node *gen_clz(ir_node *node)
	ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
	ir_node *real = skip_Proj(bsr);
	dbg_info *dbgi = get_irn_dbg_info(real);
	ir_node *block = get_nodes_block(real);
	ir_node *imm = ia32_create_Immediate(NULL, 0, 31);

	return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5030 * Transform builtin ctz.
/* Transform builtin ctz: bsf computes exactly the count of trailing zeros
 * (both are undefined for input 0). */
static ir_node *gen_ctz(ir_node *node)
	return gen_unop_AM(node, new_bd_ia32_Bsf);
5038 * Transform builtin parity.
/* Transform builtin parity: compare the operand against 0 (this sets the
 * x86 parity flag), then materialize the flag with setp and zero-extend
 * the 8bit result.
 * NOTE(review): the x86 PF only reflects the low byte of the result —
 * presumably additional folding elsewhere handles the full word; confirm. */
static ir_node *gen_parity(ir_node *node)
	ir_node *param = get_Builtin_param(node, 0);
	dbg_info *dbgi = get_irn_dbg_info(node);

	ir_node *block = get_nodes_block(node);

	ir_node *new_block = be_transform_node(block);
	ir_node *imm, *cmp, *new_node;

	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* cmp param, 0 — operand may be folded as a memory operand */
	match_arguments(&am, block, NULL, param, NULL, match_am);
	imm = ia32_create_Immediate(NULL, 0, 0);
	cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
	                      addr->mem, imm, am.new_op2, am.ins_permuted, 0);
	set_am_attributes(cmp, &am);
	set_ia32_ls_mode(cmp, mode_Iu);

	SET_IA32_ORIG_NODE(cmp, node);

	cmp = fix_mem_proj(cmp, &am);

	/* setp: read the parity flag */
	new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
	SET_IA32_ORIG_NODE(new_node, node);

	/* zero-extend the 8bit setcc result */
	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
	                                    nomem, new_node, mode_Bu);
	SET_IA32_ORIG_NODE(new_node, node);
5078 * Transform builtin popcount
5080 static ir_node *gen_popcount(ir_node *node)
5082 ir_node *param = get_Builtin_param(node, 0);
5083 dbg_info *dbgi = get_irn_dbg_info(node);
5085 ir_node *block = get_nodes_block(node);
5086 ir_node *new_block = be_transform_node(block);
5089 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5091 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5092 if (ia32_cg_config.use_popcnt) {
5093 ia32_address_mode_t am;
5094 ia32_address_t *addr = &am.addr;
5097 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5099 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5100 set_am_attributes(cnt, &am);
5101 set_ia32_ls_mode(cnt, get_irn_mode(param));
5103 SET_IA32_ORIG_NODE(cnt, node);
5104 return fix_mem_proj(cnt, &am);
5107 new_param = be_transform_node(param);
5109 /* do the standard popcount algo */
5111 /* m1 = x & 0x55555555 */
5112 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5113 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5116 simm = ia32_create_Immediate(NULL, 0, 1);
5117 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5119 /* m2 = s1 & 0x55555555 */
5120 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5123 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5125 /* m4 = m3 & 0x33333333 */
5126 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5127 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5130 simm = ia32_create_Immediate(NULL, 0, 2);
5131 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5133 /* m5 = s2 & 0x33333333 */
5134 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5137 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5139 /* m7 = m6 & 0x0F0F0F0F */
5140 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5141 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5144 simm = ia32_create_Immediate(NULL, 0, 4);
5145 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5147 /* m8 = s3 & 0x0F0F0F0F */
5148 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5151 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5153 /* m10 = m9 & 0x00FF00FF */
5154 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5155 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5158 simm = ia32_create_Immediate(NULL, 0, 8);
5159 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5161 /* m11 = s4 & 0x00FF00FF */
5162 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5164 /* m12 = m10 + m11 */
5165 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5167 /* m13 = m12 & 0x0000FFFF */
5168 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5169 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5171 /* s5 = m12 >> 16 */
5172 simm = ia32_create_Immediate(NULL, 0, 16);
5173 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5175 /* res = m13 + s5 */
5176 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5180 * Transform builtin byte swap.
5182 static ir_node *gen_bswap(ir_node *node)
5184 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5185 dbg_info *dbgi = get_irn_dbg_info(node);
5187 ir_node *block = get_nodes_block(node);
5188 ir_node *new_block = be_transform_node(block);
5189 ir_mode *mode = get_irn_mode(param);
5190 unsigned size = get_mode_size_bits(mode);
5191 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5195 if (ia32_cg_config.use_i486) {
5196 /* swap available */
5197 return new_bd_ia32_Bswap(dbgi, new_block, param);
5199 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5200 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5202 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5203 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5205 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5207 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5208 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5210 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5211 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5214 /* swap16 always available */
5215 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5218 panic("Invalid bswap size (%d)", size);
5223 * Transform builtin outport.
/* Transform builtin outport: emit an out instruction writing <value> of
 * the given mode to I/O port <port> (port may become an immediate). */
static ir_node *gen_outport(ir_node *node)
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *oldv = get_Builtin_param(node, 1);
	ir_mode *mode = get_irn_mode(oldv);
	ir_node *value = be_transform_node(oldv);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);

	ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
	/* ls_mode selects the operand size of the out instruction */
	set_ia32_ls_mode(res, mode);
5241 * Transform builtin inport.
/* Transform builtin inport: emit an in instruction reading from I/O port
 * <port>; the operand size comes from the builtin's result type. */
static ir_node *gen_inport(ir_node *node)
	ir_type *tp = get_Builtin_type(node);
	ir_type *rstp = get_method_res_type(tp, 0);
	ir_mode *mode = get_type_mode(rstp);
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);

	ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
	set_ia32_ls_mode(res, mode);

	/* check for missing Result Proj */
5261 * Transform a builtin inner trampoline
/* Transform a builtin inner trampoline: write a small code stub to *ptr
 * that loads the static chain and jumps to the callee:
 *     B9 <env>      mov ecx, <env>
 *     E9 <rel>      jmp <callee - (trampoline + 10)>
 * The stores are chained through the same address (offset advancing lines
 * appear elided in this excerpt).  Returns a Tuple of (mem, trampoline). */
static ir_node *gen_inner_trampoline(ir_node *node)
	ir_node *ptr = get_Builtin_param(node, 0);
	ir_node *callee = get_Builtin_param(node, 1);
	ir_node *env = be_transform_node(get_Builtin_param(node, 2));
	ir_node *mem = get_Builtin_mem(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);

	ir_node *trampoline;

	dbg_info *dbgi = get_irn_dbg_info(node);
	ia32_address_t addr;

	/* construct store address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);

	if (addr.base == NULL) {
		addr.base = noreg_GP;
	addr.base = be_transform_node(addr.base);

	if (addr.index == NULL) {
		addr.index = noreg_GP;
	addr.index = be_transform_node(addr.index);

	addr.mem = be_transform_node(mem);

	/* mov ecx, <env> */
	/* 0xB9 is the opcode byte of "mov ecx, imm32" */
	val = ia32_create_Immediate(NULL, 0, 0xB9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);

	/* store the environment pointer as the mov's 32bit immediate */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, env);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);

	/* jmp rel <callee> */
	/* 0xE9 is the opcode byte of "jmp rel32" */
	val = ia32_create_Immediate(NULL, 0, 0xE9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);

	trampoline = be_transform_node(ptr);

	/* the callee is typically an immediate (-10 compensates for the
	 * 10 trampoline bytes preceding the jump target) */
	if (is_SymConst(callee)) {
		rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
		rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
	rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);

	/* store the pc-relative jump displacement */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, rel);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);

	return new_r_Tuple(new_block, 2, in);
5351 * Transform Builtin node.
/* Transform a Builtin node: dispatch on the builtin kind to the specific
 * gen_* transformer.  NOTE(review): several case labels appear elided in
 * this excerpt. */
static ir_node *gen_Builtin(ir_node *node)
	ir_builtin_kind kind = get_Builtin_kind(node);

	return gen_trap(node);
	case ir_bk_debugbreak:
		return gen_debugbreak(node);
	case ir_bk_return_address:
		return gen_return_address(node);
	case ir_bk_frame_address:
		return gen_frame_address(node);
	case ir_bk_prefetch:
		return gen_prefetch(node);
	return gen_ffs(node);
	return gen_clz(node);
	return gen_ctz(node);
	return gen_parity(node);
	case ir_bk_popcount:
		return gen_popcount(node);
	return gen_bswap(node);
	return gen_outport(node);
	return gen_inport(node);
	case ir_bk_inner_trampoline:
		return gen_inner_trampoline(node);
	/* any kind not handled above is unsupported on ia32 */
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5391 * Transform Proj(Builtin) node.
/* Transform Proj(Builtin): map the generic Builtin result/memory Projs onto
 * the Projs of the already-transformed ia32 node. */
static ir_node *gen_Proj_Builtin(ir_node *proj)
	ir_node *node = get_Proj_pred(proj);
	ir_node *new_node = be_transform_node(node);
	ir_builtin_kind kind = get_Builtin_kind(node);

	/* single-result builtins: the transformed node IS the result */
	case ir_bk_return_address:
	case ir_bk_frame_address:
	case ir_bk_popcount:
		assert(get_Proj_proj(proj) == pn_Builtin_1_result);

	/* memory-only builtins */
	case ir_bk_debugbreak:
	case ir_bk_prefetch:
		assert(get_Proj_proj(proj) == pn_Builtin_M);

	/* inport has both a result and a memory Proj */
	if (get_Proj_proj(proj) == pn_Builtin_1_result) {
		return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
	assert(get_Proj_proj(proj) == pn_Builtin_M);
	return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);

	/* inner_trampoline was transformed into a Tuple(mem, result) */
	case ir_bk_inner_trampoline:
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			return get_Tuple_pred(new_node, 1);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return get_Tuple_pred(new_node, 0);
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Transform be_IncSP: duplicate the node; the stack adjustment is later
 * emitted as add/sub, which clobbers the flags, so mark it accordingly. */
static ir_node *gen_be_IncSP(ir_node *node)
	ir_node *res = be_duplicate_node(node);
	arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5443 * Transform the Projs from a be_Call.
/* Transform the Projs from a be_Call: renumber be_Call proj numbers to
 * ia32_Call proj numbers and pin the fixed registers (esp, fpcw) on the
 * corresponding result Projs.
 * NOTE(review): the search loop that matches a result Proj against the
 * transformed call's limited output requirements appears partially elided. */
static ir_node *gen_Proj_be_Call(ir_node *node)
	ir_node *call = get_Proj_pred(node);
	ir_node *new_call = be_transform_node(call);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	ir_mode *mode = get_irn_mode(node);

	if (proj == pn_be_Call_M_regular) {
		return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);

	/* transform call modes */
	if (mode_is_data(mode)) {
		const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);

	/* Map from be_Call to ia32_Call proj number */
	if (proj == pn_be_Call_sp) {
		proj = pn_ia32_Call_stack;
	} else if (proj == pn_be_Call_M_regular) {
		proj = pn_ia32_Call_M;
	arch_register_req_t const *const req = arch_get_register_req_out(node);
	int const n_outs = arch_irn_get_n_outs(new_call);

	assert(proj >= pn_be_Call_first_res);
	assert(req->type & arch_register_req_type_limited);

	/* find the output of the new call with the same limited register
	 * requirement as this result Proj */
	for (i = 0; i < n_outs; ++i) {
		arch_register_req_t const *const new_req
			= arch_get_out_register_req(new_call, i);

		if (!(new_req->type & arch_register_req_type_limited) ||
		    new_req->cls != req->cls ||
		    *new_req->limited != *req->limited)

	res = new_rd_Proj(dbgi, new_call, mode, proj);

	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
	case pn_ia32_Call_stack:
		arch_set_irn_register(res, &ia32_registers[REG_ESP]);

	case pn_ia32_Call_fpcw:
		arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5508 * Transform the Projs from a Cmp.
/* Transform the Projs from a Cmp: must never be reached — mode_b values
 * are expected to have been lowered before this phase. */
static ir_node *gen_Proj_Cmp(ir_node *node)
	/* this probably means not all mode_b nodes were lowered... */
	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5518 * Transform the Projs from a Bound.
/* Transform the Projs from a Bound: the Bound was turned into a Jcc, so
 * the X Projs map onto the Jcc's true/false outputs; the memory Proj is
 * routed through, the result is just the (transformed) index. */
static ir_node *gen_Proj_Bound(ir_node *node)
	ir_node *pred = get_Proj_pred(node);

	switch (get_Proj_proj(node)) {
		return be_transform_node(get_Bound_mem(pred));
	case pn_Bound_X_regular:
		new_node = be_transform_node(pred);
		return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
	case pn_Bound_X_except:
		new_node = be_transform_node(pred);
		return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
		return be_transform_node(get_Bound_index(pred));
	panic("unsupported Proj from Bound");
/* Transform a Proj from an ASM node: the memory output is always the last
 * output of the transformed ASM; register outputs keep their position
 * (value-renumbering lines appear elided in this excerpt). */
static ir_node *gen_Proj_ASM(ir_node *node)
	ir_mode *mode = get_irn_mode(node);
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	long pos = get_Proj_proj(node);

	if (mode == mode_M) {
		/* memory result is by convention the last output */
		pos = arch_irn_get_n_outs(new_pred)-1;
	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
	} else if (mode_is_float(mode)) {
		panic("unexpected proj mode at ASM");

	return new_r_Proj(new_pred, mode, pos);
5562 * Transform and potentially renumber Proj nodes.
/* Transform and potentially renumber Proj nodes: dispatch on the opcode of
 * the Proj's predecessor to the specific Proj transformer.
 * NOTE(review): most case labels of the outer switch appear elided in this
 * excerpt. */
static ir_node *gen_Proj(ir_node *node)
	ir_node *pred = get_Proj_pred(node);

	switch (get_irn_opcode(pred)) {
	proj = get_Proj_proj(node);
	if (proj == pn_Store_M) {
		/* a Store has only a memory result; routing through is enough */
		return be_transform_node(pred);
	panic("No idea how to transform proj->Store");
	return gen_Proj_Load(node);
	return gen_Proj_ASM(node);
	return gen_Proj_Builtin(node);
	return gen_Proj_Div(node);
	return gen_Proj_Mod(node);
	return gen_Proj_CopyB(node);
	return gen_Proj_be_SubSP(node);
	return gen_Proj_be_AddSP(node);
	return gen_Proj_be_Call(node);
	return gen_Proj_Cmp(node);
	return gen_Proj_Bound(node);
	proj = get_Proj_proj(node);
	case pn_Start_X_initial_exec: {
		ir_node *block = get_nodes_block(pred);
		ir_node *new_block = be_transform_node(block);
		dbg_info *dbgi = get_irn_dbg_info(node);
		/* we exchange the ProjX with a jump */
		ir_node *jump = new_rd_Jmp(dbgi, new_block);
	case pn_Start_P_tls:
		return ia32_gen_Proj_tls(node);
	if (is_ia32_l_FloattoLL(pred)) {
		return gen_Proj_l_FloattoLL(node);
	} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
	ir_mode *mode = get_irn_mode(node);
	if (ia32_mode_needs_gp_reg(mode)) {
		/* gp values live in Iu registers; rebuild the Proj in mode_Iu
		 * but keep the original node number for debugging */
		ir_node *new_pred = be_transform_node(pred);
		ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
		                               get_Proj_proj(node));
		new_proj->node_nr = node->node_nr;
	/* default: keep the Proj as-is */
	return be_duplicate_node(node);
5639 * Enters all transform functions into the generic pointer
/**
 * Enters all transform functions into the generic op function pointers.
 * Ops without a special transformer are duplicated unchanged
 * (be_duplicate_node).
 */
static void register_transformers(void)
	/* first clear the generic function pointer for all ops */
	be_start_transform_setup();

	be_set_transform_function(op_Add, gen_Add);
	be_set_transform_function(op_And, gen_And);
	be_set_transform_function(op_ASM, ia32_gen_ASM);
	be_set_transform_function(op_be_AddSP, gen_be_AddSP);
	be_set_transform_function(op_be_Call, gen_be_Call);
	be_set_transform_function(op_be_Copy, gen_be_Copy);
	be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
	be_set_transform_function(op_be_IncSP, gen_be_IncSP);
	be_set_transform_function(op_be_Return, gen_be_Return);
	be_set_transform_function(op_be_SubSP, gen_be_SubSP);
	be_set_transform_function(op_Bound, gen_Bound);
	be_set_transform_function(op_Builtin, gen_Builtin);
	be_set_transform_function(op_Cmp, gen_Cmp);
	be_set_transform_function(op_Cond, gen_Cond);
	be_set_transform_function(op_Const, gen_Const);
	be_set_transform_function(op_Conv, gen_Conv);
	be_set_transform_function(op_CopyB, ia32_gen_CopyB);
	be_set_transform_function(op_Div, gen_Div);
	be_set_transform_function(op_Eor, gen_Eor);
	/* ia32 "lowered" (l_*) nodes created by 64bit lowering */
	be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
	be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
	be_set_transform_function(op_ia32_Leave, be_duplicate_node);
	be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
	be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
	be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
	be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
	be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
	be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
	be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
	be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
	be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
	be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
	be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
	be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
	be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
	be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
	be_set_transform_function(op_ia32_Push, be_duplicate_node);
	be_set_transform_function(op_IJmp, gen_IJmp);
	be_set_transform_function(op_Jmp, gen_Jmp);
	be_set_transform_function(op_Load, gen_Load);
	be_set_transform_function(op_Minus, gen_Minus);
	be_set_transform_function(op_Mod, gen_Mod);
	be_set_transform_function(op_Mul, gen_Mul);
	be_set_transform_function(op_Mulh, gen_Mulh);
	be_set_transform_function(op_Mux, gen_Mux);
	be_set_transform_function(op_Not, gen_Not);
	be_set_transform_function(op_Or, gen_Or);
	be_set_transform_function(op_Phi, gen_Phi);
	be_set_transform_function(op_Proj, gen_Proj);
	be_set_transform_function(op_Rotl, gen_Rotl);
	be_set_transform_function(op_Shl, gen_Shl);
	be_set_transform_function(op_Shr, gen_Shr);
	be_set_transform_function(op_Shrs, gen_Shrs);
	be_set_transform_function(op_Store, gen_Store);
	be_set_transform_function(op_Sub, gen_Sub);
	be_set_transform_function(op_SymConst, gen_SymConst);
	be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5709 * Pre-transform all unknown and noreg nodes.
/**
 * Pre-transform all unknown and noreg nodes so they exist in the new graph
 * before the main transformation walk, and cache the per-graph nomem /
 * noreg_GP nodes used by the transformers above.
 */
static void ia32_pretransform_node(void)
	ir_graph *irg = current_ir_graph;
	ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);

	irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
	irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
	irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);

	nomem = get_irg_no_mem(irg);
	noreg_GP = ia32_new_NoReg_gp(irg);
5727 * Post-process all calls if we are in SSE mode.
5728 * The ABI requires that the results are in st0, copy them
5729 * to a xmm register.
/**
 * Post-process all calls if we are in SSE mode.
 * The x87 ABI returns float results in st(0); in SSE mode every float user
 * expects an xmm register, so each float call result is stored to the frame
 * with vfst and reloaded with xLoad — except xStore users, which can be
 * patched into a direct vfst.
 */
static void postprocess_fp_call_results(void)
	for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
		ir_node *call = call_list[i];
		ir_type *mtp = call_types[i];

		/* walk the results backwards (vf0 + j indexes the fp results) */
		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
			ir_type *res_tp = get_method_res_type(mtp, j);
			ir_node *res, *new_res;
			const ir_edge_t *edge, *next;

			if (! is_atomic_type(res_tp)) {
				/* no floating point return */

			mode = get_type_mode(res_tp);
			if (! mode_is_float(mode)) {
				/* no floating point return */

			res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);

			/* now patch the users */
			foreach_out_edge_safe(res, edge, next) {
				ir_node *succ = get_edge_src_irn(edge);

				/* Keep nodes are left on the x87 result */
				if (be_is_Keep(succ))

				if (is_ia32_xStore(succ)) {
					/* an xStore can be patched into an vfst */
					dbg_info *db = get_irn_dbg_info(succ);
					ir_node *block = get_nodes_block(succ);
					ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
					ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
					ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
					ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
					ir_mode *mode = get_ia32_ls_mode(succ);

					ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
					if (is_ia32_use_frame(succ))
						set_ia32_use_frame(st);
					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
					set_irn_pinned(st, get_irn_pinned(succ));
					set_ia32_op_type(st, ia32_AddrModeD);

				/* lazily build the store/reload pair on first use */
				if (new_res == NULL) {
					dbg_info *db = get_irn_dbg_info(call);
					ir_node *block = get_nodes_block(call);
					ir_node *frame = get_irg_frame(current_ir_graph);
					ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
					ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
					ir_node *vfst, *xld, *new_mem;

					/* store st(0) on stack */
					vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
					set_ia32_op_type(vfst, ia32_AddrModeD);
					set_ia32_use_frame(vfst);

					/* load into SSE register */
					xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
					set_ia32_op_type(xld, ia32_AddrModeS);
					set_ia32_use_frame(xld);

					new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
					new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);

					/* route all former memory users through the reload */
					if (old_mem != NULL) {
						edges_reroute(old_mem, new_mem, current_ir_graph);

				set_irn_n(succ, get_edge_src_pos(edge), new_res);
5820 /* do the transformation */
/**
 * Main entry: transform one firm graph into ia32 code.
 * Sets up the transformer table, heights and address-mode analysis, runs
 * the generic transform walk with CSE disabled, then (in SSE mode) patches
 * the float call results, and finally releases the analysis data.
 */
void ia32_transform_graph(ir_graph *irg)
	register_transformers();
	initial_fpcw = NULL;
	ia32_no_pic_adjust = 0;

	be_timer_push(T_HEIGHTS);
	ia32_heights = heights_new(irg);
	be_timer_pop(T_HEIGHTS);
	ia32_calculate_non_address_mode_nodes(irg);

	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();

	call_list = NEW_ARR_F(ir_node *, 0);
	call_types = NEW_ARR_F(ir_type *, 0);
	be_transform_graph(irg, ia32_pretransform_node);

	if (ia32_cg_config.use_sse2)
		postprocess_fp_call_results();
	DEL_ARR_F(call_types);
	DEL_ARR_F(call_list);

	/* restore the CSE setting saved above */
	set_opt_cse(cse_last);

	ia32_free_non_address_mode_nodes();
	heights_free(ia32_heights);
	ia32_heights = NULL;
5855 void ia32_init_transform(void)
5857 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");