/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       This file implements the IR transformation from firm into
 *              ia32-Firm.
 * @author      Christian Wuerdig, Matthias Braun
 */
#include <string.h>

#include "irgraph_t.h"
#include "iredges_t.h"

#include "../benode.h"
#include "../besched.h"
#include "../beutil.h"
#include "../betranshlp.h"

#include "bearch_ia32_t.h"
#include "ia32_common_transform.h"
#include "ia32_nodes_attr.h"
#include "ia32_transform.h"
#include "ia32_new_nodes.h"
#include "ia32_dbg_stat.h"
#include "ia32_optimize.h"
#include "ia32_util.h"
#include "ia32_address_mode.h"
#include "ia32_architecture.h"

#include "gen_ia32_regalloc_if.h"
/* define this to construct SSE constants instead of load them */
#undef CONSTRUCT_SSE_CONST

/* bit masks / magic constants as decimal/hex strings, parsed into tarvals */
#define SFP_SIGN   "0x80000000"
#define DFP_SIGN   "0x8000000000000000"
#define SFP_ABS    "0x7FFFFFFF"
#define DFP_ABS    "0x7FFFFFFFFFFFFFFF"
#define DFP_INTMAX "9223372036854775807"
#define ULL_BIAS   "18446744073709551616"

/* linker names of the entities generated for the constants above */
#define ENT_SFP_SIGN "C_ia32_sfp_sign"
#define ENT_DFP_SIGN "C_ia32_dfp_sign"
#define ENT_SFP_ABS  "C_ia32_sfp_abs"
#define ENT_DFP_ABS  "C_ia32_dfp_abs"
#define ENT_ULL_BIAS "C_ia32_ull_bias"

/* shorthands for the register class modes of the x87/SSE units */
#define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
#define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
91 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
93 static ir_node *initial_fpcw = NULL;
94 int ia32_no_pic_adjust;
96 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
100 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
101 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
104 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
108 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
110 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
111 ir_node *base, ir_node *index, ir_node *mem);
113 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
114 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
117 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
119 static ir_node *create_immediate_or_transform(ir_node *node,
120 char immediate_constraint_type);
122 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
123 dbg_info *dbgi, ir_node *block,
124 ir_node *op, ir_node *orig_node);
126 /* its enough to have those once */
127 static ir_node *nomem, *noreg_GP;
129 /** a list to postprocess all calls */
130 static ir_node **call_list;
131 static ir_type **call_types;
133 /** Return non-zero is a node represents the 0 constant. */
134 static bool is_Const_0(ir_node *node)
136 return is_Const(node) && is_Const_null(node);
139 /** Return non-zero is a node represents the 1 constant. */
140 static bool is_Const_1(ir_node *node)
142 return is_Const(node) && is_Const_one(node);
145 /** Return non-zero is a node represents the -1 constant. */
146 static bool is_Const_Minus_1(ir_node *node)
148 return is_Const(node) && is_Const_all_one(node);
152 * returns true if constant can be created with a simple float command
154 static bool is_simple_x87_Const(ir_node *node)
156 ir_tarval *tv = get_Const_tarval(node);
157 if (tarval_is_null(tv) || tarval_is_one(tv))
160 /* TODO: match all the other float constants */
165 * returns true if constant can be created with a simple float command
167 static bool is_simple_sse_Const(ir_node *node)
169 ir_tarval *tv = get_Const_tarval(node);
170 ir_mode *mode = get_tarval_mode(tv);
175 if (tarval_is_null(tv)
176 #ifdef CONSTRUCT_SSE_CONST
181 #ifdef CONSTRUCT_SSE_CONST
182 if (mode == mode_D) {
183 unsigned val = get_tarval_sub_bits(tv, 0) |
184 (get_tarval_sub_bits(tv, 1) << 8) |
185 (get_tarval_sub_bits(tv, 2) << 16) |
186 (get_tarval_sub_bits(tv, 3) << 24);
188 /* lower 32bit are zero, really a 32bit constant */
191 #endif /* CONSTRUCT_SSE_CONST */
192 /* TODO: match all the other float constants */
197 * return NoREG or pic_base in case of PIC.
198 * This is necessary as base address for newly created symbols
200 static ir_node *get_symconst_base(void)
202 ir_graph *irg = current_ir_graph;
204 if (be_get_irg_options(irg)->pic) {
205 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
206 return arch_env->impl->get_pic_base(irg);
213 * Transforms a Const.
215 static ir_node *gen_Const(ir_node *node)
217 ir_node *old_block = get_nodes_block(node);
218 ir_node *block = be_transform_node(old_block);
219 dbg_info *dbgi = get_irn_dbg_info(node);
220 ir_mode *mode = get_irn_mode(node);
222 assert(is_Const(node));
224 if (mode_is_float(mode)) {
230 if (ia32_cg_config.use_sse2) {
231 ir_tarval *tv = get_Const_tarval(node);
232 if (tarval_is_null(tv)) {
233 load = new_bd_ia32_xZero(dbgi, block);
234 set_ia32_ls_mode(load, mode);
236 #ifdef CONSTRUCT_SSE_CONST
237 } else if (tarval_is_one(tv)) {
238 int cnst = mode == mode_F ? 26 : 55;
239 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
240 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
241 ir_node *pslld, *psrld;
243 load = new_bd_ia32_xAllOnes(dbgi, block);
244 set_ia32_ls_mode(load, mode);
245 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
246 set_ia32_ls_mode(pslld, mode);
247 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
248 set_ia32_ls_mode(psrld, mode);
250 #endif /* CONSTRUCT_SSE_CONST */
251 } else if (mode == mode_F) {
252 /* we can place any 32bit constant by using a movd gp, sse */
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
257 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
258 load = new_bd_ia32_xMovd(dbgi, block, cnst);
259 set_ia32_ls_mode(load, mode);
262 #ifdef CONSTRUCT_SSE_CONST
263 if (mode == mode_D) {
264 unsigned val = get_tarval_sub_bits(tv, 0) |
265 (get_tarval_sub_bits(tv, 1) << 8) |
266 (get_tarval_sub_bits(tv, 2) << 16) |
267 (get_tarval_sub_bits(tv, 3) << 24);
269 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
270 ir_node *cnst, *psllq;
272 /* fine, lower 32bit are zero, produce 32bit value */
273 val = get_tarval_sub_bits(tv, 4) |
274 (get_tarval_sub_bits(tv, 5) << 8) |
275 (get_tarval_sub_bits(tv, 6) << 16) |
276 (get_tarval_sub_bits(tv, 7) << 24);
277 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
278 load = new_bd_ia32_xMovd(dbgi, block, cnst);
279 set_ia32_ls_mode(load, mode);
280 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
281 set_ia32_ls_mode(psllq, mode);
286 #endif /* CONSTRUCT_SSE_CONST */
287 floatent = ia32_create_float_const_entity(node);
289 base = get_symconst_base();
290 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
292 set_ia32_op_type(load, ia32_AddrModeS);
293 set_ia32_am_sc(load, floatent);
294 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
295 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
298 if (is_Const_null(node)) {
299 load = new_bd_ia32_vfldz(dbgi, block);
301 set_ia32_ls_mode(load, mode);
302 } else if (is_Const_one(node)) {
303 load = new_bd_ia32_vfld1(dbgi, block);
305 set_ia32_ls_mode(load, mode);
310 floatent = ia32_create_float_const_entity(node);
311 /* create_float_const_ent is smart and sometimes creates
313 ls_mode = get_type_mode(get_entity_type(floatent));
314 base = get_symconst_base();
315 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
317 set_ia32_op_type(load, ia32_AddrModeS);
318 set_ia32_am_sc(load, floatent);
319 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
320 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
323 #ifdef CONSTRUCT_SSE_CONST
325 #endif /* CONSTRUCT_SSE_CONST */
326 SET_IA32_ORIG_NODE(load, node);
328 be_dep_on_frame(load);
330 } else { /* non-float mode */
332 ir_tarval *tv = get_Const_tarval(node);
335 tv = tarval_convert_to(tv, mode_Iu);
337 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
339 panic("couldn't convert constant tarval (%+F)", node);
341 val = get_tarval_long(tv);
343 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
344 SET_IA32_ORIG_NODE(cnst, node);
346 be_dep_on_frame(cnst);
352 * Transforms a SymConst.
354 static ir_node *gen_SymConst(ir_node *node)
356 ir_node *old_block = get_nodes_block(node);
357 ir_node *block = be_transform_node(old_block);
358 dbg_info *dbgi = get_irn_dbg_info(node);
359 ir_mode *mode = get_irn_mode(node);
362 if (mode_is_float(mode)) {
363 if (ia32_cg_config.use_sse2)
364 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
366 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
367 set_ia32_am_sc(cnst, get_SymConst_entity(node));
368 set_ia32_use_frame(cnst);
372 if (get_SymConst_kind(node) != symconst_addr_ent) {
373 panic("backend only support symconst_addr_ent (at %+F)", node);
375 entity = get_SymConst_entity(node);
376 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
379 SET_IA32_ORIG_NODE(cnst, node);
381 be_dep_on_frame(cnst);
386 * Create a float type for the given mode and cache it.
388 * @param mode the mode for the float type (might be integer mode for SSE2 types)
389 * @param align alignment
391 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
397 if (mode == mode_Iu) {
398 static ir_type *int_Iu[16] = {NULL, };
400 if (int_Iu[align] == NULL) {
401 int_Iu[align] = tp = new_type_primitive(mode);
402 /* set the specified alignment */
403 set_type_alignment_bytes(tp, align);
405 return int_Iu[align];
406 } else if (mode == mode_Lu) {
407 static ir_type *int_Lu[16] = {NULL, };
409 if (int_Lu[align] == NULL) {
410 int_Lu[align] = tp = new_type_primitive(mode);
411 /* set the specified alignment */
412 set_type_alignment_bytes(tp, align);
414 return int_Lu[align];
415 } else if (mode == mode_F) {
416 static ir_type *float_F[16] = {NULL, };
418 if (float_F[align] == NULL) {
419 float_F[align] = tp = new_type_primitive(mode);
420 /* set the specified alignment */
421 set_type_alignment_bytes(tp, align);
423 return float_F[align];
424 } else if (mode == mode_D) {
425 static ir_type *float_D[16] = {NULL, };
427 if (float_D[align] == NULL) {
428 float_D[align] = tp = new_type_primitive(mode);
429 /* set the specified alignment */
430 set_type_alignment_bytes(tp, align);
432 return float_D[align];
434 static ir_type *float_E[16] = {NULL, };
436 if (float_E[align] == NULL) {
437 float_E[align] = tp = new_type_primitive(mode);
438 /* set the specified alignment */
439 set_type_alignment_bytes(tp, align);
441 return float_E[align];
446 * Create a float[2] array type for the given atomic type.
448 * @param tp the atomic type
450 static ir_type *ia32_create_float_array(ir_type *tp)
452 ir_mode *mode = get_type_mode(tp);
453 unsigned align = get_type_alignment_bytes(tp);
458 if (mode == mode_F) {
459 static ir_type *float_F[16] = {NULL, };
461 if (float_F[align] != NULL)
462 return float_F[align];
463 arr = float_F[align] = new_type_array(1, tp);
464 } else if (mode == mode_D) {
465 static ir_type *float_D[16] = {NULL, };
467 if (float_D[align] != NULL)
468 return float_D[align];
469 arr = float_D[align] = new_type_array(1, tp);
471 static ir_type *float_E[16] = {NULL, };
473 if (float_E[align] != NULL)
474 return float_E[align];
475 arr = float_E[align] = new_type_array(1, tp);
477 set_type_alignment_bytes(arr, align);
478 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
479 set_type_state(arr, layout_fixed);
483 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
484 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
486 static const struct {
487 const char *ent_name;
488 const char *cnst_str;
491 } names [ia32_known_const_max] = {
492 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
493 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
494 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
495 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
496 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
498 static ir_entity *ent_cache[ia32_known_const_max];
500 const char *ent_name, *cnst_str;
506 ent_name = names[kct].ent_name;
507 if (! ent_cache[kct]) {
508 cnst_str = names[kct].cnst_str;
510 switch (names[kct].mode) {
511 case 0: mode = mode_Iu; break;
512 case 1: mode = mode_Lu; break;
513 default: mode = mode_F; break;
515 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
516 tp = ia32_create_float_type(mode, names[kct].align);
518 if (kct == ia32_ULLBIAS)
519 tp = ia32_create_float_array(tp);
520 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
522 set_entity_ld_ident(ent, get_entity_ident(ent));
523 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
524 set_entity_visibility(ent, ir_visibility_private);
526 if (kct == ia32_ULLBIAS) {
527 ir_initializer_t *initializer = create_initializer_compound(2);
529 set_initializer_compound_value(initializer, 0,
530 create_initializer_tarval(get_mode_null(mode)));
531 set_initializer_compound_value(initializer, 1,
532 create_initializer_tarval(tv));
534 set_entity_initializer(ent, initializer);
536 set_entity_initializer(ent, create_initializer_tarval(tv));
539 /* cache the entry */
540 ent_cache[kct] = ent;
543 return ent_cache[kct];
547 * return true if the node is a Proj(Load) and could be used in source address
548 * mode for another node. Will return only true if the @p other node is not
549 * dependent on the memory of the Load (for binary operations use the other
550 * input here, for unary operations use NULL).
552 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
553 ir_node *other, ir_node *other2, match_flags_t flags)
558 /* float constants are always available */
559 if (is_Const(node)) {
560 ir_mode *mode = get_irn_mode(node);
561 if (mode_is_float(mode)) {
562 if (ia32_cg_config.use_sse2) {
563 if (is_simple_sse_Const(node))
566 if (is_simple_x87_Const(node))
569 if (get_irn_n_edges(node) > 1)
577 load = get_Proj_pred(node);
578 pn = get_Proj_proj(node);
579 if (!is_Load(load) || pn != pn_Load_res)
581 if (get_nodes_block(load) != block)
583 /* we only use address mode if we're the only user of the load */
584 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
586 /* in some edge cases with address mode we might reach the load normally
587 * and through some AM sequence, if it is already materialized then we
588 * can't create an AM node from it */
589 if (be_is_transformed(node))
592 /* don't do AM if other node inputs depend on the load (via mem-proj) */
593 if (other != NULL && ia32_prevents_AM(block, load, other))
596 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
602 typedef struct ia32_address_mode_t ia32_address_mode_t;
603 struct ia32_address_mode_t {
608 ia32_op_type_t op_type;
612 unsigned commutative : 1;
613 unsigned ins_permuted : 1;
616 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
618 /* construct load address */
619 memset(addr, 0, sizeof(addr[0]));
620 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
622 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
623 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
624 addr->mem = be_transform_node(mem);
627 static void build_address(ia32_address_mode_t *am, ir_node *node,
628 ia32_create_am_flags_t flags)
630 ia32_address_t *addr = &am->addr;
636 /* floating point immediates */
637 if (is_Const(node)) {
638 ir_entity *entity = ia32_create_float_const_entity(node);
639 addr->base = get_symconst_base();
640 addr->index = noreg_GP;
642 addr->symconst_ent = entity;
644 am->ls_mode = get_type_mode(get_entity_type(entity));
645 am->pinned = op_pin_state_floats;
649 load = get_Proj_pred(node);
650 ptr = get_Load_ptr(load);
651 mem = get_Load_mem(load);
652 new_mem = be_transform_node(mem);
653 am->pinned = get_irn_pinned(load);
654 am->ls_mode = get_Load_mode(load);
655 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
658 /* construct load address */
659 ia32_create_address_mode(addr, ptr, flags);
661 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
662 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
666 static void set_address(ir_node *node, const ia32_address_t *addr)
668 set_ia32_am_scale(node, addr->scale);
669 set_ia32_am_sc(node, addr->symconst_ent);
670 set_ia32_am_offs_int(node, addr->offset);
671 if (addr->symconst_sign)
672 set_ia32_am_sc_sign(node);
674 set_ia32_use_frame(node);
675 set_ia32_frame_ent(node, addr->frame_entity);
679 * Apply attributes of a given address mode to a node.
681 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
683 set_address(node, &am->addr);
685 set_ia32_op_type(node, am->op_type);
686 set_ia32_ls_mode(node, am->ls_mode);
687 if (am->pinned == op_pin_state_pinned) {
688 /* beware: some nodes are already pinned and did not allow to change the state */
689 if (get_irn_pinned(node) != op_pin_state_pinned)
690 set_irn_pinned(node, op_pin_state_pinned);
693 set_ia32_commutative(node);
697 * Check, if a given node is a Down-Conv, ie. a integer Conv
698 * from a mode with a mode with more bits to a mode with lesser bits.
699 * Moreover, we return only true if the node has not more than 1 user.
701 * @param node the node
702 * @return non-zero if node is a Down-Conv
704 static int is_downconv(const ir_node *node)
712 /* we only want to skip the conv when we're the only user
713 * (because this test is used in the context of address-mode selection
714 * and we don't want to use address mode for multiple users) */
715 if (get_irn_n_edges(node) > 1)
718 src_mode = get_irn_mode(get_Conv_op(node));
719 dest_mode = get_irn_mode(node);
721 ia32_mode_needs_gp_reg(src_mode) &&
722 ia32_mode_needs_gp_reg(dest_mode) &&
723 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
726 /** Skip all Down-Conv's on a given node and return the resulting node. */
727 ir_node *ia32_skip_downconv(ir_node *node)
729 while (is_downconv(node))
730 node = get_Conv_op(node);
735 static bool is_sameconv(ir_node *node)
743 /* we only want to skip the conv when we're the only user
744 * (because this test is used in the context of address-mode selection
745 * and we don't want to use address mode for multiple users) */
746 if (get_irn_n_edges(node) > 1)
749 src_mode = get_irn_mode(get_Conv_op(node));
750 dest_mode = get_irn_mode(node);
752 ia32_mode_needs_gp_reg(src_mode) &&
753 ia32_mode_needs_gp_reg(dest_mode) &&
754 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
757 /** Skip all signedness convs */
758 static ir_node *ia32_skip_sameconv(ir_node *node)
760 while (is_sameconv(node))
761 node = get_Conv_op(node);
766 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
768 ir_mode *mode = get_irn_mode(node);
773 if (mode_is_signed(mode)) {
778 block = get_nodes_block(node);
779 dbgi = get_irn_dbg_info(node);
781 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
785 * matches operands of a node into ia32 addressing/operand modes. This covers
786 * usage of source address mode, immediates, operations with non 32-bit modes,
788 * The resulting data is filled into the @p am struct. block is the block
789 * of the node whose arguments are matched. op1, op2 are the first and second
790 * input that are matched (op1 may be NULL). other_op is another unrelated
791 * input that is not matched! but which is needed sometimes to check if AM
792 * for op1/op2 is legal.
793 * @p flags describes the supported modes of the operation in detail.
795 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
796 ir_node *op1, ir_node *op2, ir_node *other_op,
799 ia32_address_t *addr = &am->addr;
800 ir_mode *mode = get_irn_mode(op2);
801 int mode_bits = get_mode_size_bits(mode);
802 ir_node *new_op1, *new_op2;
804 unsigned commutative;
805 int use_am_and_immediates;
808 memset(am, 0, sizeof(am[0]));
810 commutative = (flags & match_commutative) != 0;
811 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
812 use_am = (flags & match_am) != 0;
813 use_immediate = (flags & match_immediate) != 0;
814 assert(!use_am_and_immediates || use_immediate);
817 assert(!commutative || op1 != NULL);
818 assert(use_am || !(flags & match_8bit_am));
819 assert(use_am || !(flags & match_16bit_am));
821 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
822 (mode_bits == 16 && !(flags & match_16bit_am))) {
826 /* we can simply skip downconvs for mode neutral nodes: the upper bits
827 * can be random for these operations */
828 if (flags & match_mode_neutral) {
829 op2 = ia32_skip_downconv(op2);
831 op1 = ia32_skip_downconv(op1);
834 op2 = ia32_skip_sameconv(op2);
836 op1 = ia32_skip_sameconv(op1);
840 /* match immediates. firm nodes are normalized: constants are always on the
843 if (!(flags & match_try_am) && use_immediate) {
844 new_op2 = ia32_try_create_Immediate(op2, 0);
847 if (new_op2 == NULL &&
848 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
849 build_address(am, op2, ia32_create_am_normal);
850 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
851 if (mode_is_float(mode)) {
852 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
856 am->op_type = ia32_AddrModeS;
857 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
859 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
861 build_address(am, op1, ia32_create_am_normal);
863 if (mode_is_float(mode)) {
864 noreg = ia32_new_NoReg_vfp(current_ir_graph);
869 if (new_op2 != NULL) {
872 new_op1 = be_transform_node(op2);
874 am->ins_permuted = true;
876 am->op_type = ia32_AddrModeS;
879 am->op_type = ia32_Normal;
881 if (flags & match_try_am) {
887 mode = get_irn_mode(op2);
888 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
889 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
891 new_op2 = create_upconv(op2, NULL);
892 am->ls_mode = mode_Iu;
894 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
896 new_op2 = be_transform_node(op2);
897 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
900 if (addr->base == NULL)
901 addr->base = noreg_GP;
902 if (addr->index == NULL)
903 addr->index = noreg_GP;
904 if (addr->mem == NULL)
907 am->new_op1 = new_op1;
908 am->new_op2 = new_op2;
909 am->commutative = commutative;
913 * "Fixes" a node that uses address mode by turning it into mode_T
914 * and returning a pn_ia32_res Proj.
916 * @param node the node
917 * @param am its address mode
919 * @return a Proj(pn_ia32_res) if a memory address mode is used,
922 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
927 if (am->mem_proj == NULL)
930 /* we have to create a mode_T so the old MemProj can attach to us */
931 mode = get_irn_mode(node);
932 load = get_Proj_pred(am->mem_proj);
934 be_set_transformed_node(load, node);
936 if (mode != mode_T) {
937 set_irn_mode(node, mode_T);
938 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
945 * Construct a standard binary operation, set AM and immediate if required.
947 * @param node The original node for which the binop is created
948 * @param op1 The first operand
949 * @param op2 The second operand
950 * @param func The node constructor function
951 * @return The constructed ia32 node.
953 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
954 construct_binop_func *func, match_flags_t flags)
957 ir_node *block, *new_block, *new_node;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
961 block = get_nodes_block(node);
962 match_arguments(&am, block, op1, op2, NULL, flags);
964 dbgi = get_irn_dbg_info(node);
965 new_block = be_transform_node(block);
966 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
981 * Generic names for the inputs of an ia32 binary op.
984 n_ia32_l_binop_left, /**< ia32 left input */
985 n_ia32_l_binop_right, /**< ia32 right input */
986 n_ia32_l_binop_eflags /**< ia32 eflags input */
988 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
989 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
990 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
991 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
992 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
993 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
996 * Construct a binary operation which also consumes the eflags.
998 * @param node The node to transform
999 * @param func The node constructor function
1000 * @param flags The match flags
1001 * @return The constructor ia32 node
1003 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1004 match_flags_t flags)
1006 ir_node *src_block = get_nodes_block(node);
1007 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1008 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1009 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1011 ir_node *block, *new_node, *new_eflags;
1012 ia32_address_mode_t am;
1013 ia32_address_t *addr = &am.addr;
1015 match_arguments(&am, src_block, op1, op2, eflags, flags);
1017 dbgi = get_irn_dbg_info(node);
1018 block = be_transform_node(src_block);
1019 new_eflags = be_transform_node(eflags);
1020 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1021 am.new_op1, am.new_op2, new_eflags);
1022 set_am_attributes(new_node, &am);
1023 /* we can't use source address mode anymore when using immediates */
1024 if (!(flags & match_am_and_immediates) &&
1025 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1026 set_ia32_am_support(new_node, ia32_am_none);
1027 SET_IA32_ORIG_NODE(new_node, node);
1029 new_node = fix_mem_proj(new_node, &am);
1034 static ir_node *get_fpcw(void)
1037 if (initial_fpcw != NULL)
1038 return initial_fpcw;
1040 fpcw = be_abi_get_ignore_irn(be_get_irg_abi(current_ir_graph),
1041 &ia32_registers[REG_FPCW]);
1042 initial_fpcw = be_transform_node(fpcw);
1044 return initial_fpcw;
1048 * Construct a standard binary operation, set AM and immediate if required.
1050 * @param op1 The first operand
1051 * @param op2 The second operand
1052 * @param func The node constructor function
1053 * @return The constructed ia32 node.
1055 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1056 construct_binop_float_func *func)
1058 ir_mode *mode = get_irn_mode(node);
1060 ir_node *block, *new_block, *new_node;
1061 ia32_address_mode_t am;
1062 ia32_address_t *addr = &am.addr;
1063 ia32_x87_attr_t *attr;
1064 /* All operations are considered commutative, because there are reverse
1066 match_flags_t flags = match_commutative;
1068 /* happens for div nodes... */
1069 if (mode == mode_T) {
1071 mode = get_Div_resmode(node);
1073 panic("can't determine mode");
1076 /* cannot use address mode with long double on x87 */
1077 if (get_mode_size_bits(mode) <= 64)
1080 block = get_nodes_block(node);
1081 match_arguments(&am, block, op1, op2, NULL, flags);
1083 dbgi = get_irn_dbg_info(node);
1084 new_block = be_transform_node(block);
1085 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1086 am.new_op1, am.new_op2, get_fpcw());
1087 set_am_attributes(new_node, &am);
1089 attr = get_ia32_x87_attr(new_node);
1090 attr->attr.data.ins_permuted = am.ins_permuted;
1092 SET_IA32_ORIG_NODE(new_node, node);
1094 new_node = fix_mem_proj(new_node, &am);
1100 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1102 * @param op1 The first operand
1103 * @param op2 The second operand
1104 * @param func The node constructor function
1105 * @return The constructed ia32 node.
1107 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1108 construct_shift_func *func,
1109 match_flags_t flags)
1112 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1114 assert(! mode_is_float(get_irn_mode(node)));
1115 assert(flags & match_immediate);
1116 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1118 if (flags & match_mode_neutral) {
1119 op1 = ia32_skip_downconv(op1);
1120 new_op1 = be_transform_node(op1);
1121 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1122 new_op1 = create_upconv(op1, node);
1124 new_op1 = be_transform_node(op1);
1127 /* the shift amount can be any mode that is bigger than 5 bits, since all
1128 * other bits are ignored anyway */
1129 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1130 ir_node *const op = get_Conv_op(op2);
1131 if (mode_is_float(get_irn_mode(op)))
1134 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1136 new_op2 = create_immediate_or_transform(op2, 0);
1138 dbgi = get_irn_dbg_info(node);
1139 block = get_nodes_block(node);
1140 new_block = be_transform_node(block);
1141 new_node = func(dbgi, new_block, new_op1, new_op2);
1142 SET_IA32_ORIG_NODE(new_node, node);
1144 /* lowered shift instruction may have a dependency operand, handle it here */
1145 if (get_irn_arity(node) == 3) {
1146 /* we have a dependency */
1147 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1148 add_irn_dep(new_node, new_dep);
1156 * Construct a standard unary operation, set AM and immediate if required.
1158 * @param op The operand
1159 * @param func The node constructor function
1160 * @return The constructed ia32 node.
1162 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1163 match_flags_t flags)
1166 ir_node *block, *new_block, *new_op, *new_node;
1168 assert(flags == 0 || flags == match_mode_neutral);
1169 if (flags & match_mode_neutral) {
1170 op = ia32_skip_downconv(op);
1173 new_op = be_transform_node(op);
1174 dbgi = get_irn_dbg_info(node);
1175 block = get_nodes_block(node);
1176 new_block = be_transform_node(block);
1177 new_node = func(dbgi, new_block, new_op);
1179 SET_IA32_ORIG_NODE(new_node, node);
1184 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1185 ia32_address_t *addr)
1187 ir_node *base, *index, *res;
1193 base = be_transform_node(base);
1196 index = addr->index;
1197 if (index == NULL) {
1200 index = be_transform_node(index);
1203 res = new_bd_ia32_Lea(dbgi, block, base, index);
1204 set_address(res, addr);
1210 * Returns non-zero if a given address mode has a symbolic or
1211 * numerical offset != 0.
1213 static int am_has_immediates(const ia32_address_t *addr)
1215 return addr->offset != 0 || addr->symconst_ent != NULL
1216 || addr->frame_entity || addr->use_frame;
1220 * Creates an ia32 Add.
1222 * @return the created ia32 Add node
1224 static ir_node *gen_Add(ir_node *node)
1226 ir_mode *mode = get_irn_mode(node);
1227 ir_node *op1 = get_Add_left(node);
1228 ir_node *op2 = get_Add_right(node);
1230 ir_node *block, *new_block, *new_node, *add_immediate_op;
1231 ia32_address_t addr;
1232 ia32_address_mode_t am;
/* float adds: SSE2 xAdd or x87 vfadd, no Lea tricks apply */
1234 if (mode_is_float(mode)) {
1235 if (ia32_cg_config.use_sse2)
1236 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1237 match_commutative | match_am);
1239 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1242 ia32_mark_non_am(node);
1244 op2 = ia32_skip_downconv(op2);
1245 op1 = ia32_skip_downconv(op1);
/* Strategy (see also the original numbered list below):
 * fold the whole Add into an address-mode expression first, then pick the
 * cheapest encoding. */
1249 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1250 * 1. Add with immediate -> Lea
1251 * 2. Add with possible source address mode -> Add
1252 * 3. Otherwise -> Lea
1254 memset(&addr, 0, sizeof(addr));
1255 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1256 add_immediate_op = NULL;
1258 dbgi = get_irn_dbg_info(node);
1259 block = get_nodes_block(node);
1260 new_block = be_transform_node(block);
/* case 0: everything folded into immediates -> emit a single Const */
1263 if (addr.base == NULL && addr.index == NULL) {
1264 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1265 addr.symconst_sign, 0, addr.offset);
1266 be_dep_on_frame(new_node);
1267 SET_IA32_ORIG_NODE(new_node, node);
1270 /* add with immediate? */
1271 if (addr.index == NULL) {
1272 add_immediate_op = addr.base;
1273 } else if (addr.base == NULL && addr.scale == 0) {
1274 add_immediate_op = addr.index;
1277 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to the operand itself */
1278 if (!am_has_immediates(&addr)) {
1279 #ifdef DEBUG_libfirm
1280 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1283 return be_transform_node(add_immediate_op);
/* case 1: single operand plus immediate -> Lea */
1286 new_node = create_lea_from_address(dbgi, new_block, &addr);
1287 SET_IA32_ORIG_NODE(new_node, node);
1291 /* test if we can use source address mode */
1292 match_arguments(&am, block, op1, op2, NULL, match_commutative
1293 | match_mode_neutral | match_am | match_immediate | match_try_am);
1295 /* construct an Add with source address mode */
1296 if (am.op_type == ia32_AddrModeS) {
1297 ia32_address_t *am_addr = &am.addr;
1298 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1299 am_addr->index, am_addr->mem, am.new_op1,
1301 set_am_attributes(new_node, &am);
1302 SET_IA32_ORIG_NODE(new_node, node);
1304 new_node = fix_mem_proj(new_node, &am);
1309 /* otherwise construct a lea */
1310 new_node = create_lea_from_address(dbgi, new_block, &addr);
1311 SET_IA32_ORIG_NODE(new_node, node);
1316 * Creates an ia32 Mul.
1318 * @return the created ia32 Mul node
1320 static ir_node *gen_Mul(ir_node *node)
1322 ir_node *op1 = get_Mul_left(node);
1323 ir_node *op2 = get_Mul_right(node);
1324 ir_mode *mode = get_irn_mode(node);
/* float multiplies go to SSE2 xMul or x87 vfmul */
1326 if (mode_is_float(mode)) {
1327 if (ia32_cg_config.use_sse2)
1328 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1329 match_commutative | match_am);
1331 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul supports AM, immediates and mode-neutral operands */
1333 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1334 match_commutative | match_am | match_mode_neutral |
1335 match_immediate | match_am_and_immediates);
1339 * Creates an ia32 Mulh.
1340 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1341 * this result while Mul returns the lower 32 bit.
1343 * @return the created ia32 Mulh node
1345 static ir_node *gen_Mulh(ir_node *node)
1347 dbg_info *dbgi = get_irn_dbg_info(node);
1348 ir_node *op1 = get_Mulh_left(node);
1349 ir_node *op2 = get_Mulh_right(node);
1350 ir_mode *mode = get_irn_mode(node);
1352 ir_node *proj_res_high;
/* only 32x32->64 widening multiply is available on ia32 */
1354 if (get_mode_size_bits(mode) != 32) {
1355 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand IMul, unsigned -> Mul; project out the high half */
1358 if (mode_is_signed(mode)) {
1359 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1360 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1362 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1363 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1365 return proj_res_high;
1369 * Creates an ia32 And.
1371 * @return The created ia32 And node
1373 static ir_node *gen_And(ir_node *node)
1375 ir_node *op1 = get_And_left(node);
1376 ir_node *op2 = get_And_right(node);
1377 assert(! mode_is_float(get_irn_mode(node)));
1379 /* is it a zero extension? */
1380 if (is_Const(op2)) {
1381 ir_tarval *tv = get_Const_tarval(op2);
1382 long v = get_tarval_long(tv);
/* And with 0xFF / 0xFFFF is a byte/word zero-extension -> use a Conv */
1384 if (v == 0xFF || v == 0xFFFF) {
1385 dbg_info *dbgi = get_irn_dbg_info(node);
1386 ir_node *block = get_nodes_block(node);
/* NOTE(review): the src_mode selection for the 0xFF case appears elided
 * in this extract; the visible assert covers only the 0xFFFF path */
1393 assert(v == 0xFFFF);
1396 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1401 return gen_binop(node, op1, op2, new_bd_ia32_And,
1402 match_commutative | match_mode_neutral | match_am | match_immediate);
1408 * Creates an ia32 Or.
1410 * @return The created ia32 Or node
1412 static ir_node *gen_Or(ir_node *node)
1414 ir_node *op1 = get_Or_left(node);
1415 ir_node *op2 = get_Or_right(node);
/* bitwise Or is integer-only here; floats never reach this transform */
1417 assert (! mode_is_float(get_irn_mode(node)));
1418 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1419 | match_mode_neutral | match_am | match_immediate);
1425 * Creates an ia32 Eor.
1427 * @return The created ia32 Eor node
1429 static ir_node *gen_Eor(ir_node *node)
1431 ir_node *op1 = get_Eor_left(node);
1432 ir_node *op2 = get_Eor_right(node);
/* exclusive-or maps directly to the ia32 Xor instruction */
1434 assert(! mode_is_float(get_irn_mode(node)));
1435 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1436 | match_mode_neutral | match_am | match_immediate);
1441 * Creates an ia32 Sub.
1443 * @return The created ia32 Sub node
1445 static ir_node *gen_Sub(ir_node *node)
1447 ir_node *op1 = get_Sub_left(node);
1448 ir_node *op2 = get_Sub_right(node);
1449 ir_mode *mode = get_irn_mode(node);
/* float subtraction: SSE2 xSub or x87 vfsub (not commutative, so no
 * match_commutative flag) */
1451 if (mode_is_float(mode)) {
1452 if (ia32_cg_config.use_sse2)
1453 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1455 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,C should have been normalized to Add x,-C by the middleend */
1458 if (is_Const(op2)) {
1459 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1463 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1464 | match_am | match_immediate);
/* Compute the memory input for a node that consumed a load via address mode.
 * Combines the node's original memory predecessor (src_mem) with the memory
 * of the folded address (am_mem), taking care not to create a cycle through
 * the load that was folded away (src_val is the loaded value). */
1467 static ir_node *transform_AM_mem(ir_node *const block,
1468 ir_node *const src_val,
1469 ir_node *const src_mem,
1470 ir_node *const am_mem)
1472 if (is_NoMem(am_mem)) {
1473 return be_transform_node(src_mem);
1474 } else if (is_Proj(src_val) &&
1476 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1477 /* avoid memory loop */
/* src_mem is the mem-Proj of the very load we folded: drop it */
1479 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1480 ir_node *const ptr_pred = get_Proj_pred(src_val);
1481 int const arity = get_Sync_n_preds(src_mem);
1486 NEW_ARR_A(ir_node*, ins, arity + 1);
1488 /* NOTE: This sometimes produces dead-code because the old sync in
1489 * src_mem might not be used anymore, we should detect this case
1490 * and kill the sync... */
1491 for (i = arity - 1; i >= 0; --i) {
1492 ir_node *const pred = get_Sync_pred(src_mem, i);
1494 /* avoid memory loop */
1495 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1498 ins[n++] = be_transform_node(pred);
/* rebuild the Sync from the surviving predecessors (+ am_mem) */
1503 return new_r_Sync(block, n, ins);
/* default: Sync the transformed src_mem with am_mem */
1507 ins[0] = be_transform_node(src_mem);
1509 return new_r_Sync(block, 2, ins);
1514 * Create a 32bit to 64bit signed extension.
1516 * @param dbgi debug info
1517 * @param block the block where node nodes should be placed
1518 * @param val the value to extend
1519 * @param orig the original node
1521 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1522 ir_node *val, const ir_node *orig)
/* Either a short cdq/cltd (needs the value in eax, hence ProduceVal) or an
 * explicit arithmetic shift-right by 31 to replicate the sign bit. */
1527 if (ia32_cg_config.use_short_sex_eax) {
1528 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1529 be_dep_on_frame(pval);
1530 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1532 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1533 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1535 SET_IA32_ORIG_NODE(res, orig);
1540 * Generates an ia32 Div with additional infrastructure for the
1541 * register allocator if needed.
1543 static ir_node *create_Div(ir_node *node)
1545 dbg_info *dbgi = get_irn_dbg_info(node);
1546 ir_node *block = get_nodes_block(node);
1547 ir_node *new_block = be_transform_node(block);
1554 ir_node *sign_extension;
1555 ia32_address_mode_t am;
1556 ia32_address_t *addr = &am.addr;
/* shared transform for Div and Mod: extract operands per opcode */
1558 /* the upper bits have random contents for smaller modes */
1559 switch (get_irn_opcode(node)) {
1561 op1 = get_Div_left(node);
1562 op2 = get_Div_right(node);
1563 mem = get_Div_mem(node);
1564 mode = get_Div_resmode(node);
1567 op1 = get_Mod_left(node);
1568 op2 = get_Mod_right(node);
1569 mem = get_Mod_mem(node);
1570 mode = get_Mod_resmode(node);
1573 panic("invalid divmod node %+F", node);
1576 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1578 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1579 is the memory of the consumed address. We can have only the second op as address
1580 in Div nodes, so check only op2. */
1581 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed: sign-extend the dividend into edx (IDiv); unsigned: edx = 0 (Div) */
1583 if (mode_is_signed(mode)) {
1584 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1585 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1586 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1588 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1589 be_dep_on_frame(sign_extension);
1591 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1592 addr->index, new_mem, am.new_op2,
1593 am.new_op1, sign_extension);
/* keep original pinned state so exception semantics are preserved */
1596 set_irn_pinned(new_node, get_irn_pinned(node));
1598 set_am_attributes(new_node, &am);
1599 SET_IA32_ORIG_NODE(new_node, node);
1601 new_node = fix_mem_proj(new_node, &am);
1607 * Generates an ia32 Mod.
1609 static ir_node *gen_Mod(ir_node *node)
/* Mod shares the ia32 div/idiv instruction with Div; see create_Div() */
1611 return create_Div(node);
1615 * Generates an ia32 Div.
1617 static ir_node *gen_Div(ir_node *node)
1619 ir_mode *mode = get_Div_resmode(node);
/* float division: SSE2 xDiv or x87 vfdiv */
1620 if (mode_is_float(mode)) {
1621 ir_node *op1 = get_Div_left(node);
1622 ir_node *op2 = get_Div_right(node);
1624 if (ia32_cg_config.use_sse2) {
1625 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1627 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
/* integer division: shared div/idiv construction */
1631 return create_Div(node);
1635 * Creates an ia32 Shl.
1637 * @return The created ia32 Shl node
1639 static ir_node *gen_Shl(ir_node *node)
1641 ir_node *left = get_Shl_left(node);
1642 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral (low bits unaffected by upper garbage) */
1644 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1645 match_mode_neutral | match_immediate);
1649 * Creates an ia32 Shr.
1651 * @return The created ia32 Shr node
1653 static ir_node *gen_Shr(ir_node *node)
1655 ir_node *left = get_Shr_left(node);
1656 ir_node *right = get_Shr_right(node);
/* logical right shift: NOT mode-neutral, upper bits must be correct */
1658 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1664 * Creates an ia32 Sar.
1666 * @return The created ia32 Shrs node
1668 static ir_node *gen_Shrs(ir_node *node)
1670 ir_node *left = get_Shrs_left(node);
1671 ir_node *right = get_Shrs_right(node);
/* Shrs x,31 extracts the sign bit -> use the sign-extension helper */
1673 if (is_Const(right)) {
1674 ir_tarval *tv = get_Const_tarval(right);
1675 long val = get_tarval_long(tv);
1677 /* this is a sign extension */
1678 dbg_info *dbgi = get_irn_dbg_info(node);
1679 ir_node *block = be_transform_node(get_nodes_block(node));
1680 ir_node *new_op = be_transform_node(left);
1682 return create_sex_32_64(dbgi, block, new_op, node);
1686 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C in {16,24} is a sign-extending byte/word
 * truncation -> replace by an I2I Conv */
1687 if (is_Const(right) && is_Shl(left)) {
1688 ir_node *shl_left = get_Shl_left(left);
1689 ir_node *shl_right = get_Shl_right(left);
1690 if (is_Const(shl_right)) {
1691 ir_tarval *tv1 = get_Const_tarval(right);
1692 ir_tarval *tv2 = get_Const_tarval(shl_right);
1693 if (tv1 == tv2 && tarval_is_long(tv1)) {
1694 long val = get_tarval_long(tv1);
1695 if (val == 16 || val == 24) {
1696 dbg_info *dbgi = get_irn_dbg_info(node);
1697 ir_node *block = get_nodes_block(node);
1707 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* fallback: plain arithmetic shift right */
1716 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1722 * Creates an ia32 Rol.
1724 * @param op1 The first operator
1725 * @param op2 The second operator
1726 * @return The created ia32 RotL node
1728 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
/* rotate-left maps directly onto the ia32 rol instruction */
1730 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1736 * Creates an ia32 Ror.
1737 * NOTE: There is no RotR with immediate because this would always be a RotL
1738 * "imm-mode_size_bits" which can be pre-calculated.
1740 * @param op1 The first operator
1741 * @param op2 The second operator
1742 * @return The created ia32 RotR node
1744 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1746 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1752 * Creates an ia32 RotR or RotL (depending on the found pattern).
1754 * @return The created ia32 RotL or RotR node
1756 static ir_node *gen_Rotl(ir_node *node)
1758 ir_node *op1 = get_Rotl_left(node);
1759 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -y) is the same as Ror(x, y): fold away the Minus */
1761 if (is_Minus(op2)) {
1762 return gen_Ror(node, op1, get_Minus_op(op2));
1765 return gen_Rol(node, op1, op2);
1771 * Transforms a Minus node.
1773 * @return The created ia32 Minus node
1775 static ir_node *gen_Minus(ir_node *node)
1777 ir_node *op = get_Minus_op(node);
1778 ir_node *block = be_transform_node(get_nodes_block(node));
1779 dbg_info *dbgi = get_irn_dbg_info(node);
1780 ir_mode *mode = get_irn_mode(node);
/* float negation: flip the sign bit (SSE xor with sign mask) or use the
 * x87 fchs instruction; integer negation uses neg */
1785 if (mode_is_float(mode)) {
1786 ir_node *new_op = be_transform_node(op);
1787 if (ia32_cg_config.use_sse2) {
1788 /* TODO: non-optimal... if we have many xXors, then we should
1789 * rather create a load for the const and use that instead of
1790 * several AM nodes... */
1791 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1793 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1794 noreg_GP, nomem, new_op, noreg_xmm);
/* pick the 32- or 64-bit sign-bit mask constant entity */
1796 size = get_mode_size_bits(mode);
1797 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1799 set_ia32_am_sc(new_node, ent);
1800 set_ia32_op_type(new_node, ia32_AddrModeS);
1801 set_ia32_ls_mode(new_node, mode);
1803 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1806 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1809 SET_IA32_ORIG_NODE(new_node, node);
1815 * Transforms a Not node.
1817 * @return The created ia32 Not node
1819 static ir_node *gen_Not(ir_node *node)
1821 ir_node *op = get_Not_op(node);
/* boolean Not must have been lowered earlier; floats have no bitwise not */
1823 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1824 assert (! mode_is_float(get_irn_mode(node)));
1826 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Build |op| (or -|op| when negate is set).
 * Floats: SSE clears the sign bit by ANDing with an abs mask constant; x87
 * uses fabs (plus fchs for the negated form).
 * Integers: classic branch-free abs via sign mask: s = op >> 31;
 * result = (op ^ s) - s (operand order swapped when negate is set). */
1829 static ir_node *create_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1830 bool negate, ir_node *node)
1832 ir_node *new_block = be_transform_node(block);
1833 ir_mode *mode = get_irn_mode(op);
1839 if (mode_is_float(mode)) {
1840 new_op = be_transform_node(op);
1842 if (ia32_cg_config.use_sse2) {
1843 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1844 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1845 noreg_GP, nomem, new_op, noreg_fp);
/* 32- or 64-bit "all bits but sign" mask constant */
1847 size = get_mode_size_bits(mode);
1848 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1850 set_ia32_am_sc(new_node, ent);
1852 SET_IA32_ORIG_NODE(new_node, node);
1854 set_ia32_op_type(new_node, ia32_AddrModeS);
1855 set_ia32_ls_mode(new_node, mode);
1857 /* TODO, implement -Abs case */
1860 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1861 SET_IA32_ORIG_NODE(new_node, node);
/* negate on top of fabs when requested */
1863 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1864 SET_IA32_ORIG_NODE(new_node, node);
1869 ir_node *sign_extension;
/* smaller int modes are first widened to 32 bit */
1871 if (get_mode_size_bits(mode) == 32) {
1872 new_op = be_transform_node(op);
1874 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1877 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1879 xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1880 nomem, new_op, sign_extension);
1881 SET_IA32_ORIG_NODE(xorn, node);
/* Sub operand order decides between abs and -abs */
1884 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1885 nomem, sign_extension, xorn);
1887 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1888 nomem, xorn, sign_extension);
1890 SET_IA32_ORIG_NODE(new_node, node);
1897 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1899 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1901 dbg_info *dbgi = get_irn_dbg_info(cmp);
1902 ir_node *block = get_nodes_block(cmp);
1903 ir_node *new_block = be_transform_node(block);
1904 ir_node *op1 = be_transform_node(x);
1905 ir_node *op2 = be_transform_node(n);
/* bt copies bit op2 of op1 into the carry flag */
1907 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm compare relation (pn_Cmp) onto an ia32 condition code,
 * distinguishing float, signed-int and unsigned-int comparisons.
 * Unhandled relations (jump always/never) hit the panic paths. */
1910 static ia32_condition_code_t pnc_to_condition_code(pn_Cmp pnc, ir_mode *mode)
1912 if (mode_is_float(mode)) {
1914 case pn_Cmp_Eq: return ia32_cc_float_equal;
1915 case pn_Cmp_Lt: return ia32_cc_float_below;
1916 case pn_Cmp_Le: return ia32_cc_float_below_equal;
1917 case pn_Cmp_Gt: return ia32_cc_float_above;
1918 case pn_Cmp_Ge: return ia32_cc_float_above_equal;
1919 case pn_Cmp_Lg: return ia32_cc_not_equal;
1920 case pn_Cmp_Leg: return ia32_cc_not_parity;
1921 case pn_Cmp_Uo: return ia32_cc_parity;
1922 case pn_Cmp_Ue: return ia32_cc_equal;
1923 case pn_Cmp_Ul: return ia32_cc_float_unordered_below;
1924 case pn_Cmp_Ule: return ia32_cc_float_unordered_below_equal;
1925 case pn_Cmp_Ug: return ia32_cc_float_unordered_above;
1926 case pn_Cmp_Uge: return ia32_cc_float_unordered_above_equal;
1927 case pn_Cmp_Ne: return ia32_cc_float_not_equal;
1931 /* should we introduce a jump always/jump never? */
1934 panic("Unexpected float pnc");
1935 } else if (mode_is_signed(mode)) {
1938 case pn_Cmp_Eq: return ia32_cc_equal;
1940 case pn_Cmp_Lt: return ia32_cc_less;
1942 case pn_Cmp_Le: return ia32_cc_less_equal;
1944 case pn_Cmp_Gt: return ia32_cc_greater;
1946 case pn_Cmp_Ge: return ia32_cc_greater_equal;
1948 case pn_Cmp_Ne: return ia32_cc_not_equal;
1954 /* introduce jump always/jump never? */
1957 panic("Unexpected pnc");
/* unsigned integer comparisons use the below/above condition codes */
1961 case pn_Cmp_Eq: return ia32_cc_equal;
1963 case pn_Cmp_Lt: return ia32_cc_below;
1965 case pn_Cmp_Le: return ia32_cc_below_equal;
1967 case pn_Cmp_Gt: return ia32_cc_above;
1969 case pn_Cmp_Ge: return ia32_cc_above_equal;
1971 case pn_Cmp_Ne: return ia32_cc_not_equal;
1977 /* introduce jump always/jump never? */
1980 panic("Unexpected pnc");
/* Produce a flags value for a mode_b (boolean) node by testing it against
 * itself; the result is "true" iff the value is non-zero. */
1984 static ir_node *get_flags_mode_b(ir_node *node, ia32_condition_code_t *cc_out)
1986 /* a mode_b value, we have to compare it against 0 */
1987 dbg_info *dbgi = get_irn_dbg_info(node);
1988 ir_node *new_block = be_transform_node(get_nodes_block(node));
1989 ir_node *new_op = be_transform_node(node);
/* test reg,reg sets ZF iff the value is 0 */
1990 ir_node *flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op, new_op, false);
1991 *cc_out = ia32_cc_not_equal;
/* Produce a flags value for a Proj(Cmp) node, also returning the matching
 * ia32 condition code through cc_out. Recognizes the x & (1 << n) bit-test
 * pattern and emits bt instead of a generic compare when possible. */
1995 static ir_node *get_flags_node_cmp(ir_node *node, ia32_condition_code_t *cc_out)
1997 /* must have a Proj(Cmp) as input */
1998 ir_node *cmp = get_Proj_pred(node);
1999 int pnc = get_Proj_pn_cmp(node);
2000 ir_node *l = get_Cmp_left(cmp);
2001 ir_mode *mode = get_irn_mode(l);
2004 /* check for bit-test */
2005 if (ia32_cg_config.use_bt
2006 && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq || pnc == pn_Cmp_Ne
2007 || pnc == pn_Cmp_Ue)) {
2008 ir_node *l = get_Cmp_left(cmp);
2009 ir_node *r = get_Cmp_right(cmp);
2011 ir_node *la = get_And_left(l);
2012 ir_node *ra = get_And_right(l);
2019 ir_node *c = get_Shl_left(la);
2020 if (is_Const_1(c) && is_Const_0(r)) {
2021 /* (1 << n) & ra) */
2022 ir_node *n = get_Shl_right(la);
2023 flags = gen_bt(cmp, ra, n);
2024 /* the bit is copied into the CF flag */
2025 if (pnc & pn_Cmp_Eq)
2026 *cc_out = ia32_cc_below; /* ==0, so we test for CF=1 */
2028 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2035 /* just do a normal transformation of the Cmp */
2036 *cc_out = pnc_to_condition_code(pnc, mode);
2037 flags = be_transform_node(cmp);
2042 * Transform a node returning a "flag" result.
2044 * @param node the node to transform
2045 * @param cc_out the compare mode to use
2047 static ir_node *get_flags_node(ir_node *node, ia32_condition_code_t *cc_out)
/* dispatch: Proj(Cmp) gets the compare path, anything else must be mode_b */
2049 if (is_Proj(node) && is_Cmp(get_Proj_pred(node)))
2050 return get_flags_node_cmp(node, cc_out);
2051 assert(get_irn_mode(node) == mode_b);
2052 return get_flags_mode_b(node, cc_out);
2056 * Transforms a Load.
2058 * @return the created ia32 Load node
2060 static ir_node *gen_Load(ir_node *node)
2062 ir_node *old_block = get_nodes_block(node);
2063 ir_node *block = be_transform_node(old_block);
2064 ir_node *ptr = get_Load_ptr(node);
2065 ir_node *mem = get_Load_mem(node);
2066 ir_node *new_mem = be_transform_node(mem);
2069 dbg_info *dbgi = get_irn_dbg_info(node);
2070 ir_mode *mode = get_Load_mode(node);
2072 ia32_address_t addr;
2074 /* construct load address */
2075 memset(&addr, 0, sizeof(addr));
2076 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2083 base = be_transform_node(base);
2086 if (index == NULL) {
2089 index = be_transform_node(index);
/* float loads: SSE2 xLoad or x87 vfld */
2092 if (mode_is_float(mode)) {
2093 if (ia32_cg_config.use_sse2) {
2094 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2097 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2101 assert(mode != mode_b);
2103 /* create a conv node with address mode for smaller modes */
2104 if (get_mode_size_bits(mode) < 32) {
2105 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2106 new_mem, noreg_GP, mode);
2108 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2112 set_irn_pinned(new_node, get_irn_pinned(node));
2113 set_ia32_op_type(new_node, ia32_AddrModeS);
2114 set_ia32_ls_mode(new_node, mode);
2115 set_address(new_node, &addr);
/* floating (unpinned) loads can be redone freely by the spiller */
2117 if (get_irn_pinned(node) == op_pin_state_floats) {
2118 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2119 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2120 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2121 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2124 SET_IA32_ORIG_NODE(new_node, node);
2126 be_dep_on_frame(new_node);
/* Decide whether a value (a Proj of a Load) may be folded into destination
 * address mode for a store to ptr. Requires: the load result has a single
 * user, load and store live in the same block and address, and folding
 * creates no dependency cycle via `other` or the memory graph. */
2130 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2131 ir_node *ptr, ir_node *other)
2138 /* we only use address mode if we're the only user of the load */
2139 if (get_irn_n_edges(node) > 1)
2142 load = get_Proj_pred(node);
2145 if (get_nodes_block(load) != block)
2148 /* store should have the same pointer as the load */
2149 if (get_Load_ptr(load) != ptr)
2152 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2153 if (other != NULL &&
2154 get_nodes_block(other) == block &&
2155 heights_reachable_in_block(ia32_heights, other, load)) {
2159 if (ia32_prevents_AM(block, load, mem))
2161 /* Store should be attached to the load via mem */
2162 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Try to build a destination-address-mode binary op (e.g. add [mem], reg):
 * the loaded operand is folded into the memory destination and the other
 * operand becomes a register or immediate. Returns NULL-path behavior via
 * the elided fallthrough when neither operand qualifies. */
2167 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2168 ir_node *mem, ir_node *ptr, ir_mode *mode,
2169 construct_binop_dest_func *func,
2170 construct_binop_dest_func *func8bit,
2171 match_flags_t flags)
2173 ir_node *src_block = get_nodes_block(node);
2181 ia32_address_mode_t am;
2182 ia32_address_t *addr = &am.addr;
2183 memset(&am, 0, sizeof(am));
2185 assert(flags & match_immediate); /* there is no destam node without... */
2186 commutative = (flags & match_commutative) != 0;
/* fold whichever side is a suitable load; commutativity allows swapping */
2188 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2189 build_address(&am, op1, ia32_create_am_double_use);
2190 new_op = create_immediate_or_transform(op2, 0);
2191 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2192 build_address(&am, op2, ia32_create_am_double_use);
2193 new_op = create_immediate_or_transform(op1, 0);
/* normalize unset address parts to the NoReg placeholder */
2198 if (addr->base == NULL)
2199 addr->base = noreg_GP;
2200 if (addr->index == NULL)
2201 addr->index = noreg_GP;
2202 if (addr->mem == NULL)
2205 dbgi = get_irn_dbg_info(node);
2206 block = be_transform_node(src_block);
2207 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8-bit ops need the dedicated 8-bit constructor */
2209 if (get_mode_size_bits(mode) == 8) {
2210 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2212 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2214 set_address(new_node, addr);
2215 set_ia32_op_type(new_node, ia32_AddrModeD);
2216 set_ia32_ls_mode(new_node, mode);
2217 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory Proj to the new combined node */
2219 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2220 mem_proj = be_transform_node(am.mem_proj);
2221 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Destination-address-mode unary op (e.g. not [mem], neg [mem]): folds a
 * load/store pair around a unary operation into one memory-operand
 * instruction when use_dest_am() allows it. */
2226 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2227 ir_node *ptr, ir_mode *mode,
2228 construct_unop_dest_func *func)
2230 ir_node *src_block = get_nodes_block(node);
2236 ia32_address_mode_t am;
2237 ia32_address_t *addr = &am.addr;
2239 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2242 memset(&am, 0, sizeof(am));
2243 build_address(&am, op, ia32_create_am_double_use);
2245 dbgi = get_irn_dbg_info(node);
2246 block = be_transform_node(src_block);
2247 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2248 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2249 set_address(new_node, addr);
2250 set_ia32_op_type(new_node, ia32_AddrModeD);
2251 set_ia32_ls_mode(new_node, mode);
2252 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory Proj to the new combined node */
2254 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2255 mem_proj = be_transform_node(am.mem_proj);
2256 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to turn Store(Mux(cond, 1, 0)) on an 8-bit value into a single
 * SetccMem (setCC with a memory destination). Returns the new node, or the
 * elided failure path when the pattern doesn't match. */
2261 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2263 ir_mode *mode = get_irn_mode(node);
2264 ir_node *mux_true = get_Mux_true(node);
2265 ir_node *mux_false = get_Mux_false(node);
2273 ia32_condition_code_t cc;
2274 ia32_address_t addr;
/* setcc writes exactly one byte */
2276 if (get_mode_size_bits(mode) != 8)
2279 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2281 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2287 cond = get_Mux_sel(node);
2288 flags = get_flags_node(cond, &cc);
2289 /* we can't handle the float special cases with SetM */
2290 if (cc & ia32_cc_additional_float_cases)
/* Mux(c, 0, 1) needs the inverted condition */
2293 cc = ia32_negate_condition_code(cc);
2295 build_address_ptr(&addr, ptr, mem);
2297 dbgi = get_irn_dbg_info(node);
2298 block = get_nodes_block(node);
2299 new_block = be_transform_node(block);
2300 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2301 addr.index, addr.mem, flags, cc);
2302 set_address(new_node, &addr);
2303 set_ia32_op_type(new_node, ia32_AddrModeD);
2304 set_ia32_ls_mode(new_node, mode);
2305 SET_IA32_ORIG_NODE(new_node, node);
/* Try to transform Store(op(Load(ptr), x)) into a destination-address-mode
 * instruction (op [ptr], x). Dispatches on the stored value's opcode and
 * delegates to dest_am_binop/dest_am_unop/try_create_SetMem. */
2310 static ir_node *try_create_dest_am(ir_node *node)
2312 ir_node *val = get_Store_value(node);
2313 ir_node *mem = get_Store_mem(node);
2314 ir_node *ptr = get_Store_ptr(node);
2315 ir_mode *mode = get_irn_mode(val);
2316 unsigned bits = get_mode_size_bits(mode);
2321 /* handle only GP modes for now... */
2322 if (!ia32_mode_needs_gp_reg(mode))
2326 /* store must be the only user of the val node */
2327 if (get_irn_n_edges(val) > 1)
2329 /* skip pointless convs */
2331 ir_node *conv_op = get_Conv_op(val);
2332 ir_mode *pred_mode = get_irn_mode(conv_op);
2333 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a Conv that doesn't lose bits relative to the stored size is a no-op */
2335 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2343 /* value must be in the same block */
2344 if (get_nodes_block(node) != get_nodes_block(val))
2347 switch (get_irn_opcode(val)) {
2349 op1 = get_Add_left(val);
2350 op2 = get_Add_right(val);
/* Add +/-1 becomes inc/dec on the memory operand when enabled */
2351 if (ia32_cg_config.use_incdec) {
2352 if (is_Const_1(op2)) {
2353 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2355 } else if (is_Const_Minus_1(op2)) {
2356 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2360 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2361 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2362 match_commutative | match_immediate);
2365 op1 = get_Sub_left(val);
2366 op2 = get_Sub_right(val);
2367 if (is_Const(op2)) {
2368 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2370 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2371 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2375 op1 = get_And_left(val);
2376 op2 = get_And_right(val);
2377 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2378 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2379 match_commutative | match_immediate);
2382 op1 = get_Or_left(val);
2383 op2 = get_Or_right(val);
2384 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2385 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2386 match_commutative | match_immediate);
2389 op1 = get_Eor_left(val);
2390 op2 = get_Eor_right(val);
2391 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2392 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2393 match_commutative | match_immediate);
2396 op1 = get_Shl_left(val);
2397 op2 = get_Shl_right(val);
/* shifts have no separate 8-bit variant: same constructor twice */
2398 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2399 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2403 op1 = get_Shr_left(val);
2404 op2 = get_Shr_right(val);
2405 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2406 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2410 op1 = get_Shrs_left(val);
2411 op2 = get_Shrs_right(val);
2412 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2413 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2417 op1 = get_Rotl_left(val);
2418 op2 = get_Rotl_right(val);
2419 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2420 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2423 /* TODO: match ROR patterns... */
2425 new_node = try_create_SetMem(val, ptr, mem);
2429 op1 = get_Minus_op(val);
2430 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2433 /* should be lowered already */
2434 assert(mode != mode_b);
2435 op1 = get_Not_op(val);
2436 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* propagate pinned state from the original store */
2442 if (new_node != NULL) {
2443 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2444 get_irn_pinned(node) == op_pin_state_pinned) {
2445 set_irn_pinned(new_node, op_pin_state_pinned);
/* True iff an integer mode can be the target of an x87 fist conversion:
 * signed and 16 or 32 bits wide. */
2452 static bool possible_int_mode_for_fp(ir_mode *mode)
2456 if (!mode_is_signed(mode))
2458 size = get_mode_size_bits(mode);
2459 if (size != 16 && size != 32)
/* True iff node is a Conv from a float mode into an integer mode that a
 * fist instruction can produce directly. */
2464 static int is_float_to_int_conv(const ir_node *node)
2466 ir_mode *mode = get_irn_mode(node);
2470 if (!possible_int_mode_for_fp(mode))
2475 conv_op = get_Conv_op(node);
2476 conv_mode = get_irn_mode(conv_op);
2478 if (!mode_is_float(conv_mode))
2485 * Transform a Store(floatConst) into a sequence of
2488 * @return the created ia32 Store node
2485 * Transform a Store(floatConst) into a sequence of
2488 * @return the created ia32 Store node
2490 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2492 ir_mode *mode = get_irn_mode(cns);
2493 unsigned size = get_mode_size_bytes(mode);
2494 ir_tarval *tv = get_Const_tarval(cns);
2495 ir_node *block = get_nodes_block(node);
2496 ir_node *new_block = be_transform_node(block);
2497 ir_node *ptr = get_Store_ptr(node);
2498 ir_node *mem = get_Store_mem(node);
2499 dbg_info *dbgi = get_irn_dbg_info(node);
2503 ia32_address_t addr;
/* float constants are stored as one or more 32-bit integer immediates */
2505 assert(size % 4 == 0);
2508 build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32-bit chunk of the constant's bit pattern */
2512 get_tarval_sub_bits(tv, ofs) |
2513 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2514 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2515 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2516 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2518 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2519 addr.index, addr.mem, imm);
2521 set_irn_pinned(new_node, get_irn_pinned(node));
2522 set_ia32_op_type(new_node, ia32_AddrModeD);
2523 set_ia32_ls_mode(new_node, mode_Iu);
2524 set_address(new_node, &addr);
2525 SET_IA32_ORIG_NODE(new_node, node);
2528 ins[i++] = new_node;
2533 } while (size != 0);
/* join the memory outputs of all chunk stores */
2536 return new_rd_Sync(dbgi, new_block, i, ins);
2543 * Generate a vfist or vfisttp instruction.
2545 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2546 ir_node *mem, ir_node *val, ir_node **fist)
2550 if (ia32_cg_config.use_fisttp) {
2551 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2552 if other users exists */
2553 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2554 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* Keep the popped value alive so the register allocator copies it */
2555 be_new_Keep(block, 1, &value);
2557 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* plain fist needs the FPU control word switched to truncation mode */
2560 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2563 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2569 * Transforms a general (no special case) Store.
2571 * @return the created ia32 Store node
/**
 * Transforms a general (no special case) Store into the matching ia32 store:
 * xStore (SSE2 float), vfst (x87 float), vfist (float->int conversion store),
 * or Store/Store8Bit (integer), after first trying destination address mode.
 *
 * @return the created ia32 Store node (or the dest-AM node)
 */
2573 static ir_node *gen_general_Store(ir_node *node)
2575 ir_node *val = get_Store_value(node);
2576 ir_mode *mode = get_irn_mode(val);
2577 ir_node *block = get_nodes_block(node);
2578 ir_node *new_block = be_transform_node(block);
2579 ir_node *ptr = get_Store_ptr(node);
2580 ir_node *mem = get_Store_mem(node);
2581 dbg_info *dbgi = get_irn_dbg_info(node);
2582 ir_node *new_val, *new_node, *store;
2583 ia32_address_t addr;
2585 /* check for destination address mode */
2586 new_node = try_create_dest_am(node);
2587 if (new_node != NULL)
2590 /* construct store address */
2591 memset(&addr, 0, sizeof(addr));
2592 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* substitute NoReg for missing base/index parts of the address */
2594 if (addr.base == NULL) {
2595 addr.base = noreg_GP;
2597 addr.base = be_transform_node(addr.base);
2600 if (addr.index == NULL) {
2601 addr.index = noreg_GP;
2603 addr.index = be_transform_node(addr.index);
2605 addr.mem = be_transform_node(mem);
2607 if (mode_is_float(mode)) {
2608 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2610 while (is_Conv(val) && mode == get_irn_mode(val)) {
2611 ir_node *op = get_Conv_op(val);
2612 if (!mode_is_float(get_irn_mode(op)))
2616 new_val = be_transform_node(val);
2617 if (ia32_cg_config.use_sse2) {
2618 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2619 addr.index, addr.mem, new_val);
2621 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2622 addr.index, addr.mem, new_val, mode);
/* x87 only: fold a float->int Conv into the store via fist */
2625 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2626 val = get_Conv_op(val);
2628 /* TODO: is this optimisation still necessary at all (middleend)? */
2629 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2630 while (is_Conv(val)) {
2631 ir_node *op = get_Conv_op(val);
2632 if (!mode_is_float(get_irn_mode(op)))
2634 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2638 new_val = be_transform_node(val);
2639 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* plain integer store; the value may become an immediate operand */
2641 new_val = create_immediate_or_transform(val, 0);
2642 assert(mode != mode_b);
2644 if (get_mode_size_bits(mode) == 8) {
2645 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2646 addr.index, addr.mem, new_val);
2648 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2649 addr.index, addr.mem, new_val);
/* common attribute setup for whichever store was created */
2654 set_irn_pinned(store, get_irn_pinned(node));
2655 set_ia32_op_type(store, ia32_AddrModeD);
2656 set_ia32_ls_mode(store, mode);
2658 set_address(store, &addr);
2659 SET_IA32_ORIG_NODE(store, node);
2665 * Transforms a Store.
2667 * @return the created ia32 Store node
/**
 * Transforms a Store: dispatches float-constant stores to
 * gen_float_const_Store(), everything else to gen_general_Store().
 *
 * @return the created ia32 Store node
 */
2669 static ir_node *gen_Store(ir_node *node)
2671 ir_node *val = get_Store_value(node);
2672 ir_mode *mode = get_irn_mode(val);
2674 if (mode_is_float(mode) && is_Const(val)) {
2675 /* We can transform every floating const store
2676 into a sequence of integer stores.
2677 If the constant is already in a register,
2678 it would be better to use it, but we don't
2679 have this information here. */
2680 return gen_float_const_Store(node, val);
2682 return gen_general_Store(node);
2686 * Transforms a Switch.
2688 * @return the created ia32 SwitchJmp node
/**
 * Transforms a switch-Cond into an ia32 SwitchJmp (jump table).
 * Determines the case value range, rejects overly large tables, and
 * rebases the selector to 0 with a Lea when the minimum case is non-zero.
 *
 * @return the created ia32 SwitchJmp node
 */
2690 static ir_node *create_Switch(ir_node *node)
2692 dbg_info *dbgi = get_irn_dbg_info(node);
2693 ir_node *block = be_transform_node(get_nodes_block(node));
2694 ir_node *sel = get_Cond_selector(node);
2695 ir_node *new_sel = be_transform_node(sel);
2696 long switch_min = LONG_MAX;
2697 long switch_max = LONG_MIN;
2698 long default_pn = get_Cond_default_proj(node);
2700 const ir_edge_t *edge;
/* jump tables are indexed with a full 32-bit selector */
2702 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2704 /* determine the smallest switch case value */
2705 foreach_out_edge(node, edge) {
2706 ir_node *proj = get_edge_src_irn(edge);
2707 long pn = get_Proj_proj(proj);
/* the default case does not contribute to the table range */
2708 if (pn == default_pn)
2711 if (pn < switch_min)
2713 if (pn > switch_max)
/* refuse to emit gigantic jump tables */
2717 if ((unsigned long) (switch_max - switch_min) > 128000) {
2718 panic("Size of switch %+F bigger than 128000", node);
2721 if (switch_min != 0) {
2722 /* if smallest switch case is not 0 we need an additional sub */
2723 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2724 add_ia32_am_offs_int(new_sel, -switch_min);
2725 set_ia32_op_type(new_sel, ia32_AddrModeS);
2727 SET_IA32_ORIG_NODE(new_sel, node);
2730 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2731 SET_IA32_ORIG_NODE(new_node, node);
2737 * Transform a Cond node.
/**
 * Transforms a Cond node: switch-Conds (non-mode_b selector) become a
 * SwitchJmp; boolean Conds become a Jcc fed by the flags of the selector.
 */
2739 static ir_node *gen_Cond(ir_node *node)
2741 ir_node *block = get_nodes_block(node);
2742 ir_node *new_block = be_transform_node(block);
2743 dbg_info *dbgi = get_irn_dbg_info(node);
2744 ir_node *sel = get_Cond_selector(node);
2745 ir_mode *sel_mode = get_irn_mode(sel);
2746 ir_node *flags = NULL;
2748 ia32_condition_code_t cc;
/* a non-boolean selector means this is a switch, not an if */
2750 if (sel_mode != mode_b) {
2751 return create_Switch(node);
2754 /* we get flags from a Cmp */
2755 flags = get_flags_node(sel, &cc);
2757 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2758 SET_IA32_ORIG_NODE(new_node, node);
2764 * Transform a be_Copy.
/**
 * Transforms a be_Copy by duplicating it; copies of values that live in
 * GP registers are normalised to mode_Iu.
 */
2766 static ir_node *gen_be_Copy(ir_node *node)
2768 ir_node *new_node = be_duplicate_node(node);
2769 ir_mode *mode = get_irn_mode(new_node);
2771 if (ia32_mode_needs_gp_reg(mode)) {
2772 set_irn_mode(new_node, mode_Iu);
/**
 * Creates an x87 float compare producing CPU flags: vFucomi when available,
 * otherwise vFtstFnstsw (compare against 0) or vFucomFnstsw followed by a
 * Sahf to move the fnstsw result into the flags register.
 */
2778 static ir_node *create_Fucom(ir_node *node)
2780 dbg_info *dbgi = get_irn_dbg_info(node);
2781 ir_node *block = get_nodes_block(node);
2782 ir_node *new_block = be_transform_node(block);
2783 ir_node *left = get_Cmp_left(node);
2784 ir_node *new_left = be_transform_node(left);
2785 ir_node *right = get_Cmp_right(node);
/* fucomi writes EFLAGS directly (P6 and later) */
2789 if (ia32_cg_config.use_fucomi) {
2790 new_right = be_transform_node(right);
2791 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2793 set_ia32_commutative(new_node);
2794 SET_IA32_ORIG_NODE(new_node, node);
/* ftst is a shorter encoding for comparisons against 0.0 */
2796 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2797 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2799 new_right = be_transform_node(right);
2800 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2803 set_ia32_commutative(new_node);
2805 SET_IA32_ORIG_NODE(new_node, node);
/* transfer AH (fnstsw result) into EFLAGS */
2807 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2808 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Creates an SSE Ucomi (float compare setting EFLAGS) for a Cmp node,
 * using address-mode matching so one operand may come from memory.
 */
2814 static ir_node *create_Ucomi(ir_node *node)
2816 dbg_info *dbgi = get_irn_dbg_info(node);
2817 ir_node *src_block = get_nodes_block(node);
2818 ir_node *new_block = be_transform_node(src_block);
2819 ir_node *left = get_Cmp_left(node);
2820 ir_node *right = get_Cmp_right(node);
2822 ia32_address_mode_t am;
2823 ia32_address_t *addr = &am.addr;
/* ucomis[sd] is commutative for our purposes and supports a memory operand */
2825 match_arguments(&am, src_block, left, right, NULL,
2826 match_commutative | match_am);
2828 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2829 addr->mem, am.new_op1, am.new_op2,
2831 set_am_attributes(new_node, &am);
2833 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possible memory Proj when a load was folded in */
2835 new_node = fix_mem_proj(new_node, &am);
2841 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2842 * to fold an and into a test node
/**
 * Helper: checks whether all Cmp projs are Lg or Eq, which is needed
 * to fold an And into a Test node (Test only yields equality information).
 *
 * @return true if every out-proj of @p node is Eq or Lg
 */
2844 static bool can_fold_test_and(ir_node *node)
2846 const ir_edge_t *edge;
2848 /** we can only have eq and lg projs */
2849 foreach_out_edge(node, edge) {
2850 ir_node *proj = get_edge_src_irn(edge);
2851 pn_Cmp pnc = get_Proj_pn_cmp(proj);
2852 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2860 * returns true if it is assured, that the upper bits of a node are "clean"
2861 * which means for a 16 or 8 bit value, that the upper bits in the register
2862 * are 0 for unsigned and a copy of the last significant bit for signed
/**
 * Returns true if it is assured that the upper bits of a transformed node
 * are "clean": for a 16/8-bit value the upper register bits are 0 for
 * unsigned modes, resp. a sign-extension copy for signed modes.  Used to
 * decide whether a narrow compare can be widened to the smaller 32-bit
 * opcode encoding.
 */
2865 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2867 assert(ia32_mode_needs_gp_reg(mode));
/* full-width values trivially have no "upper" bits to worry about */
2868 if (get_mode_size_bits(mode) >= 32)
/* look through Projs at the producing node */
2871 if (is_Proj(transformed_node))
2872 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2874 switch (get_ia32_irn_opcode(transformed_node)) {
2875 case iro_ia32_Conv_I2I:
2876 case iro_ia32_Conv_I2I8Bit: {
/* a widening conversion of matching signedness produces clean upper bits */
2877 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2878 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2880 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* Shr: a large-enough constant shift zeroes the upper bits (unsigned only) */
2887 if (mode_is_signed(mode)) {
2888 return false; /* TODO handle signed modes */
2890 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2891 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2892 const ia32_immediate_attr_t *attr
2893 = get_ia32_immediate_attr_const(right);
2894 if (attr->symconst == 0 &&
2895 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2899 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
/* Sar preserves cleanliness of its value operand */
2903 /* TODO too conservative if shift amount is constant */
2904 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand suffices (0 & x == 0) */
2907 if (!mode_is_signed(mode)) {
2909 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2910 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2912 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary case: both operands must be clean */
2917 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2918 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2920 case iro_ia32_Const:
2921 case iro_ia32_Immediate: {
2922 const ia32_immediate_attr_t *attr =
2923 get_ia32_immediate_attr_const(transformed_node);
2924 if (mode_is_signed(mode)) {
/* signed: all bits above the sign bit must equal the sign bit */
2925 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2926 return shifted == 0 || shifted == -1;
/* unsigned: all bits above the mode width must be zero */
2928 unsigned long shifted = (unsigned long)attr->offset;
2929 shifted >>= get_mode_size_bits(mode);
2930 return shifted == 0;
2940 * Generate code for a Cmp.
/**
 * Generates code for a Cmp: float compares go to Ucomi (SSE2) or Fucom
 * (x87); integer compares become Test/Test8Bit for the (x & y) ==/!= 0
 * pattern, otherwise Cmp/Cmp8Bit, each with address-mode matching.
 */
2942 static ir_node *gen_Cmp(ir_node *node)
2944 dbg_info *dbgi = get_irn_dbg_info(node);
2945 ir_node *block = get_nodes_block(node);
2946 ir_node *new_block = be_transform_node(block);
2947 ir_node *left = get_Cmp_left(node);
2948 ir_node *right = get_Cmp_right(node);
2949 ir_mode *cmp_mode = get_irn_mode(left);
2951 ia32_address_mode_t am;
2952 ia32_address_t *addr = &am.addr;
2954 if (mode_is_float(cmp_mode)) {
2955 if (ia32_cg_config.use_sse2) {
2956 return create_Ucomi(node);
2958 return create_Fucom(node);
2962 assert(ia32_mode_needs_gp_reg(cmp_mode));
2964 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2965 if (is_Const_0(right) &&
2967 get_irn_n_edges(left) == 1 &&
2968 can_fold_test_and(node)) {
2969 /* Test(and_left, and_right) */
2970 ir_node *and_left = get_And_left(left);
2971 ir_node *and_right = get_And_right(left);
2973 /* matze: code here used mode instead of cmd_mode, I think it is always
2974 * the same as cmp_mode, but I leave this here to see if this is really
2977 assert(get_irn_mode(and_left) == cmp_mode);
2979 match_arguments(&am, block, and_left, and_right, NULL,
2981 match_am | match_8bit_am | match_16bit_am |
2982 match_am_and_immediates | match_immediate);
2984 /* use 32bit compare mode if possible since the opcode is smaller */
2985 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2986 upper_bits_clean(am.new_op2, cmp_mode)) {
2987 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2990 if (get_mode_size_bits(cmp_mode) == 8) {
2991 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2992 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
2994 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2995 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
2998 /* Cmp(left, right) */
2999 match_arguments(&am, block, left, right, NULL,
3000 match_commutative | match_am | match_8bit_am |
3001 match_16bit_am | match_am_and_immediates |
3003 /* use 32bit compare mode if possible since the opcode is smaller */
3004 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3005 upper_bits_clean(am.new_op2, cmp_mode)) {
3006 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3009 if (get_mode_size_bits(cmp_mode) == 8) {
3010 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3011 addr->index, addr->mem, am.new_op1,
3012 am.new_op2, am.ins_permuted);
3014 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3015 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
/* finalise: attributes, load/store mode, debug link, memory Proj fixup */
3018 set_am_attributes(new_node, &am);
3019 set_ia32_ls_mode(new_node, cmp_mode);
3021 SET_IA32_ORIG_NODE(new_node, node);
3023 new_node = fix_mem_proj(new_node, &am);
/**
 * Creates a CMovcc for a Mux whose values live in GP registers.
 *
 * @param node       the Mux node being transformed
 * @param flags      the untransformed flags producer (for matching)
 * @param new_flags  the transformed flags value fed into CMovcc
 * @param cc         condition code selecting between true/false value
 */
3028 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3029 ia32_condition_code_t cc)
3031 dbg_info *dbgi = get_irn_dbg_info(node);
3032 ir_node *block = get_nodes_block(node);
3033 ir_node *new_block = be_transform_node(block);
3034 ir_node *val_true = get_Mux_true(node);
3035 ir_node *val_false = get_Mux_false(node);
3037 ia32_address_mode_t am;
3038 ia32_address_t *addr;
/* only reachable when the target actually supports cmov */
3040 assert(ia32_cg_config.use_cmov);
3041 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3045 match_arguments(&am, block, val_false, val_true, flags,
3046 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* matching may have swapped the operands; compensate in the condition */
3048 if (am.ins_permuted)
3049 cc = ia32_invert_condition_code(cc);
3051 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3052 addr->mem, am.new_op1, am.new_op2, new_flags,
3054 set_am_attributes(new_node, &am);
3056 SET_IA32_ORIG_NODE(new_node, node);
3058 new_node = fix_mem_proj(new_node, &am);
3064 * Creates a ia32 Setcc instruction.
/**
 * Creates an ia32 Setcc instruction producing 0/1 from @p flags under
 * condition @p cc; the 8-bit result is zero-extended with a Conv when the
 * original node's mode is wider than 8 bits.
 */
3066 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3067 ir_node *flags, ia32_condition_code_t cc,
3070 ir_mode *mode = get_irn_mode(orig_node);
3073 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3074 SET_IA32_ORIG_NODE(new_node, orig_node);
3076 /* we might need to conv the result up */
3077 if (get_mode_size_bits(mode) > 8) {
3078 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3079 nomem, new_node, mode_Bu);
3080 SET_IA32_ORIG_NODE(new_node, orig_node);
3087 * Create instruction for an unsigned Difference or Zero.
/**
 * Creates the instruction sequence for an unsigned Difference-or-Zero:
 * doz(a, b) = (a - b) if a >= b else 0, implemented branch-free as
 * Sub -> Sbb0 (borrow mask) -> Not -> And.
 */
3089 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3091 ir_mode *mode = get_irn_mode(psi);
3101 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3102 match_mode_neutral | match_am | match_immediate | match_two_users);
3104 block = get_nodes_block(new_node);
/* find the Sub itself behind a possible result Proj */
3106 if (is_Proj(new_node)) {
3107 sub = get_Proj_pred(new_node);
3108 assert(is_ia32_Sub(sub));
/* we need both the result and the flags output of the Sub */
3111 set_irn_mode(sub, mode_T);
3112 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3114 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3116 dbgi = get_irn_dbg_info(psi);
/* sbb 0 yields -1 on borrow (a < b), 0 otherwise; Not inverts to the mask */
3117 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3118 notn = new_bd_ia32_Not(dbgi, block, sbb);
/* mask the difference: kept when a >= b, zeroed when a < b */
3120 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3121 set_ia32_commutative(new_node);
3126 * Create an const array of two float consts.
3128 * @param c0 the first constant
3129 * @param c1 the second constant
3130 * @param new_mode IN/OUT for the mode of the constants, if NULL
3131 * smallest possible mode will be used
/**
 * Creates a constant global array entity holding two float constants,
 * used as a 2-entry lookup table (e.g. for constant Mux results).
 *
 * @param c0        the first constant
 * @param c1        the second constant
 * @param new_mode  IN/OUT mode of the constants; if *new_mode is NULL the
 *                  smallest lossless mode (mode_F preferred, then mode_D)
 *                  is chosen and written back
 * @return the created, linker-visible, constant entity
 */
3133 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3136 ir_mode *mode = *new_mode;
3138 ir_initializer_t *initializer;
3139 ir_tarval *tv0 = get_Const_tarval(c0);
3140 ir_tarval *tv1 = get_Const_tarval(c1);
3143 /* detect the best mode for the constants */
3144 mode = get_tarval_mode(tv0);
/* try to shrink to single precision if both values convert losslessly */
3146 if (mode != mode_F) {
3147 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3148 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3150 tv0 = tarval_convert_to(tv0, mode);
3151 tv1 = tarval_convert_to(tv1, mode);
/* otherwise try double precision */
3152 } else if (mode != mode_D) {
3153 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3154 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3156 tv0 = tarval_convert_to(tv0, mode);
3157 tv1 = tarval_convert_to(tv1, mode);
/* build the array type (4-byte alignment) and a uniquely named entity */
3164 tp = ia32_create_float_type(mode, 4);
3165 tp = ia32_create_float_array(tp);
3167 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3169 set_entity_ld_ident(ent, get_entity_ident(ent));
3170 set_entity_visibility(ent, ir_visibility_private);
3171 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3173 initializer = create_initializer_compound(2);
3175 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3176 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3178 set_entity_initializer(ent, initializer);
3185 * Possible transformations for creating a Setcc.
/**
 * Possible transformation steps for materialising Mux constants from a
 * Setcc result (SET/NEG/ADD/LEA/SHL/AND/... — enumerators elided in this
 * excerpt).
 */
3187 enum setcc_transform_insn {
/** Recipe describing how to turn a Setcc 0/1 result into the desired
 *  true/false constant pair: condition code plus a short step list. */
3200 typedef struct setcc_transform {
3202 ia32_condition_code_t cc;
3204 enum setcc_transform_insn transform;
3208 } setcc_transform_t;
3211 * Setcc can only handle 0 and 1 result.
3212 * Find a transformation that creates 0 and 1 from
/**
 * Setcc can only produce 0 and 1.  Finds a transformation recipe that
 * creates the constants @p t (true value) and @p f (false value) from a
 * 0/1 Setcc result, writing the steps into @p res.  Normalises first so
 * that f == 0 and t > 0, negating the condition code as needed.
 */
3215 static void find_const_transform(ia32_condition_code_t cc,
3216 ir_tarval *t, ir_tarval *f,
3217 setcc_transform_t *res)
/* normalise: make t the non-null / bigger constant, flipping cc to match */
3223 if (tarval_is_null(t)) {
3227 cc = ia32_negate_condition_code(cc);
3228 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3229 // now, t is the bigger one
3233 cc = ia32_negate_condition_code(cc);
/* if f != 0, work with t-f and add f back as a final ADD step */
3237 if (! tarval_is_null(f)) {
3238 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3241 res->steps[step].transform = SETCC_TR_ADD;
3243 if (t == tarval_bad)
3244 panic("constant subtract failed");
3245 if (! tarval_is_long(f))
3246 panic("tarval is not long");
3248 res->steps[step].val = get_tarval_long(f);
3250 f = tarval_sub(f, f, NULL);
3251 assert(tarval_is_null(f));
/* t == 1: the bare Setcc result is already correct */
3254 if (tarval_is_one(t)) {
3255 res->steps[step].transform = SETCC_TR_SET;
3256 res->num_steps = ++step;
/* t == -1: negate the 0/1 result */
3260 if (tarval_is_minus_one(t)) {
3261 res->steps[step].transform = SETCC_TR_NEG;
3263 res->steps[step].transform = SETCC_TR_SET;
3264 res->num_steps = ++step;
3267 if (tarval_is_long(t)) {
3268 long v = get_tarval_long(t);
3270 res->steps[step].val = 0;
/* small multiples of the 0/1 value map to Lea forms: scale 3 = x*9/x*8 */
3273 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3275 res->steps[step].transform = SETCC_TR_LEAxx;
3276 res->steps[step].scale = 3; /* (a << 3) + a */
3279 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3281 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3282 res->steps[step].scale = 3; /* (a << 3) */
3285 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3287 res->steps[step].transform = SETCC_TR_LEAxx;
3288 res->steps[step].scale = 2; /* (a << 2) + a */
3291 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3293 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3294 res->steps[step].scale = 2; /* (a << 2) */
3297 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3299 res->steps[step].transform = SETCC_TR_LEAxx;
3300 res->steps[step].scale = 1; /* (a << 1) + a */
3303 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3305 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3306 res->steps[step].scale = 1; /* (a << 1) */
3309 res->num_steps = step;
/* general constant: AND with the value, or SHL for single-bit constants */
3312 if (! tarval_is_single_bit(t)) {
3313 res->steps[step].transform = SETCC_TR_AND;
3314 res->steps[step].val = v;
3316 res->steps[step].transform = SETCC_TR_NEG;
3318 int v = get_tarval_lowest_bit(t);
3321 res->steps[step].transform = SETCC_TR_SHL;
3322 res->steps[step].scale = v;
3326 res->steps[step].transform = SETCC_TR_SET;
3327 res->num_steps = ++step;
3330 panic("tarval is not long");
3334 * Transforms a Mux node into some code sequence.
3336 * @return The transformed node.
/**
 * Transforms a Mux node into a code sequence.  Handles, in order:
 * abs patterns, SSE min/max patterns, float Muxes of two constants
 * (Setcc-indexed load from a constant array), unsigned doz patterns,
 * integer Muxes of two constants (Setcc + transform recipe), and the
 * general GP case via CMov.
 *
 * @return The transformed node.
 */
3338 static ir_node *gen_Mux(ir_node *node)
3340 dbg_info *dbgi = get_irn_dbg_info(node);
3341 ir_node *block = get_nodes_block(node);
3342 ir_node *new_block = be_transform_node(block);
3343 ir_node *mux_true = get_Mux_true(node);
3344 ir_node *mux_false = get_Mux_false(node);
3345 ir_node *cond = get_Mux_sel(node);
3346 ir_mode *mode = get_irn_mode(node);
3350 ia32_condition_code_t cc;
3352 assert(get_irn_mode(cond) == mode_b);
/* abs(x) / -abs(x) pattern recognised by the backend helpers */
3354 is_abs = be_mux_is_abs(cond, mux_true, mux_false);
3356 return create_abs(dbgi, block, be_get_abs_op(cond), is_abs < 0, node);
3359 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3360 if (mode_is_float(mode)) {
3361 ir_node *cmp = get_Proj_pred(cond);
3362 ir_node *cmp_left = get_Cmp_left(cmp);
3363 ir_node *cmp_right = get_Cmp_right(cmp);
3364 int pnc = get_Proj_proj(cond);
/* SSE2: recognise min/max idioms on the compare operands */
3366 if (ia32_cg_config.use_sse2) {
3367 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3368 if (cmp_left == mux_true && cmp_right == mux_false) {
3369 /* Mux(a <= b, a, b) => MIN */
3370 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3371 match_commutative | match_am | match_two_users);
3372 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3373 /* Mux(a <= b, b, a) => MAX */
3374 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3375 match_commutative | match_am | match_two_users);
3377 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3378 if (cmp_left == mux_true && cmp_right == mux_false) {
3379 /* Mux(a >= b, a, b) => MAX */
3380 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3381 match_commutative | match_am | match_two_users);
3382 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3383 /* Mux(a >= b, b, a) => MIN */
3384 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3385 match_commutative | match_am | match_two_users);
/* float Mux of two constants: Setcc yields 0/1, which (scaled) indexes
 * a 2-entry constant array loaded via symconst address mode */
3390 if (is_Const(mux_true) && is_Const(mux_false)) {
3391 ia32_address_mode_t am;
3396 flags = get_flags_node(cond, &cc);
3397 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3399 if (ia32_cg_config.use_sse2) {
3400 /* cannot load from different mode on SSE */
3403 /* x87 can load any mode */
3407 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (AM scale maxes out at 8,
 * larger elements need extra Lea/Add scaling of the index) */
3409 switch (get_mode_size_bytes(new_mode)) {
3419 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3420 set_ia32_am_scale(new_node, 2);
3425 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3426 set_ia32_am_scale(new_node, 1);
3429 /* arg, shift 16 NOT supported */
3431 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3434 panic("Unsupported constant size");
/* build the source address mode: base = symconst array, index = Setcc */
3437 am.ls_mode = new_mode;
3438 am.addr.base = get_symconst_base();
3439 am.addr.index = new_node;
3440 am.addr.mem = nomem;
3442 am.addr.scale = scale;
3443 am.addr.use_frame = 0;
3444 am.addr.frame_entity = NULL;
3445 am.addr.symconst_sign = 0;
3446 am.mem_proj = am.addr.mem;
3447 am.op_type = ia32_AddrModeS;
3450 am.pinned = op_pin_state_floats;
3452 am.ins_permuted = false;
3454 if (ia32_cg_config.use_sse2)
3455 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3457 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3458 set_am_attributes(load, &am);
3460 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3462 panic("cannot transform floating point Mux");
/* --- integer Mux from here on --- */
3465 assert(ia32_mode_needs_gp_reg(mode));
3467 if (is_Proj(cond)) {
3468 ir_node *cmp = get_Proj_pred(cond);
3470 ir_node *cmp_left = get_Cmp_left(cmp);
3471 ir_node *cmp_right = get_Cmp_right(cmp);
3472 ir_node *val_true = mux_true;
3473 ir_node *val_false = mux_false;
3474 int pnc = get_Proj_proj(cond);
/* canonicalise so the zero constant (if any) is the false value */
3476 if (is_Const(val_true) && is_Const_null(val_true)) {
3477 ir_node *tmp = val_false;
3478 val_false = val_true;
3480 pnc = get_negated_pnc(pnc, get_irn_mode(cmp_left));
/* Mux(a >= b, a - b, 0) and mirrored forms => branch-free doz */
3482 if (is_Const_0(val_false) && is_Sub(val_true)) {
3483 if ((pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge)
3484 && get_Sub_left(val_true) == cmp_left
3485 && get_Sub_right(val_true) == cmp_right) {
3486 return create_doz(node, cmp_left, cmp_right);
3488 if ((pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le)
3489 && get_Sub_left(val_true) == cmp_right
3490 && get_Sub_right(val_true) == cmp_left) {
3491 return create_doz(node, cmp_right, cmp_left);
3497 flags = get_flags_node(cond, &cc);
3499 if (is_Const(mux_true) && is_Const(mux_false)) {
3500 /* both are const, good */
3501 ir_tarval *tv_true = get_Const_tarval(mux_true);
3502 ir_tarval *tv_false = get_Const_tarval(mux_false);
3503 setcc_transform_t res;
/* compute a step recipe, then replay it back-to-front on the Setcc */
3506 find_const_transform(cc, tv_true, tv_false, &res);
3508 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3511 switch (res.steps[step].transform) {
3513 imm = ia32_immediate_from_long(res.steps[step].val);
3514 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3516 case SETCC_TR_ADDxx:
3517 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3520 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3521 set_ia32_am_scale(new_node, res.steps[step].scale);
3522 set_ia32_am_offs_int(new_node, res.steps[step].val);
3524 case SETCC_TR_LEAxx:
3525 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3526 set_ia32_am_scale(new_node, res.steps[step].scale);
3527 set_ia32_am_offs_int(new_node, res.steps[step].val);
3530 imm = ia32_immediate_from_long(res.steps[step].scale);
3531 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3534 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3537 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3540 imm = ia32_immediate_from_long(res.steps[step].val);
3541 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3544 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3547 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3550 panic("unknown setcc transform");
/* general case: conditional move */
3554 new_node = create_CMov(node, cond, flags, cc);
3562 * Create a conversion from x87 state register to general purpose.
/**
 * Creates a conversion from the x87 FPU stack to a general purpose register:
 * fist-store the value to a frame slot, then Load it back as an integer.
 * Unsigned 32-bit targets store a 64-bit signed integer and load only the
 * low 32 bits.
 */
3564 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3566 ir_node *block = be_transform_node(get_nodes_block(node));
3567 ir_node *op = get_Conv_op(node);
3568 ir_node *new_op = be_transform_node(op);
3569 ir_graph *irg = current_ir_graph;
3570 dbg_info *dbgi = get_irn_dbg_info(node);
3571 ir_mode *mode = get_irn_mode(node);
3572 ir_node *fist, *load, *mem;
3574 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3575 set_irn_pinned(fist, op_pin_state_floats);
3576 set_ia32_use_frame(fist);
3577 set_ia32_op_type(fist, ia32_AddrModeD);
3579 assert(get_mode_size_bits(mode) <= 32);
3580 /* exception we can only store signed 32 bit integers, so for unsigned
3581 we store a 64bit (signed) integer and load the lower bits */
3582 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3583 set_ia32_ls_mode(fist, mode_Ls);
3585 set_ia32_ls_mode(fist, mode_Is);
3587 SET_IA32_ORIG_NODE(fist, node);
/* load the integer result back from the frame slot */
3590 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3592 set_irn_pinned(load, op_pin_state_floats);
3593 set_ia32_use_frame(load);
3594 set_ia32_op_type(load, ia32_AddrModeS);
3595 set_ia32_ls_mode(load, mode_Is);
/* the spill slot must be wide enough for what the fist wrote */
3596 if (get_ia32_ls_mode(fist) == mode_Ls) {
3597 ia32_attr_t *attr = get_ia32_attr(load);
3598 attr->data.need_64bit_stackent = 1;
3600 ia32_attr_t *attr = get_ia32_attr(load);
3601 attr->data.need_32bit_stackent = 1;
3603 SET_IA32_ORIG_NODE(load, node);
3605 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3609 * Creates a x87 strict Conv by placing a Store and a Load
/**
 * Creates an x87 strict Conv by placing a Store and a Load through a frame
 * slot: the round-trip through memory forces rounding to @p tgt_mode
 * precision (the x87 stack otherwise keeps 80-bit precision).
 */
3611 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3613 ir_node *block = get_nodes_block(node);
3614 ir_graph *irg = get_Block_irg(block);
3615 dbg_info *dbgi = get_irn_dbg_info(node);
3616 ir_node *frame = get_irg_frame(irg);
3617 ir_node *store, *load;
3620 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3621 set_ia32_use_frame(store);
3622 set_ia32_op_type(store, ia32_AddrModeD);
3623 SET_IA32_ORIG_NODE(store, node);
3625 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3626 set_ia32_use_frame(load);
3627 set_ia32_op_type(load, ia32_AddrModeS);
3628 SET_IA32_ORIG_NODE(load, node);
3630 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/**
 * Creates an integer-to-integer conversion node, picking the 8-bit variant
 * (Conv_I2I8Bit) when the source mode is 8 bits wide, Conv_I2I otherwise.
 */
3634 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3635 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3637 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3639 func = get_mode_size_bits(mode) == 8 ?
3640 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3641 return func(dbgi, block, base, index, mem, val, mode);
3645 * Create a conversion from general purpose to x87 register
/**
 * Creates a conversion from a general purpose register to the x87 FPU:
 * either fild directly from memory (source address mode) or via a frame
 * slot: Store the integer, then fild it.  Unsigned 32-bit values are
 * zero-extended with a second store to a 64-bit slot and loaded as mode_Ls.
 */
3647 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3649 ir_node *src_block = get_nodes_block(node);
3650 ir_node *block = be_transform_node(src_block);
3651 ir_graph *irg = get_Block_irg(block);
3652 dbg_info *dbgi = get_irn_dbg_info(node);
3653 ir_node *op = get_Conv_op(node);
3654 ir_node *new_op = NULL;
3656 ir_mode *store_mode;
3661 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3662 if (possible_int_mode_for_fp(src_mode)) {
3663 ia32_address_mode_t am;
3665 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3666 if (am.op_type == ia32_AddrModeS) {
3667 ia32_address_t *addr = &am.addr;
3669 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3670 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3672 set_am_attributes(fild, &am);
3673 SET_IA32_ORIG_NODE(fild, node);
3675 fix_mem_proj(fild, &am);
/* no source address mode possible: go through a frame spill slot */
3680 if (new_op == NULL) {
3681 new_op = be_transform_node(op);
3684 mode = get_irn_mode(op);
3686 /* first convert to 32 bit signed if necessary */
3687 if (get_mode_size_bits(src_mode) < 32) {
3688 if (!upper_bits_clean(new_op, src_mode)) {
3689 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3690 SET_IA32_ORIG_NODE(new_op, node);
3695 assert(get_mode_size_bits(mode) == 32);
/* spill the integer value to the frame */
3698 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3700 set_ia32_use_frame(store);
3701 set_ia32_op_type(store, ia32_AddrModeD);
3702 set_ia32_ls_mode(store, mode_Iu);
3704 /* exception for 32bit unsigned, do a 64bit spill+load */
3705 if (!mode_is_signed(mode)) {
3708 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* write zero to the upper 4 bytes => the 64-bit value is non-negative */
3710 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3711 noreg_GP, nomem, zero_const);
3713 set_ia32_use_frame(zero_store);
3714 set_ia32_op_type(zero_store, ia32_AddrModeD);
3715 add_ia32_am_offs_int(zero_store, 4);
3716 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both partial stores */
3721 store = new_rd_Sync(dbgi, block, 2, in);
3722 store_mode = mode_Ls;
3724 store_mode = mode_Is;
/* load the spilled integer into the FPU */
3728 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3730 set_ia32_use_frame(fild);
3731 set_ia32_op_type(fild, ia32_AddrModeS);
3732 set_ia32_ls_mode(fild, store_mode);
3734 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3740 * Create a conversion from one integer mode into another one
/**
 * Creates a conversion from one integer mode into another: the effective
 * conversion is into the smaller of the two modes (a widening conv is a
 * no-op if the upper bits are already clean).  Uses address-mode matching
 * so the operand may be folded from memory.
 */
3742 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3743 dbg_info *dbgi, ir_node *block, ir_node *op,
3746 ir_node *new_block = be_transform_node(block);
3748 ir_mode *smaller_mode;
3749 ia32_address_mode_t am;
3750 ia32_address_t *addr = &am.addr;
/* only the smaller mode matters for the bit pattern of the result */
3753 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3754 smaller_mode = src_mode;
3756 smaller_mode = tgt_mode;
3759 #ifdef DEBUG_libfirm
/* a Conv of a Const should have been folded by the middleend */
3761 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3766 match_arguments(&am, block, NULL, op, NULL,
3767 match_am | match_8bit_am | match_16bit_am);
/* if the upper bits are already clean the conv is a no-op */
3769 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3770 /* unnecessary conv. in theory it shouldn't have been AM */
3771 assert(is_ia32_NoReg_GP(addr->base));
3772 assert(is_ia32_NoReg_GP(addr->index));
3773 assert(is_NoMem(addr->mem));
3774 assert(am.addr.offset == 0);
3775 assert(am.addr.symconst_ent == NULL);
3779 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3780 addr->mem, am.new_op2, smaller_mode);
3781 set_am_attributes(new_node, &am);
3782 /* match_arguments assume that out-mode = in-mode, this isn't true here
3784 set_ia32_ls_mode(new_node, smaller_mode);
3785 SET_IA32_ORIG_NODE(new_node, node);
3786 new_node = fix_mem_proj(new_node, &am);
3791 * Transforms a Conv node.
3793 * @return The created ia32 Conv node
3795 static ir_node *gen_Conv(ir_node *node)
3797 ir_node *block = get_nodes_block(node);
3798 ir_node *new_block = be_transform_node(block);
3799 ir_node *op = get_Conv_op(node);
3800 ir_node *new_op = NULL;
3801 dbg_info *dbgi = get_irn_dbg_info(node);
3802 ir_mode *src_mode = get_irn_mode(op);
3803 ir_mode *tgt_mode = get_irn_mode(node);
3804 int src_bits = get_mode_size_bits(src_mode);
3805 int tgt_bits = get_mode_size_bits(tgt_mode);
3806 ir_node *res = NULL;
/* 64bit integers must have been lowered away before this phase */
3808 assert(!mode_is_int(src_mode) || src_bits <= 32);
3809 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3811 /* modeB -> X should already be lowered by the lower_mode_b pass */
3812 if (src_mode == mode_b) {
3813 panic("ConvB not lowered %+F", node);
/* same-mode Conv: only strict x87 conversions have an effect */
3816 if (src_mode == tgt_mode) {
3817 if (get_Conv_strict(node)) {
3818 if (ia32_cg_config.use_sse2) {
3819 /* when we are in SSE mode, we can kill all strict no-op conversion */
3820 return be_transform_node(op);
3823 /* this should be optimized already, but who knows... */
3824 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3825 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3826 return be_transform_node(op);
3830 if (mode_is_float(src_mode)) {
3831 new_op = be_transform_node(op);
3832 /* we convert from float ... */
3833 if (mode_is_float(tgt_mode)) {
3835 if (ia32_cg_config.use_sse2) {
3836 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3837 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3839 set_ia32_ls_mode(res, tgt_mode);
3841 if (get_Conv_strict(node)) {
3842 /* if fp_no_float_fold is not set then we assume that we
3843 * don't have any float operations in a non
3844 * mode_float_arithmetic mode and can skip strict upconvs */
3845 if (src_bits < tgt_bits) {
3846 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3849 res = gen_x87_strict_conv(tgt_mode, new_op);
3850 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3854 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* ... float -> int */
3859 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3860 if (ia32_cg_config.use_sse2) {
3861 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3863 set_ia32_ls_mode(res, src_mode);
3865 return gen_x87_fp_to_gp(node);
3869 /* we convert from int ... */
3870 if (mode_is_float(tgt_mode)) {
3872 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3873 if (ia32_cg_config.use_sse2) {
3874 new_op = be_transform_node(op);
3875 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3877 set_ia32_ls_mode(res, tgt_mode);
3879 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3880 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3881 res = gen_x87_gp_to_fp(node, src_mode);
3883 /* we need a strict-Conv, if the int mode has more bits than the
3885 if (float_mantissa < int_mantissa) {
3886 res = gen_x87_strict_conv(tgt_mode, res);
3887 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3891 } else if (tgt_mode == mode_b) {
3892 /* mode_b lowering already took care that we only have 0/1 values */
3893 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3894 src_mode, tgt_mode));
3895 return be_transform_node(op);
/* int -> int with equal width is a no-op */
3898 if (src_bits == tgt_bits) {
3899 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3900 src_mode, tgt_mode));
3901 return be_transform_node(op);
3904 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to fold the node into an ia32 Immediate operand; fall back to the
 * normal transformation when it cannot be encoded as an immediate. */
3912 static ir_node *create_immediate_or_transform(ir_node *node,
3913 char immediate_constraint_type)
3915 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3916 if (new_node == NULL) {
3917 new_node = be_transform_node(node);
3923 * Transforms a FrameAddr into an ia32 Lea on the frame pointer.
3925 static ir_node *gen_be_FrameAddr(ir_node *node)
3927 ir_node *block = be_transform_node(get_nodes_block(node));
3928 ir_node *op = be_get_FrameAddr_frame(node);
3929 ir_node *new_op = be_transform_node(op);
3930 dbg_info *dbgi = get_irn_dbg_info(node);
/* a Lea computes frame + entity offset; the concrete offset is filled in
 * later when frame entities get assigned */
3933 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3934 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3935 set_ia32_use_frame(new_node);
3937 SET_IA32_ORIG_NODE(new_node, node);
3943 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3945 static ir_node *gen_be_Return(ir_node *node)
3947 ir_graph *irg = current_ir_graph;
3948 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3949 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3950 ir_entity *ent = get_irg_entity(irg);
3951 ir_type *tp = get_entity_type(ent);
3956 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3957 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3959 int pn_ret_val, pn_ret_mem, arity, i;
/* the special handling below is only needed for SSE functions returning a
 * float value; everything else is duplicated unchanged */
3961 assert(ret_val != NULL);
3962 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3963 return be_duplicate_node(node);
3966 res_type = get_method_res_type(tp, 0);
3968 if (! is_Primitive_type(res_type)) {
3969 return be_duplicate_node(node);
3972 mode = get_type_mode(res_type);
3973 if (! mode_is_float(mode)) {
3974 return be_duplicate_node(node);
3977 assert(get_method_n_ress(tp) == 1);
3979 pn_ret_val = get_Proj_proj(ret_val);
3980 pn_ret_mem = get_Proj_proj(ret_mem);
3982 /* get the Barrier */
3983 barrier = get_Proj_pred(ret_val);
3985 /* get result input of the Barrier */
3986 ret_val = get_irn_n(barrier, pn_ret_val);
3987 new_ret_val = be_transform_node(ret_val);
3989 /* get memory input of the Barrier */
3990 ret_mem = get_irn_n(barrier, pn_ret_mem);
3991 new_ret_mem = be_transform_node(ret_mem);
3993 frame = get_irg_frame(irg);
3995 dbgi = get_irn_dbg_info(barrier);
3996 block = be_transform_node(get_nodes_block(barrier));
3998 /* store xmm0 onto stack */
3999 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4000 new_ret_mem, new_ret_val);
4001 set_ia32_ls_mode(sse_store, mode)
4002 set_ia32_op_type(sse_store, ia32_AddrModeD);
4003 set_ia32_use_frame(sse_store);
4005 /* load into x87 register */
4006 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
4007 set_ia32_op_type(fld, ia32_AddrModeS);
4008 set_ia32_use_frame(fld);
4010 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4011 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4013 /* create a new barrier */
4014 arity = get_irn_arity(barrier);
4015 in = ALLOCAN(ir_node*, arity);
4016 for (i = 0; i < arity; ++i) {
4019 if (i == pn_ret_val) {
4021 } else if (i == pn_ret_mem) {
4024 ir_node *in = get_irn_n(barrier, i);
4025 new_in = be_transform_node(in);
4030 new_barrier = new_ir_node(dbgi, irg, block,
4031 get_irn_op(barrier), get_irn_mode(barrier),
4033 copy_node_attr(irg, barrier, new_barrier);
4034 be_duplicate_deps(barrier, new_barrier);
4035 be_set_transformed_node(barrier, new_barrier);
4037 /* transform normally */
4038 return be_duplicate_node(node);
4042 * Transform a be_AddSP into an ia32_SubSP.
/* Note: the ia32 stack grows downwards, so enlarging the stack frame
 * (be_AddSP) is implemented by subtracting from esp (SubSP). */
4044 static ir_node *gen_be_AddSP(ir_node *node)
4046 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4047 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4049 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4050 match_am | match_immediate);
4054 * Transform a be_SubSP into an ia32_AddSP
/* Mirror of gen_be_AddSP: shrinking the stack frame adds to esp. */
4056 static ir_node *gen_be_SubSP(ir_node *node)
4058 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4059 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4061 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4062 match_am | match_immediate);
4066 * Change some phi modes
4068 static ir_node *gen_Phi(ir_node *node)
4070 const arch_register_req_t *req;
4071 ir_node *block = be_transform_node(get_nodes_block(node));
4072 ir_graph *irg = current_ir_graph;
4073 dbg_info *dbgi = get_irn_dbg_info(node);
4074 ir_mode *mode = get_irn_mode(node);
/* pick the register class the Phi result must live in */
4077 if (ia32_mode_needs_gp_reg(mode)) {
4078 /* we shouldn't have any 64bit stuff around anymore */
4079 assert(get_mode_size_bits(mode) <= 32);
4080 /* all integer operations are on 32bit registers now */
4082 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4083 } else if (mode_is_float(mode)) {
4084 if (ia32_cg_config.use_sse2) {
4086 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4089 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4092 req = arch_no_register_req;
4095 /* phi nodes allow loops, so we use the old arguments for now
4096 * and fix this later */
4097 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4098 get_irn_in(node) + 1);
4099 copy_node_attr(irg, node, phi);
4100 be_duplicate_deps(node, phi);
4102 arch_set_out_register_req(phi, 0, req);
/* make sure the predecessors get transformed/fixed up later */
4104 be_enqueue_preds(node);
/* Transform an unconditional Jmp into an ia32 Jmp in the new block. */
4109 static ir_node *gen_Jmp(ir_node *node)
4111 ir_node *block = get_nodes_block(node);
4112 ir_node *new_block = be_transform_node(block);
4113 dbg_info *dbgi = get_irn_dbg_info(node);
4116 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4117 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump; the target may be folded as address mode
 * or an immediate. */
4125 static ir_node *gen_IJmp(ir_node *node)
4127 ir_node *block = get_nodes_block(node);
4128 ir_node *new_block = be_transform_node(block);
4129 dbg_info *dbgi = get_irn_dbg_info(node);
4130 ir_node *op = get_IJmp_target(node);
4132 ia32_address_mode_t am;
4133 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4135 assert(get_irn_mode(op) == mode_P);
4137 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4139 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4140 addr->mem, am.new_op2);
4141 set_am_attributes(new_node, &am);
4142 SET_IA32_ORIG_NODE(new_node, node);
4144 new_node = fix_mem_proj(new_node, &am);
/* Lower an l_ShlDep (shift-left with extra scheduling dependency) to Shl. */
4149 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4151 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4152 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4154 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4155 match_immediate | match_mode_neutral);
/* Lower an l_ShrDep (logical shift-right with dependency) to Shr. */
4158 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4160 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4161 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4162 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Lower an l_SarDep (arithmetic shift-right with dependency) to Sar. */
4166 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4168 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4169 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4170 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Lower an l_Add (low word of a 64bit add) to an ia32 Add in mode_T so
 * the carry flag result stays available for the following Adc. */
4174 static ir_node *gen_ia32_l_Add(ir_node *node)
4176 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4177 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4178 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4179 match_commutative | match_am | match_immediate |
4180 match_mode_neutral);
/* gen_binop may hand back a Proj; step back to the Add itself */
4182 if (is_Proj(lowered)) {
4183 lowered = get_Proj_pred(lowered);
4185 assert(is_ia32_Add(lowered));
4186 set_irn_mode(lowered, mode_T);
/* Lower an l_Adc (high word add-with-carry) to an ia32 Adc consuming
 * the flags produced by the preceding Add. */
4192 static ir_node *gen_ia32_l_Adc(ir_node *node)
4194 return gen_binop_flags(node, new_bd_ia32_Adc,
4195 match_commutative | match_am | match_immediate |
4196 match_mode_neutral);
4200 * Transforms a l_MulS into a "real" MulS node.
4202 * @return the created ia32 Mul node
4204 static ir_node *gen_ia32_l_Mul(ir_node *node)
4206 ir_node *left = get_binop_left(node);
4207 ir_node *right = get_binop_right(node);
4209 return gen_binop(node, left, right, new_bd_ia32_Mul,
4210 match_commutative | match_am | match_mode_neutral);
4214 * Transforms a l_IMulS into a "real" IMul1OPS node.
4216 * @return the created ia32 IMul1OP node
4218 static ir_node *gen_ia32_l_IMul(ir_node *node)
4220 ir_node *left = get_binop_left(node);
4221 ir_node *right = get_binop_right(node);
4223 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4224 match_commutative | match_am | match_mode_neutral);
/* Lower an l_Sub (low word of a 64bit subtract) to an ia32 Sub in mode_T
 * so the borrow flag stays available for the following Sbb. */
4227 static ir_node *gen_ia32_l_Sub(ir_node *node)
4229 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4230 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4231 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4232 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a Proj; step back to the Sub itself */
4234 if (is_Proj(lowered)) {
4235 lowered = get_Proj_pred(lowered);
4237 assert(is_ia32_Sub(lowered));
4238 set_irn_mode(lowered, mode_T);
/* Lower an l_Sbb (high word subtract-with-borrow) to an ia32 Sbb. */
4244 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4246 return gen_binop_flags(node, new_bd_ia32_Sbb,
4247 match_am | match_immediate | match_mode_neutral);
4251 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4252 * op1 - target to be shifted
4253 * op2 - contains bits to be shifted into target
4255 * Only op3 can be an immediate.
4257 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4258 ir_node *low, ir_node *count)
4260 ir_node *block = get_nodes_block(node);
4261 ir_node *new_block = be_transform_node(block);
4262 dbg_info *dbgi = get_irn_dbg_info(node);
4263 ir_node *new_high = be_transform_node(high);
4264 ir_node *new_low = be_transform_node(low);
4268 /* the shift amount can be any mode that is bigger than 5 bits, since all
4269 * other bits are ignored anyway */
4270 while (is_Conv(count) &&
4271 get_irn_n_edges(count) == 1 &&
4272 mode_is_int(get_irn_mode(count))) {
4273 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4274 count = get_Conv_op(count);
/* the count becomes an immediate when possible */
4276 new_count = create_immediate_or_transform(count, 0);
4278 if (is_ia32_l_ShlD(node)) {
4279 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4282 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4285 SET_IA32_ORIG_NODE(new_node, node);
/* Lower an l_ShlD (64bit shift-left helper) via gen_lowered_64bit_shifts. */
4290 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4292 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4293 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4294 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4295 return gen_lowered_64bit_shifts(node, high, low, count);
/* Lower an l_ShrD (64bit shift-right helper) via gen_lowered_64bit_shifts. */
4298 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4300 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4301 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4302 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4303 return gen_lowered_64bit_shifts(node, high, low, count);
/* Lower an l_LLtoFloat: convert a 64bit integer (given as low/high 32bit
 * words) to a float by spilling both words to the frame and loading them
 * with an x87 fild.  Unsigned values additionally get the 2^64 bias added
 * back when the (signed) fild interpreted the top bit as a sign. */
4306 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4308 ir_node *src_block = get_nodes_block(node);
4309 ir_node *block = be_transform_node(src_block);
4310 ir_graph *irg = current_ir_graph;
4311 dbg_info *dbgi = get_irn_dbg_info(node);
4312 ir_node *frame = get_irg_frame(irg);
4313 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4314 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4315 ir_node *new_val_low = be_transform_node(val_low);
4316 ir_node *new_val_high = be_transform_node(val_high);
4318 ir_node *sync, *fild, *res;
4319 ir_node *store_low, *store_high;
4321 if (ia32_cg_config.use_sse2) {
4322 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both 32bit halves into one 64bit frame slot */
4326 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4328 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4330 SET_IA32_ORIG_NODE(store_low, node);
4331 SET_IA32_ORIG_NODE(store_high, node);
4333 set_ia32_use_frame(store_low);
4334 set_ia32_use_frame(store_high);
4335 set_ia32_op_type(store_low, ia32_AddrModeD);
4336 set_ia32_op_type(store_high, ia32_AddrModeD);
4337 set_ia32_ls_mode(store_low, mode_Iu);
4338 set_ia32_ls_mode(store_high, mode_Is);
/* the high word lives 4 bytes above the low word */
4339 add_ia32_am_offs_int(store_high, 4);
4343 sync = new_rd_Sync(dbgi, block, 2, in);
/* do a fild (64bit signed integer load) on the spilled value */
4346 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4348 set_ia32_use_frame(fild);
4349 set_ia32_op_type(fild, ia32_AddrModeS);
4350 set_ia32_ls_mode(fild, mode_Ls);
4352 SET_IA32_ORIG_NODE(fild, node);
4354 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4356 if (! mode_is_signed(get_irn_mode(val_high))) {
4357 ia32_address_mode_t am;
/* shift the sign bit down to select either 0.0 or the ULL bias constant
 * from a known constant table entry */
4359 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4362 am.addr.base = get_symconst_base();
4363 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4364 am.addr.mem = nomem;
4367 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4368 am.addr.use_frame = 0;
4369 am.addr.frame_entity = NULL;
4370 am.addr.symconst_sign = 0;
4371 am.ls_mode = mode_F;
4372 am.mem_proj = nomem;
4373 am.op_type = ia32_AddrModeS;
4375 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4376 am.pinned = op_pin_state_floats;
4378 am.ins_permuted = false;
4380 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4381 am.new_op1, am.new_op2, get_fpcw());
4382 set_am_attributes(fadd, &am);
4384 set_irn_mode(fadd, mode_T);
4385 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Lower an l_FloattoLL: store a float value to a 64bit frame slot with an
 * x87 fist; the two 32bit result halves are picked up later by
 * gen_Proj_l_FloattoLL. */
4390 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4392 ir_node *src_block = get_nodes_block(node);
4393 ir_node *block = be_transform_node(src_block);
4394 ir_graph *irg = get_Block_irg(block);
4395 dbg_info *dbgi = get_irn_dbg_info(node);
4396 ir_node *frame = get_irg_frame(irg);
4397 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4398 ir_node *new_val = be_transform_node(val);
4399 ir_node *fist, *mem;
4401 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4402 SET_IA32_ORIG_NODE(fist, node);
4403 set_ia32_use_frame(fist);
4404 set_ia32_op_type(fist, ia32_AddrModeD);
4405 set_ia32_ls_mode(fist, mode_Ls);
/* Load one 32bit half of an l_FloattoLL result back from the 64bit frame
 * slot written by the fist (high half at offset 4). */
4410 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4412 ir_node *block = be_transform_node(get_nodes_block(node));
4413 ir_graph *irg = get_Block_irg(block);
4414 ir_node *pred = get_Proj_pred(node);
4415 ir_node *new_pred = be_transform_node(pred);
4416 ir_node *frame = get_irg_frame(irg);
4417 dbg_info *dbgi = get_irn_dbg_info(node);
4418 long pn = get_Proj_proj(node);
4423 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4424 SET_IA32_ORIG_NODE(load, node);
4425 set_ia32_use_frame(load);
4426 set_ia32_op_type(load, ia32_AddrModeS);
4427 set_ia32_ls_mode(load, mode_Iu);
4428 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4429 * 32 bit from it with this particular load */
4430 attr = get_ia32_attr(load);
4431 attr->data.need_64bit_stackent = 1;
4433 if (pn == pn_ia32_l_FloattoLL_res_high) {
4434 add_ia32_am_offs_int(load, 4);
4436 assert(pn == pn_ia32_l_FloattoLL_res_low);
4439 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4445 * Transform the Projs of an AddSP.
/* Note: be_AddSP was lowered to an ia32 SubSP (stack grows downwards),
 * so the proj numbers are renumbered to SubSP outputs here. */
4447 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4449 ir_node *pred = get_Proj_pred(node);
4450 ir_node *new_pred = be_transform_node(pred);
4451 dbg_info *dbgi = get_irn_dbg_info(node);
4452 long proj = get_Proj_proj(node);
4454 if (proj == pn_be_AddSP_sp) {
4455 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4456 pn_ia32_SubSP_stack);
4457 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4459 } else if (proj == pn_be_AddSP_res) {
4460 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4461 pn_ia32_SubSP_addr);
4462 } else if (proj == pn_be_AddSP_M) {
4463 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4466 panic("No idea how to transform proj->AddSP");
4470 * Transform the Projs of a SubSP.
/* Note: be_SubSP was lowered to an ia32 AddSP, mirroring gen_Proj_be_AddSP. */
4472 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4474 ir_node *pred = get_Proj_pred(node);
4475 ir_node *new_pred = be_transform_node(pred);
4476 dbg_info *dbgi = get_irn_dbg_info(node);
4477 long proj = get_Proj_proj(node);
4479 if (proj == pn_be_SubSP_sp) {
4480 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4481 pn_ia32_AddSP_stack);
4482 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4484 } else if (proj == pn_be_SubSP_M) {
4485 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4488 panic("No idea how to transform proj->SubSP");
4492 * Transform and renumber the Projs from a Load.
4494 static ir_node *gen_Proj_Load(ir_node *node)
4497 ir_node *block = be_transform_node(get_nodes_block(node));
4498 ir_node *pred = get_Proj_pred(node);
4499 dbg_info *dbgi = get_irn_dbg_info(node);
4500 long proj = get_Proj_proj(node);
4502 /* loads might be part of source address mode matches, so we don't
4503 * transform the ProjMs yet (with the exception of loads whose result is
4506 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4509 /* this is needed, because sometimes we have loops that are only
4510 reachable through the ProjM */
4511 be_enqueue_preds(node);
4512 /* do it in 2 steps, to silence firm verifier */
4513 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4514 set_Proj_proj(res, pn_ia32_mem);
4518 /* renumber the proj */
4519 new_pred = be_transform_node(pred);
4520 if (is_ia32_Load(new_pred)) {
4523 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4525 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4526 case pn_Load_X_regular:
4527 return new_rd_Jmp(dbgi, block);
4528 case pn_Load_X_except:
4529 /* This Load might raise an exception. Mark it. */
4530 set_ia32_exc_label(new_pred, 1);
4531 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the Load may have been folded into a Conv during transformation */
4535 } else if (is_ia32_Conv_I2I(new_pred) ||
4536 is_ia32_Conv_I2I8Bit(new_pred)) {
4537 set_irn_mode(new_pred, mode_T);
4538 if (proj == pn_Load_res) {
4539 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4540 } else if (proj == pn_Load_M) {
4541 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4543 } else if (is_ia32_xLoad(new_pred)) {
4546 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4548 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4549 case pn_Load_X_regular:
4550 return new_rd_Jmp(dbgi, block);
4551 case pn_Load_X_except:
4552 /* This Load might raise an exception. Mark it. */
4553 set_ia32_exc_label(new_pred, 1);
4554 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4558 } else if (is_ia32_vfld(new_pred)) {
4561 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4563 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4564 case pn_Load_X_regular:
4565 return new_rd_Jmp(dbgi, block);
4566 case pn_Load_X_except:
4567 /* This Load might raise an exception. Mark it. */
4568 set_ia32_exc_label(new_pred, 1);
4569 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4574 /* can happen for ProJMs when source address mode happened for the
4577 /* however it should not be the result proj, as that would mean the
4578 load had multiple users and should not have been used for
4580 if (proj != pn_Load_M) {
4581 panic("internal error: transformed node not a Load");
4583 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4586 panic("No idea how to transform proj");
4590 * Transform and renumber the Projs from a Div or Mod instruction.
4592 static ir_node *gen_Proj_Div(ir_node *node)
4594 ir_node *block = be_transform_node(get_nodes_block(node));
4595 ir_node *pred = get_Proj_pred(node);
4596 ir_node *new_pred = be_transform_node(pred);
4597 dbg_info *dbgi = get_irn_dbg_info(node);
4598 long proj = get_Proj_proj(node);
/* Div and IDiv share proj numbering, so one set of constants suffices */
4600 assert(pn_ia32_Div_M == pn_ia32_IDiv_M);
4601 assert(pn_ia32_Div_div_res == pn_ia32_IDiv_div_res);
/* memory result */
4605 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4606 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4607 } else if (is_ia32_xDiv(new_pred)) {
4608 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4609 } else if (is_ia32_vfdiv(new_pred)) {
4610 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4612 panic("Div transformed to unexpected thing %+F", new_pred);
/* quotient result: mode depends on which backend node was created */
4615 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4616 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4617 } else if (is_ia32_xDiv(new_pred)) {
4618 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4619 } else if (is_ia32_vfdiv(new_pred)) {
4620 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4622 panic("Div transformed to unexpected thing %+F", new_pred);
4624 case pn_Div_X_regular:
4625 return new_rd_Jmp(dbgi, block);
4626 case pn_Div_X_except:
4627 set_ia32_exc_label(new_pred, 1);
4628 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4633 panic("No idea how to transform proj->Div");
4637 * Transform and renumber the Projs from a Div or Mod instruction.
4639 static ir_node *gen_Proj_Mod(ir_node *node)
4641 ir_node *pred = get_Proj_pred(node);
4642 ir_node *new_pred = be_transform_node(pred);
4643 dbg_info *dbgi = get_irn_dbg_info(node);
4644 long proj = get_Proj_proj(node);
/* a Mod can only have become an integer Div/IDiv (remainder output) */
4646 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4647 assert(pn_ia32_Div_M == pn_ia32_IDiv_M);
4648 assert(pn_ia32_Div_mod_res == pn_ia32_IDiv_mod_res);
4652 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4654 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4655 case pn_Mod_X_except:
4656 set_ia32_exc_label(new_pred, 1);
4657 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4661 panic("No idea how to transform proj->Mod");
4665 * Transform and renumber the Projs from a CopyB.
4667 static ir_node *gen_Proj_CopyB(ir_node *node)
4669 ir_node *pred = get_Proj_pred(node);
4670 ir_node *new_pred = be_transform_node(pred);
4671 dbg_info *dbgi = get_irn_dbg_info(node);
4672 long proj = get_Proj_proj(node);
/* CopyB_i is the immediate-size variant; both only expose a memory result */
4676 if (is_ia32_CopyB_i(new_pred)) {
4677 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4678 } else if (is_ia32_CopyB(new_pred)) {
4679 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4686 panic("No idea how to transform proj->CopyB");
/* Transform a be_Call into an ia32 Call: match the call target as address
 * mode/immediate, route the eax/ecx/edx register parameters, and remember
 * SSE calls returning floats for later XMM->x87 result fix-up. */
4689 static ir_node *gen_be_Call(ir_node *node)
4691 dbg_info *const dbgi = get_irn_dbg_info(node);
4692 ir_node *const src_block = get_nodes_block(node);
4693 ir_node *const block = be_transform_node(src_block);
4694 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4695 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4696 ir_node *const sp = be_transform_node(src_sp);
4697 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4698 ia32_address_mode_t am;
4699 ia32_address_t *const addr = &am.addr;
4704 ir_node * eax = noreg_GP;
4705 ir_node * ecx = noreg_GP;
4706 ir_node * edx = noreg_GP;
4707 unsigned const pop = be_Call_get_pop(node);
4708 ir_type *const call_tp = be_Call_get_type(node);
4709 int old_no_pic_adjust;
4711 /* Run the x87 simulator if the call returns a float value */
4712 if (get_method_n_ress(call_tp) > 0) {
4713 ir_type *const res_type = get_method_res_type(call_tp, 0);
4714 ir_mode *const res_mode = get_type_mode(res_type);
4716 if (res_mode != NULL && mode_is_float(res_mode)) {
4717 ir_graph *irg = current_ir_graph;
4718 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4719 irg_data->do_x87_sim = 1;
4723 /* We do not want be_Call direct calls */
4724 assert(be_Call_get_entity(node) == NULL);
4726 /* special case for PIC trampoline calls */
4727 old_no_pic_adjust = ia32_no_pic_adjust;
4728 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4730 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4731 match_am | match_immediate);
4733 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw; register parameters follow below it */
4735 i = get_irn_arity(node) - 1;
4736 fpcw = be_transform_node(get_irn_n(node, i--));
4737 for (; i >= be_pos_Call_first_arg; --i) {
4738 arch_register_req_t const *const req = arch_get_register_req(node, i);
4739 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4741 assert(req->type == arch_register_req_type_limited);
4742 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4744 switch (*req->limited) {
4745 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4746 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4747 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4748 default: panic("Invalid GP register for register parameter");
4752 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4753 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4754 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4755 set_am_attributes(call, &am);
4756 call = fix_mem_proj(call, &am);
4758 if (get_irn_pinned(node) == op_pin_state_pinned)
4759 set_irn_pinned(call, op_pin_state_pinned);
4761 SET_IA32_ORIG_NODE(call, node);
4763 if (ia32_cg_config.use_sse2) {
4764 /* remember this call for post-processing */
4765 ARR_APP1(ir_node *, call_list, call);
4766 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4773 * Transform Builtin trap
/* Lowered to the ia32 UD2 instruction (guaranteed invalid opcode). */
4775 static ir_node *gen_trap(ir_node *node)
4777 dbg_info *dbgi = get_irn_dbg_info(node);
4778 ir_node *block = be_transform_node(get_nodes_block(node));
4779 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4781 return new_bd_ia32_UD2(dbgi, block, mem);
4785 * Transform Builtin debugbreak
/* Lowered to the ia32 Breakpoint node (int3). */
4787 static ir_node *gen_debugbreak(ir_node *node)
4789 dbg_info *dbgi = get_irn_dbg_info(node);
4790 ir_node *block = be_transform_node(get_nodes_block(node));
4791 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4793 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4797 * Transform Builtin return_address
/* param 0 is the (constant) frame level to walk up; for non-zero levels a
 * ClimbFrame node chases the saved frame pointers first, then the return
 * address slot of the selected frame is loaded. */
4799 static ir_node *gen_return_address(ir_node *node)
4801 ir_node *param = get_Builtin_param(node, 0);
4802 ir_node *frame = get_Builtin_param(node, 1);
4803 dbg_info *dbgi = get_irn_dbg_info(node);
4804 ir_tarval *tv = get_Const_tarval(param);
4805 unsigned long value = get_tarval_long(tv);
4807 ir_node *block = be_transform_node(get_nodes_block(node));
4808 ir_node *ptr = be_transform_node(frame);
4812 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4813 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4814 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4817 /* load the return address from this frame */
4818 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4820 set_irn_pinned(load, get_irn_pinned(node));
4821 set_ia32_op_type(load, ia32_AddrModeS);
4822 set_ia32_ls_mode(load, mode_Iu);
4824 set_ia32_am_offs_int(load, 0);
4825 set_ia32_use_frame(load);
4826 set_ia32_frame_ent(load, ia32_get_return_address_entity());
/* an unpinned load may be rematerialized; the res proj number must agree
 * across all load flavours for that to be safe */
4828 if (get_irn_pinned(node) == op_pin_state_floats) {
4829 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4830 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4831 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4832 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4835 SET_IA32_ORIG_NODE(load, node);
4836 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4840 * Transform Builtin frame_address
/* Analogous to gen_return_address, but loads the saved frame address slot
 * of the selected stack frame instead of the return address. */
4842 static ir_node *gen_frame_address(ir_node *node)
4844 ir_node *param = get_Builtin_param(node, 0);
4845 ir_node *frame = get_Builtin_param(node, 1);
4846 dbg_info *dbgi = get_irn_dbg_info(node);
4847 ir_tarval *tv = get_Const_tarval(param);
4848 unsigned long value = get_tarval_long(tv);
4850 ir_node *block = be_transform_node(get_nodes_block(node));
4851 ir_node *ptr = be_transform_node(frame);
4856 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4857 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4858 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4861 /* load the frame address from this frame */
4862 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4864 set_irn_pinned(load, get_irn_pinned(node));
4865 set_ia32_op_type(load, ia32_AddrModeS);
4866 set_ia32_ls_mode(load, mode_Iu);
4868 ent = ia32_get_frame_address_entity();
4870 set_ia32_am_offs_int(load, 0);
4871 set_ia32_use_frame(load);
4872 set_ia32_frame_ent(load, ent);
4874 /* will fail anyway, but gcc does this: */
4875 set_ia32_am_offs_int(load, 0);
4878 if (get_irn_pinned(node) == op_pin_state_floats) {
4879 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4880 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4881 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4882 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4885 SET_IA32_ORIG_NODE(load, node);
4886 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4890 * Transform Builtin prefetch.
/* (original doxygen said "frame_address" -- copy/paste; fixed.)
 * Emits a SSE prefetch (prefetchnta/t2/t1/t0 selected by the locality hint)
 * or a 3DNow! prefetch/prefetchw, or just routes the memory edge through
 * when no prefetch instruction is available. */
4892 static ir_node *gen_prefetch(ir_node *node)
4895 ir_node *ptr, *block, *mem, *base, *index;
4896 ir_node *param, *new_node;
4899 ia32_address_t addr;
4901 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4902 /* no prefetch at all, route memory */
4903 return be_transform_node(get_Builtin_mem(node));
/* param 1 selects read (0) vs. write (1) prefetch */
4906 param = get_Builtin_param(node, 1);
4907 tv = get_Const_tarval(param);
4908 rw = get_tarval_long(tv);
4910 /* construct load address */
4911 memset(&addr, 0, sizeof(addr));
4912 ptr = get_Builtin_param(node, 0);
4913 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* NOTE(review): the NULL-checks around base/index transformation are
 * partially elided in this listing */
4920 base = be_transform_node(base);
4923 if (index == NULL) {
4926 index = be_transform_node(index);
4929 dbgi = get_irn_dbg_info(node);
4930 block = be_transform_node(get_nodes_block(node));
4931 mem = be_transform_node(get_Builtin_mem(node));
4933 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4934 /* we have 3DNow!, this was already checked above */
4935 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4936 } else if (ia32_cg_config.use_sse_prefetch) {
4937 /* note: rw == 1 is IGNORED in that case */
4938 param = get_Builtin_param(node, 2);
4939 tv = get_Const_tarval(param);
4940 locality = get_tarval_long(tv);
4942 /* SSE style prefetch */
/* locality 0..3 maps to prefetchnta/t2/t1/t0 (switch elided in listing) */
4945 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4948 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4951 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4954 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4958 assert(ia32_cg_config.use_3dnow_prefetch);
4959 /* 3DNow! style prefetch */
4960 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4963 set_irn_pinned(new_node, get_irn_pinned(node));
4964 set_ia32_op_type(new_node, ia32_AddrModeS);
4965 set_ia32_ls_mode(new_node, mode_Bu);
4966 set_address(new_node, &addr);
4968 SET_IA32_ORIG_NODE(new_node, node);
4970 be_dep_on_frame(new_node);
4971 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4975 * Transform bsf like node
/* Generic helper: matches the single builtin operand as a (possibly folded
 * memory) address-mode operand and constructs the given destination-style
 * ia32 node (used for bsf/bsr/popcnt). */
4977 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4979 ir_node *param = get_Builtin_param(node, 0);
4980 dbg_info *dbgi = get_irn_dbg_info(node);
4982 ir_node *block = get_nodes_block(node);
4983 ir_node *new_block = be_transform_node(block);
4985 ia32_address_mode_t am;
4986 ia32_address_t *addr = &am.addr;
4989 match_arguments(&am, block, NULL, param, NULL, match_am);
4991 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4992 set_am_attributes(cnt, &am);
4993 set_ia32_ls_mode(cnt, get_irn_mode(param));
4995 SET_IA32_ORIG_NODE(cnt, node);
/* if a memory operand was folded in, rewire the memory Proj accordingly */
4996 return fix_mem_proj(cnt, &am);
5000 * Transform builtin ffs.
/* ffs(x) = bsf(x) + 1, and 0 when x == 0; computed branch-free as
 *   (bsf(x) | -(x == 0)) + 1
 * using the zero flag that bsf produces for a zero source operand. */
5002 static ir_node *gen_ffs(ir_node *node)
5004 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5005 ir_node *real = skip_Proj(bsf);
5006 dbg_info *dbgi = get_irn_dbg_info(real);
5007 ir_node *block = get_nodes_block(real);
5008 ir_node *flag, *set, *conv, *neg, *orn;
/* we also need the flags output of bsf; force the node to mode_T */
5011 if (get_irn_mode(real) != mode_T) {
5012 set_irn_mode(real, mode_T);
5013 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5016 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (x == 0) as an 8-bit 0/1 value */
5019 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
5020 SET_IA32_ORIG_NODE(set, node);
/* zero-extend the setcc byte to a full register */
5023 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5024 SET_IA32_ORIG_NODE(conv, node);
/* neg = -(x == 0): all-ones mask exactly when x is zero */
5027 neg = new_bd_ia32_Neg(dbgi, block, conv);
/* orn = bsf(x) | mask: forces the value to -1 when x == 0 */
5030 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5031 set_ia32_commutative(orn);
/* result = orn + 1 (yields 0 for x == 0, bit index + 1 otherwise) */
5034 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, orn, ia32_create_Immediate(NULL, 0, 1));
5038 * Transform builtin clz.
/* clz(x) = 31 - bsr(x) = bsr(x) ^ 31 for 32-bit operands
 * (like the builtin, undefined for x == 0). */
5040 static ir_node *gen_clz(ir_node *node)
5042 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5043 ir_node *real = skip_Proj(bsr);
5044 dbg_info *dbgi = get_irn_dbg_info(real);
5045 ir_node *block = get_nodes_block(real);
5046 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5048 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5052 * Transform builtin ctz.
/* ctz(x) = bsf(x) (like the builtin, undefined for x == 0). */
5054 static ir_node *gen_ctz(ir_node *node)
5056 return gen_unop_AM(node, new_bd_ia32_Bsf);
5060 * Transform builtin parity.
/* Compares the operand against 0 and evaluates the x86 parity flag via setp.
 * NOTE(review): on x86 PF only reflects the LOW BYTE of the result, so for
 * operands wider than 8 bit this does not test the parity of the whole
 * value -- verify; later libFirm versions xor the high and low halves
 * before testing PF. */
5062 static ir_node *gen_parity(ir_node *node)
5064 ir_node *param = get_Builtin_param(node, 0);
5065 dbg_info *dbgi = get_irn_dbg_info(node);
5067 ir_node *block = get_nodes_block(node);
5069 ir_node *new_block = be_transform_node(block);
5070 ir_node *imm, *cmp, *new_node;
5072 ia32_address_mode_t am;
5073 ia32_address_t *addr = &am.addr;
/* cmp param, 0 -- sets PF from the (low byte of the) operand */
5077 match_arguments(&am, block, NULL, param, NULL, match_am);
5078 imm = ia32_create_Immediate(NULL, 0, 0);
5079 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
5080 addr->mem, imm, am.new_op2, am.ins_permuted);
5081 set_am_attributes(cmp, &am);
5082 set_ia32_ls_mode(cmp, mode_Iu);
5084 SET_IA32_ORIG_NODE(cmp, node);
5086 cmp = fix_mem_proj(cmp, &am);
/* materialize PF as an 8-bit 0/1 value */
5089 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_cc_parity);
5090 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the setcc byte to a full register */
5093 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5094 nomem, new_node, mode_Bu);
5095 SET_IA32_ORIG_NODE(new_node, node);
5100 * Transform builtin popcount
5102 static ir_node *gen_popcount(ir_node *node)
5104 ir_node *param = get_Builtin_param(node, 0);
5105 dbg_info *dbgi = get_irn_dbg_info(node);
5107 ir_node *block = get_nodes_block(node);
5108 ir_node *new_block = be_transform_node(block);
5111 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5113 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5114 if (ia32_cg_config.use_popcnt) {
5115 ia32_address_mode_t am;
5116 ia32_address_t *addr = &am.addr;
5119 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5121 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5122 set_am_attributes(cnt, &am);
5123 set_ia32_ls_mode(cnt, get_irn_mode(param));
5125 SET_IA32_ORIG_NODE(cnt, node);
5126 return fix_mem_proj(cnt, &am);
5129 new_param = be_transform_node(param);
5131 /* do the standard popcount algo */
5133 /* m1 = x & 0x55555555 */
5134 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5135 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5138 simm = ia32_create_Immediate(NULL, 0, 1);
5139 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5141 /* m2 = s1 & 0x55555555 */
5142 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5145 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5147 /* m4 = m3 & 0x33333333 */
5148 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5149 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5152 simm = ia32_create_Immediate(NULL, 0, 2);
5153 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5155 /* m5 = s2 & 0x33333333 */
5156 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5159 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5161 /* m7 = m6 & 0x0F0F0F0F */
5162 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5163 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5166 simm = ia32_create_Immediate(NULL, 0, 4);
5167 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5169 /* m8 = s3 & 0x0F0F0F0F */
5170 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5173 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5175 /* m10 = m9 & 0x00FF00FF */
5176 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5177 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5180 simm = ia32_create_Immediate(NULL, 0, 8);
5181 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5183 /* m11 = s4 & 0x00FF00FF */
5184 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5186 /* m12 = m10 + m11 */
5187 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5189 /* m13 = m12 & 0x0000FFFF */
5190 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5191 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5193 /* s5 = m12 >> 16 */
5194 simm = ia32_create_Immediate(NULL, 0, 16);
5195 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5197 /* res = m13 + s5 */
5198 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5202 * Transform builtin byte swap.
5204 static ir_node *gen_bswap(ir_node *node)
5206 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5207 dbg_info *dbgi = get_irn_dbg_info(node);
5209 ir_node *block = get_nodes_block(node);
5210 ir_node *new_block = be_transform_node(block);
5211 ir_mode *mode = get_irn_mode(param);
5212 unsigned size = get_mode_size_bits(mode);
5213 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5217 if (ia32_cg_config.use_i486) {
5218 /* swap available */
5219 return new_bd_ia32_Bswap(dbgi, new_block, param);
5221 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5222 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5224 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5225 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5227 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5229 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5230 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5232 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5233 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5236 /* swap16 always available */
5237 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5240 panic("Invalid bswap size (%d)", size);
5245 * Transform builtin outport.
/* Emits an ia32 Outport (x86 `out` instruction). The port number is turned
 * into an immediate where possible; the operand width is taken from the
 * mode of the value written. */
5247 static ir_node *gen_outport(ir_node *node)
5249 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5250 ir_node *oldv = get_Builtin_param(node, 1);
5251 ir_mode *mode = get_irn_mode(oldv);
5252 ir_node *value = be_transform_node(oldv);
5253 ir_node *block = be_transform_node(get_nodes_block(node));
5254 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5255 dbg_info *dbgi = get_irn_dbg_info(node);
5257 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5258 set_ia32_ls_mode(res, mode);
5263 * Transform builtin inport.
/* Emits an ia32 Inport (x86 `in` instruction). The operand width comes
 * from the builtin's declared result type. */
5265 static ir_node *gen_inport(ir_node *node)
5267 ir_type *tp = get_Builtin_type(node);
5268 ir_type *rstp = get_method_res_type(tp, 0);
5269 ir_mode *mode = get_type_mode(rstp);
5270 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5271 ir_node *block = be_transform_node(get_nodes_block(node));
5272 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5273 dbg_info *dbgi = get_irn_dbg_info(node);
5275 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5276 set_ia32_ls_mode(res, mode);
5278 /* check for missing Result Proj */
5283 * Transform a builtin inner trampoline
/* Writes a 10-byte executable stub at `ptr`:
 *   B9 <env:imm32>   mov ecx, <env>
 *   E9 <rel:imm32>   jmp rel32 <callee>
 * The jmp displacement is relative to the end of the stub, hence the -10
 * adjustment before subtracting the trampoline address below. */
5285 static ir_node *gen_inner_trampoline(ir_node *node)
5287 ir_node *ptr = get_Builtin_param(node, 0);
5288 ir_node *callee = get_Builtin_param(node, 1);
5289 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5290 ir_node *mem = get_Builtin_mem(node);
5291 ir_node *block = get_nodes_block(node);
5292 ir_node *new_block = be_transform_node(block);
5296 ir_node *trampoline;
5298 dbg_info *dbgi = get_irn_dbg_info(node);
5299 ia32_address_t addr;
5301 /* construct store address */
5302 memset(&addr, 0, sizeof(addr));
5303 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5305 if (addr.base == NULL) {
5306 addr.base = noreg_GP;
5308 addr.base = be_transform_node(addr.base);
5311 if (addr.index == NULL) {
5312 addr.index = noreg_GP;
5314 addr.index = be_transform_node(addr.index);
5316 addr.mem = be_transform_node(mem);
5318 /* mov ecx, <env> */
/* store the 0xB9 opcode byte */
5319 val = ia32_create_Immediate(NULL, 0, 0xB9);
5320 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5321 addr.index, addr.mem, val);
5322 set_irn_pinned(store, get_irn_pinned(node));
5323 set_ia32_op_type(store, ia32_AddrModeD);
5324 set_ia32_ls_mode(store, mode_Bu);
5325 set_address(store, &addr);
/* store the 32-bit environment pointer (mov immediate operand) */
5329 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5330 addr.index, addr.mem, env);
5331 set_irn_pinned(store, get_irn_pinned(node));
5332 set_ia32_op_type(store, ia32_AddrModeD);
5333 set_ia32_ls_mode(store, mode_Iu);
5334 set_address(store, &addr);
5338 /* jmp rel <callee> */
/* store the 0xE9 opcode byte */
5339 val = ia32_create_Immediate(NULL, 0, 0xE9);
5340 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5341 addr.index, addr.mem, val);
5342 set_irn_pinned(store, get_irn_pinned(node));
5343 set_ia32_op_type(store, ia32_AddrModeD);
5344 set_ia32_ls_mode(store, mode_Bu);
5345 set_address(store, &addr);
5349 trampoline = be_transform_node(ptr);
5351 /* the callee is typically an immediate */
/* rel = callee - 10 - trampoline (displacement from end of the stub) */
5352 if (is_SymConst(callee)) {
5353 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5355 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
5357 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
/* store the 32-bit jmp displacement */
5359 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5360 addr.index, addr.mem, rel);
5361 set_irn_pinned(store, get_irn_pinned(node));
5362 set_ia32_op_type(store, ia32_AddrModeD);
5363 set_ia32_ls_mode(store, mode_Iu);
5364 set_address(store, &addr);
/* result tuple: memory and the trampoline address */
5369 return new_r_Tuple(new_block, 2, in);
5373 * Transform Builtin node.
/* Dispatches on the builtin kind to the dedicated gen_* transformer;
 * panics for kinds the ia32 backend does not implement. */
5375 static ir_node *gen_Builtin(ir_node *node)
5377 ir_builtin_kind kind = get_Builtin_kind(node);
5381 return gen_trap(node);
5382 case ir_bk_debugbreak:
5383 return gen_debugbreak(node);
5384 case ir_bk_return_address:
5385 return gen_return_address(node);
5386 case ir_bk_frame_address:
5387 return gen_frame_address(node);
5388 case ir_bk_prefetch:
5389 return gen_prefetch(node);
5391 return gen_ffs(node);
5393 return gen_clz(node);
5395 return gen_ctz(node);
5397 return gen_parity(node);
5398 case ir_bk_popcount:
5399 return gen_popcount(node);
5401 return gen_bswap(node);
5403 return gen_outport(node);
5405 return gen_inport(node);
5406 case ir_bk_inner_trampoline:
5407 return gen_inner_trampoline(node);
5409 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5413 * Transform Proj(Builtin) node.
/* Maps Projs of the firm Builtin node onto the Projs/Tuple entries produced
 * by the corresponding ia32 transformer. */
5415 static ir_node *gen_Proj_Builtin(ir_node *proj)
5417 ir_node *node = get_Proj_pred(proj);
5418 ir_node *new_node = be_transform_node(node);
5419 ir_builtin_kind kind = get_Builtin_kind(node);
/* builtins with a single data result: the transformed node IS the result */
5422 case ir_bk_return_address:
5423 case ir_bk_frame_address:
5428 case ir_bk_popcount:
5430 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* memory-only builtins */
5433 case ir_bk_debugbreak:
5434 case ir_bk_prefetch:
5436 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport has both a result and a memory Proj */
5439 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5440 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5442 assert(get_Proj_proj(proj) == pn_Builtin_M);
5443 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
/* inner_trampoline was transformed into a Tuple (mem, trampoline address) */
5445 case ir_bk_inner_trampoline:
5446 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5447 return get_Tuple_pred(new_node, 1);
5449 assert(get_Proj_proj(proj) == pn_Builtin_M);
5450 return get_Tuple_pred(new_node, 0);
5453 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Transform be_IncSP: duplicate it, but record that the stack-pointer
 * adjustment (add/sub esp) clobbers the flags register. */
5456 static ir_node *gen_be_IncSP(ir_node *node)
5458 ir_node *res = be_duplicate_node(node);
5459 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5465 * Transform the Projs from a be_Call.
/* Renumbers call result Projs from be_Call numbering to ia32_Call numbering.
 * For register results the matching output is found by comparing the
 * (limited) register requirements of the transformed call. */
5467 static ir_node *gen_Proj_be_Call(ir_node *node)
5469 ir_node *call = get_Proj_pred(node);
5470 ir_node *new_call = be_transform_node(call);
5471 dbg_info *dbgi = get_irn_dbg_info(node);
5472 long proj = get_Proj_proj(node);
5473 ir_mode *mode = get_irn_mode(node);
5476 if (proj == pn_be_Call_M_regular) {
5477 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5479 /* transform call modes */
5480 if (mode_is_data(mode)) {
5481 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5485 /* Map from be_Call to ia32_Call proj number */
5486 if (proj == pn_be_Call_sp) {
5487 proj = pn_ia32_Call_stack;
5488 } else if (proj == pn_be_Call_M_regular) {
5489 proj = pn_ia32_Call_M;
5491 arch_register_req_t const *const req = arch_get_register_req_out(node);
5492 int const n_outs = arch_irn_get_n_outs(new_call);
5495 assert(proj >= pn_be_Call_first_res);
5496 assert(req->type & arch_register_req_type_limited);
/* search the transformed call's outputs for one with the same
 * limited register requirement */
5498 for (i = 0; i < n_outs; ++i) {
5499 arch_register_req_t const *const new_req
5500 = arch_get_out_register_req(new_call, i);
5502 if (!(new_req->type & arch_register_req_type_limited) ||
5503 new_req->cls != req->cls ||
5504 *new_req->limited != *req->limited)
5513 res = new_rd_Proj(dbgi, new_call, mode, proj);
5515 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5517 case pn_ia32_Call_stack:
5518 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5521 case pn_ia32_Call_fpcw:
5522 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5530 * Transform the Projs from a Cmp.
/* Proj(Cmp) must not survive until here: the mode_b lowering phase is
 * expected to have removed them all. */
5532 static ir_node *gen_Proj_Cmp(ir_node *node)
5534 /* this probably means not all mode_b nodes were lowered... */
5535 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/* Transform Proj(ASM): renumber the Proj onto the transformed ASM node.
 * The memory output is the last output of the transformed node; int and
 * float outputs keep (elided) positions. */
5539 static ir_node *gen_Proj_ASM(ir_node *node)
5541 ir_mode *mode = get_irn_mode(node);
5542 ir_node *pred = get_Proj_pred(node);
5543 ir_node *new_pred = be_transform_node(pred);
5544 long pos = get_Proj_proj(node);
5546 if (mode == mode_M) {
5547 pos = arch_irn_get_n_outs(new_pred)-1;
5548 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5550 } else if (mode_is_float(mode)) {
5553 panic("unexpected proj mode at ASM");
5556 return new_r_Proj(new_pred, mode, pos);
5560 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: forwards to the specialised gen_Proj_* handler
 * for the predecessor's opcode, handles Start Projs inline and falls back
 * to duplication otherwise. */
5562 static ir_node *gen_Proj(ir_node *node)
5564 ir_node *pred = get_Proj_pred(node);
5567 switch (get_irn_opcode(pred)) {
/* a Store has only a memory result; it maps to the transformed Store */
5569 proj = get_Proj_proj(node);
5570 if (proj == pn_Store_M) {
5571 return be_transform_node(pred);
5573 panic("No idea how to transform proj->Store");
5576 return gen_Proj_Load(node);
5578 return gen_Proj_ASM(node);
5580 return gen_Proj_Builtin(node);
5582 return gen_Proj_Div(node);
5584 return gen_Proj_Mod(node);
5586 return gen_Proj_CopyB(node);
5588 return gen_Proj_be_SubSP(node);
5590 return gen_Proj_be_AddSP(node);
5592 return gen_Proj_be_Call(node);
5594 return gen_Proj_Cmp(node);
5596 proj = get_Proj_proj(node);
5598 case pn_Start_X_initial_exec: {
5599 ir_node *block = get_nodes_block(pred);
5600 ir_node *new_block = be_transform_node(block);
5601 dbg_info *dbgi = get_irn_dbg_info(node);
5602 /* we exchange the ProjX with a jump */
5603 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5608 case pn_Start_P_tls:
5609 return ia32_gen_Proj_tls(node);
5614 if (is_ia32_l_FloattoLL(pred)) {
5615 return gen_Proj_l_FloattoLL(node);
5617 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5621 ir_mode *mode = get_irn_mode(node);
5622 if (ia32_mode_needs_gp_reg(mode)) {
5623 ir_node *new_pred = be_transform_node(pred);
5624 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5625 get_Proj_proj(node));
/* NOTE(review): pokes the node number directly into the ir_node --
 * part of the "quick hack" above */
5626 new_proj->node_nr = node->node_nr;
5631 return be_duplicate_node(node);
5635 * Enters all transform functions into the generic pointer
/* Registers one transform callback per firm/backend opcode; opcodes
 * without an entry fall back to the framework's default handling. */
5637 static void register_transformers(void)
5639 /* first clear the generic function pointer for all ops */
5640 be_start_transform_setup();
5642 be_set_transform_function(op_Add, gen_Add);
5643 be_set_transform_function(op_And, gen_And);
5644 be_set_transform_function(op_ASM, ia32_gen_ASM);
5645 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5646 be_set_transform_function(op_be_Call, gen_be_Call);
5647 be_set_transform_function(op_be_Copy, gen_be_Copy);
5648 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5649 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5650 be_set_transform_function(op_be_Return, gen_be_Return);
5651 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5652 be_set_transform_function(op_Builtin, gen_Builtin);
5653 be_set_transform_function(op_Cmp, gen_Cmp);
5654 be_set_transform_function(op_Cond, gen_Cond);
5655 be_set_transform_function(op_Const, gen_Const);
5656 be_set_transform_function(op_Conv, gen_Conv);
5657 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5658 be_set_transform_function(op_Div, gen_Div);
5659 be_set_transform_function(op_Eor, gen_Eor);
5660 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5661 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5662 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5663 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5664 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5665 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5666 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5667 be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
5668 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5669 be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
5670 be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
5671 be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
5672 be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
5673 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5674 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5675 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5676 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5677 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5678 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5679 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5680 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5681 be_set_transform_function(op_IJmp, gen_IJmp);
5682 be_set_transform_function(op_Jmp, gen_Jmp);
5683 be_set_transform_function(op_Load, gen_Load);
5684 be_set_transform_function(op_Minus, gen_Minus);
5685 be_set_transform_function(op_Mod, gen_Mod);
5686 be_set_transform_function(op_Mul, gen_Mul);
5687 be_set_transform_function(op_Mulh, gen_Mulh);
5688 be_set_transform_function(op_Mux, gen_Mux);
5689 be_set_transform_function(op_Not, gen_Not);
5690 be_set_transform_function(op_Or, gen_Or);
5691 be_set_transform_function(op_Phi, gen_Phi);
5692 be_set_transform_function(op_Proj, gen_Proj);
5693 be_set_transform_function(op_Rotl, gen_Rotl);
5694 be_set_transform_function(op_Shl, gen_Shl);
5695 be_set_transform_function(op_Shr, gen_Shr);
5696 be_set_transform_function(op_Shrs, gen_Shrs);
5697 be_set_transform_function(op_Store, gen_Store);
5698 be_set_transform_function(op_Sub, gen_Sub);
5699 be_set_transform_function(op_SymConst, gen_SymConst);
5700 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5704 * Pre-transform all unknown and noreg nodes.
/* Also caches the per-graph NoMem and NoReg_GP nodes in file-level
 * variables used by the transformers above. */
5706 static void ia32_pretransform_node(void)
5708 ir_graph *irg = current_ir_graph;
5709 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5711 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5712 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5713 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5715 nomem = get_irg_no_mem(irg);
5716 noreg_GP = ia32_new_NoReg_gp(irg);
5722 * Post-process all calls if we are in SSE mode.
5723 * The ABI requires that the results are in st0, copy them
5724 * to a xmm register.
/* Iterates over the calls/types recorded during transformation. For each
 * float result: either patch a consuming xStore into a vfst (store directly
 * from st0), or spill st0 to the frame and reload it into an XMM register,
 * rerouting all remaining users to the reloaded value. */
5726 static void postprocess_fp_call_results(void)
5730 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5731 ir_node *call = call_list[i];
5732 ir_type *mtp = call_types[i];
5735 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5736 ir_type *res_tp = get_method_res_type(mtp, j);
5737 ir_node *res, *new_res;
5738 const ir_edge_t *edge, *next;
5741 if (! is_atomic_type(res_tp)) {
5742 /* no floating point return */
5745 mode = get_type_mode(res_tp);
5746 if (! mode_is_float(mode)) {
5747 /* no floating point return */
5751 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5754 /* now patch the users */
5755 foreach_out_edge_safe(res, edge, next) {
5756 ir_node *succ = get_edge_src_irn(edge);
/* Keeps are left untouched */
5759 if (be_is_Keep(succ))
5762 if (is_ia32_xStore(succ)) {
5763 /* an xStore can be patched into an vfst */
5764 dbg_info *db = get_irn_dbg_info(succ);
5765 ir_node *block = get_nodes_block(succ);
5766 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5767 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5768 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5769 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5770 ir_mode *mode = get_ia32_ls_mode(succ);
5772 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5773 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5774 if (is_ia32_use_frame(succ))
5775 set_ia32_use_frame(st);
5776 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5777 set_irn_pinned(st, get_irn_pinned(succ));
5778 set_ia32_op_type(st, ia32_AddrModeD);
/* create the st0 -> stack -> xmm transfer once, lazily */
5782 if (new_res == NULL) {
5783 dbg_info *db = get_irn_dbg_info(call);
5784 ir_node *block = get_nodes_block(call);
5785 ir_node *frame = get_irg_frame(current_ir_graph);
5786 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5787 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5788 ir_node *vfst, *xld, *new_mem;
5790 /* store st(0) on stack */
5791 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5792 set_ia32_op_type(vfst, ia32_AddrModeD);
5793 set_ia32_use_frame(vfst);
5795 /* load into SSE register */
5796 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5797 set_ia32_op_type(xld, ia32_AddrModeS);
5798 set_ia32_use_frame(xld);
5800 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5801 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5803 if (old_mem != NULL) {
5804 edges_reroute(old_mem, new_mem, current_ir_graph);
5808 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5815 /* do the transformation */
/* Entry point of the ia32 transform phase: sets up per-run state (heights,
 * non-address-mode analysis, call-result bookkeeping), disables CSE for
 * the duration of the transformation, runs it and tears everything down. */
5816 void ia32_transform_graph(ir_graph *irg)
5820 register_transformers();
5821 initial_fpcw = NULL;
5822 ia32_no_pic_adjust = 0;
5824 be_timer_push(T_HEIGHTS);
5825 ia32_heights = heights_new(irg);
5826 be_timer_pop(T_HEIGHTS);
5827 ia32_calculate_non_address_mode_nodes(irg);
5829 /* the transform phase is not safe for CSE (yet) because several nodes get
5830 * attributes set after their creation */
5831 cse_last = get_opt_cse();
/* call_list/call_types collect float-returning calls for the SSE fixup */
5834 call_list = NEW_ARR_F(ir_node *, 0);
5835 call_types = NEW_ARR_F(ir_type *, 0);
5836 be_transform_graph(irg, ia32_pretransform_node);
5838 if (ia32_cg_config.use_sse2)
5839 postprocess_fp_call_results();
5840 DEL_ARR_F(call_types);
5841 DEL_ARR_F(call_list);
5843 set_opt_cse(cse_last);
5845 ia32_free_non_address_mode_nodes();
5846 heights_free(ia32_heights);
5847 ia32_heights = NULL;
5850 void ia32_init_transform(void)
5852 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");