2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
56 #include "betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
71 /* define this to construct SSE constants instead of load them */
72 #undef CONSTRUCT_SSE_CONST
/* Hex bit patterns used for the float sign-bit and abs (sign cleared) masks,
 * plus decimal constants; all are consumed as strings by
 * new_tarval_from_str() in ia32_gen_fp_known_const() below. */
75 #define SFP_SIGN "0x80000000"
76 #define DFP_SIGN "0x8000000000000000"
77 #define SFP_ABS "0x7FFFFFFF"
78 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
79 #define DFP_INTMAX "9223372036854775807"
80 #define ULL_BIAS "18446744073709551616"
/* Linker-level names for the constant entities created for the masks above. */
82 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
83 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
84 #define ENT_SFP_ABS "C_ia32_sfp_abs"
85 #define ENT_DFP_ABS "C_ia32_dfp_abs"
86 #define ENT_ULL_BIAS "C_ia32_ull_bias"
/* Shorthands for the modes of the x87 (vfp) and SSE (xmm) register classes. */
88 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
89 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
91 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Initial x87 control word: old_initial_fpcw is the pre-transform node,
 * initial_fpcw its lazily transformed counterpart (see get_fpcw()). */
93 static ir_node *old_initial_fpcw = NULL;
94 static ir_node *initial_fpcw = NULL;
95 int ia32_no_pic_adjust;
/* Callback signatures of the generated ia32 node constructors, used by the
 * generic gen_binop()/gen_unop()/gen_shift_binop() helpers below.
 * NOTE(review): this listing drops source lines (gaps in the embedded
 * numbering) -- several typedefs below are missing their trailing
 * parameter lines. */
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 		ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 		ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 		ir_node *base, ir_node *index, ir_node *mem);
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in this file. */
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 		char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 		dbg_info *dbgi, ir_node *block,
125 		ir_node *op, ir_node *orig_node);
127 /* it's enough to have those once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero is a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero is a node represents the -1 constant. */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
155 static bool is_simple_x87_Const(ir_node *node)
157 ir_tarval *tv = get_Const_tarval(node);
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
/**
 * Returns true if the constant can be materialized with simple SSE
 * instructions instead of loading it from memory.
 * NOTE(review): this listing is missing several source lines here (gaps in
 * the embedded numbering), including the end of the null-check condition
 * and the return statements -- incomplete as shown.
 */
166 * returns true if constant can be created with a simple float command
168 static bool is_simple_sse_Const(ir_node *node)
170 	ir_tarval *tv = get_Const_tarval(node);
171 	ir_mode *mode = get_tarval_mode(tv);
176 	if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 	if (mode == mode_D) {
		/* assemble the low 32 bits of the double from its byte view */
184 		unsigned val = get_tarval_sub_bits(tv, 0) |
185 		               (get_tarval_sub_bits(tv, 1) << 8) |
186 		               (get_tarval_sub_bits(tv, 2) << 16) |
187 		               (get_tarval_sub_bits(tv, 3) << 24);
189 		/* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 	/* TODO: match all the other float constants */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
201 static ir_node *get_symconst_base(void)
203 ir_graph *irg = current_ir_graph;
205 if (be_get_irg_options(irg)->pic) {
206 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
207 return arch_env->impl->get_pic_base(irg);
/**
 * Transforms a firm Const into an ia32 constant: SSE zero/movd/memory load
 * for floats under SSE2, fldz/fld1/memory load for x87 floats, and an
 * ia32_Const immediate for integer modes.
 * NOTE(review): this listing is missing many source lines (declarations of
 * load/res/floatent/base, several braces and returns) -- incomplete as shown.
 */
214 * Transforms a Const.
216 static ir_node *gen_Const(ir_node *node)
218 	ir_node *old_block = get_nodes_block(node);
219 	ir_node *block = be_transform_node(old_block);
220 	dbg_info *dbgi = get_irn_dbg_info(node);
221 	ir_mode *mode = get_irn_mode(node);
223 	assert(is_Const(node));
225 	if (mode_is_float(mode)) {
		/* --- SSE2 path --- */
230 		if (ia32_cg_config.use_sse2) {
231 			ir_tarval *tv = get_Const_tarval(node);
232 			if (tarval_is_null(tv)) {
233 				load = new_bd_ia32_xZero(dbgi, block);
234 				set_ia32_ls_mode(load, mode);
236 #ifdef CONSTRUCT_SSE_CONST
			/* 1.0: build all-ones, then shift to the exponent pattern */
237 			} else if (tarval_is_one(tv)) {
238 				int cnst = mode == mode_F ? 26 : 55;
239 				ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
240 				ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
241 				ir_node *pslld, *psrld;
243 				load = new_bd_ia32_xAllOnes(dbgi, block);
244 				set_ia32_ls_mode(load, mode);
245 				pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
246 				set_ia32_ls_mode(pslld, mode);
247 				psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
248 				set_ia32_ls_mode(psrld, mode);
250 #endif /* CONSTRUCT_SSE_CONST */
251 			} else if (mode == mode_F) {
252 				/* we can place any 32bit constant by using a movd gp, sse */
253 				unsigned val = get_tarval_sub_bits(tv, 0) |
254 				               (get_tarval_sub_bits(tv, 1) << 8) |
255 				               (get_tarval_sub_bits(tv, 2) << 16) |
256 				               (get_tarval_sub_bits(tv, 3) << 24);
257 				ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
258 				load = new_bd_ia32_xMovd(dbgi, block, cnst);
259 				set_ia32_ls_mode(load, mode);
263 #ifdef CONSTRUCT_SSE_CONST
264 			if (mode == mode_D) {
265 				unsigned val = get_tarval_sub_bits(tv, 0) |
266 				               (get_tarval_sub_bits(tv, 1) << 8) |
267 				               (get_tarval_sub_bits(tv, 2) << 16) |
268 				               (get_tarval_sub_bits(tv, 3) << 24);
270 					ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
271 					ir_node *cnst, *psllq;
273 					/* fine, lower 32bit are zero, produce 32bit value */
274 					val = get_tarval_sub_bits(tv, 4) |
275 					      (get_tarval_sub_bits(tv, 5) << 8) |
276 					      (get_tarval_sub_bits(tv, 6) << 16) |
277 					      (get_tarval_sub_bits(tv, 7) << 24);
278 					cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
279 					load = new_bd_ia32_xMovd(dbgi, block, cnst);
280 					set_ia32_ls_mode(load, mode);
281 					psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
282 					set_ia32_ls_mode(psllq, mode);
287 #endif /* CONSTRUCT_SSE_CONST */
			/* general case: load the constant from a private entity */
288 			floatent = ia32_create_float_const_entity(node);
290 			base = get_symconst_base();
291 			load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
293 			set_ia32_op_type(load, ia32_AddrModeS);
294 			set_ia32_am_sc(load, floatent);
295 			arch_add_irn_flags(load, arch_irn_flags_rematerializable);
296 			res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
		/* --- x87 path --- */
299 			if (is_Const_null(node)) {
300 				load = new_bd_ia32_vfldz(dbgi, block);
302 				set_ia32_ls_mode(load, mode);
303 			} else if (is_Const_one(node)) {
304 				load = new_bd_ia32_vfld1(dbgi, block);
306 				set_ia32_ls_mode(load, mode);
311 				floatent = ia32_create_float_const_entity(node);
312 				/* create_float_const_ent is smart and sometimes creates
314 				ls_mode = get_type_mode(get_entity_type(floatent));
315 				base = get_symconst_base();
316 				load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
318 				set_ia32_op_type(load, ia32_AddrModeS);
319 				set_ia32_am_sc(load, floatent);
320 				arch_add_irn_flags(load, arch_irn_flags_rematerializable);
321 				res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
324 #ifdef CONSTRUCT_SSE_CONST
326 #endif /* CONSTRUCT_SSE_CONST */
327 		SET_IA32_ORIG_NODE(load, node);
329 	} else { /* non-float mode */
331 		ir_tarval *tv = get_Const_tarval(node);
		/* normalize to 32bit unsigned before extracting the value */
334 		tv = tarval_convert_to(tv, mode_Iu);
336 		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
338 			panic("couldn't convert constant tarval (%+F)", node);
340 		val = get_tarval_long(tv);
342 		cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
343 		SET_IA32_ORIG_NODE(cnst, node);
/**
 * Transforms a SymConst: float symconsts become an address-mode load
 * (SSE xLoad or x87 vfld); integer address symconsts become either a
 * TLS-relative Lea (for TLS entities) or an ia32_Const with the entity.
 * NOTE(review): listing is missing several lines here (declarations of
 * cnst/entity, else-branch braces, return) -- incomplete as shown.
 */
350 * Transforms a SymConst.
352 static ir_node *gen_SymConst(ir_node *node)
354 	ir_node *old_block = get_nodes_block(node);
355 	ir_node *block = be_transform_node(old_block);
356 	dbg_info *dbgi = get_irn_dbg_info(node);
357 	ir_mode *mode = get_irn_mode(node);
360 	if (mode_is_float(mode)) {
361 		if (ia32_cg_config.use_sse2)
362 			cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 			cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 		set_ia32_am_sc(cnst, get_SymConst_entity(node));
366 		set_ia32_use_frame(cnst);
370 		if (get_SymConst_kind(node) != symconst_addr_ent) {
371 			panic("backend only support symconst_addr_ent (at %+F)", node);
373 		entity = get_SymConst_entity(node);
374 		if (get_entity_owner(entity) == get_tls_type()) {
			/* TLS entities are addressed relative to the TLS base register */
375 			ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
376 			ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
377 			set_ia32_am_sc(lea, entity);
380 			cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
384 	SET_IA32_ORIG_NODE(cnst, node);
390 * Create a float type for the given mode and cache it.
392 * @param mode the mode for the float type (might be integer mode for SSE2 types)
393 * @param align alignment
395 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
401 if (mode == mode_Iu) {
402 static ir_type *int_Iu[16] = {NULL, };
404 if (int_Iu[align] == NULL) {
405 int_Iu[align] = tp = new_type_primitive(mode);
406 /* set the specified alignment */
407 set_type_alignment_bytes(tp, align);
409 return int_Iu[align];
410 } else if (mode == mode_Lu) {
411 static ir_type *int_Lu[16] = {NULL, };
413 if (int_Lu[align] == NULL) {
414 int_Lu[align] = tp = new_type_primitive(mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return int_Lu[align];
419 } else if (mode == mode_F) {
420 static ir_type *float_F[16] = {NULL, };
422 if (float_F[align] == NULL) {
423 float_F[align] = tp = new_type_primitive(mode);
424 /* set the specified alignment */
425 set_type_alignment_bytes(tp, align);
427 return float_F[align];
428 } else if (mode == mode_D) {
429 static ir_type *float_D[16] = {NULL, };
431 if (float_D[align] == NULL) {
432 float_D[align] = tp = new_type_primitive(mode);
433 /* set the specified alignment */
434 set_type_alignment_bytes(tp, align);
436 return float_D[align];
438 static ir_type *float_E[16] = {NULL, };
440 if (float_E[align] == NULL) {
441 float_E[align] = tp = new_type_primitive(mode);
442 /* set the specified alignment */
443 set_type_alignment_bytes(tp, align);
445 return float_E[align];
450 * Create a float[2] array type for the given atomic type.
452 * @param tp the atomic type
454 static ir_type *ia32_create_float_array(ir_type *tp)
456 ir_mode *mode = get_type_mode(tp);
457 unsigned align = get_type_alignment_bytes(tp);
462 if (mode == mode_F) {
463 static ir_type *float_F[16] = {NULL, };
465 if (float_F[align] != NULL)
466 return float_F[align];
467 arr = float_F[align] = new_type_array(1, tp);
468 } else if (mode == mode_D) {
469 static ir_type *float_D[16] = {NULL, };
471 if (float_D[align] != NULL)
472 return float_D[align];
473 arr = float_D[align] = new_type_array(1, tp);
475 static ir_type *float_E[16] = {NULL, };
477 if (float_E[align] != NULL)
478 return float_E[align];
479 arr = float_E[align] = new_type_array(1, tp);
481 set_type_alignment_bytes(arr, align);
482 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
483 set_type_state(arr, layout_fixed);
/* Generates (and caches) a global, private, constant entity for a known FP
 * constant (sign/abs masks and the ULL bias; used for FP Neg + Abs and
 * unsigned-long-long conversion).
 * NOTE(review): listing is missing lines here (two struct fields -- used
 * below as .mode and .align -- plus local declarations and braces). */
487 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
488 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
490 	static const struct {
491 		const char *ent_name;
492 		const char *cnst_str;
495 	} names [ia32_known_const_max] = {
496 		{ ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
497 		{ ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
498 		{ ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
499 		{ ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
500 		{ ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
502 	static ir_entity *ent_cache[ia32_known_const_max];
504 	const char *ent_name, *cnst_str;
510 	ent_name = names[kct].ent_name;
511 	if (! ent_cache[kct]) {
512 		cnst_str = names[kct].cnst_str;
		/* pick the tarval mode encoded in the table's .mode field */
514 		switch (names[kct].mode) {
515 		case 0: mode = mode_Iu; break;
516 		case 1: mode = mode_Lu; break;
517 		default: mode = mode_F; break;
519 		tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
520 		tp = ia32_create_float_type(mode, names[kct].align);
522 		if (kct == ia32_ULLBIAS)
523 			tp = ia32_create_float_array(tp);
524 		ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
526 		set_entity_ld_ident(ent, get_entity_ident(ent));
527 		add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
528 		set_entity_visibility(ent, ir_visibility_private);
		/* the ULL bias is a two-element array: { 0, bias } */
530 		if (kct == ia32_ULLBIAS) {
531 			ir_initializer_t *initializer = create_initializer_compound(2);
533 			set_initializer_compound_value(initializer, 0,
534 					create_initializer_tarval(get_mode_null(mode)));
535 			set_initializer_compound_value(initializer, 1,
536 					create_initializer_tarval(tv));
538 			set_entity_initializer(ent, initializer);
540 			set_entity_initializer(ent, create_initializer_tarval(tv));
543 	/* cache the entry */
544 	ent_cache[kct] = ent;
547 	return ent_cache[kct];
/* NOTE(review): listing is missing lines in this function (declarations of
 * load/pn, the return statements of each early-exit branch) -- incomplete
 * as shown. */
551 * return true if the node is a Proj(Load) and could be used in source address
552 * mode for another node. Will return only true if the @p other node is not
553 * dependent on the memory of the Load (for binary operations use the other
554 * input here, for unary operations use NULL).
556 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
557 		ir_node *other, ir_node *other2, match_flags_t flags)
562 	/* float constants are always available */
563 	if (is_Const(node)) {
564 		ir_mode *mode = get_irn_mode(node);
565 		if (mode_is_float(mode)) {
566 			if (ia32_cg_config.use_sse2) {
567 				if (is_simple_sse_Const(node))
570 				if (is_simple_x87_Const(node))
573 			if (get_irn_n_edges(node) > 1)
581 	load = get_Proj_pred(node);
582 	pn = get_Proj_proj(node);
583 	if (!is_Load(load) || pn != pn_Load_res)
585 	if (get_nodes_block(load) != block)
587 	/* we only use address mode if we're the only user of the load */
588 	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
590 	/* in some edge cases with address mode we might reach the load normally
591 	 * and through some AM sequence, if it is already materialized then we
592 	 * can't create an AM node from it */
593 	if (be_is_transformed(node))
596 	/* don't do AM if other node inputs depend on the load (via mem-proj) */
597 	if (other != NULL && ia32_prevents_AM(block, load, other))
600 	if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Bundles the state of a matched ia32 address/operand mode.
 * NOTE(review): listing is missing most fields here; code elsewhere in this
 * file also accesses am->addr, am->ls_mode, am->mem_proj, am->pinned,
 * am->new_op1 and am->new_op2 -- confirm against the full source. */
606 typedef struct ia32_address_mode_t ia32_address_mode_t;
607 struct ia32_address_mode_t {
612 	ia32_op_type_t op_type;
616 	unsigned commutative : 1;
617 	unsigned ins_permuted : 1;
620 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
622 /* construct load address */
623 memset(addr, 0, sizeof(addr[0]));
624 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
626 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
627 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
628 addr->mem = be_transform_node(mem);
/* Fill the address-mode struct for a node usable in source-AM: either a
 * float Const (loaded from its constant entity) or a Proj(Load).
 * NOTE(review): listing is missing lines (declarations of load/ptr/mem/
 * new_mem, an early return after the Const case, closing braces). */
631 static void build_address(ia32_address_mode_t *am, ir_node *node,
632 		ia32_create_am_flags_t flags)
634 	ia32_address_t *addr = &am->addr;
640 	/* floating point immediates */
641 	if (is_Const(node)) {
642 		ir_entity *entity = ia32_create_float_const_entity(node);
643 		addr->base = get_symconst_base();
644 		addr->index = noreg_GP;
646 		addr->symconst_ent = entity;
647 		addr->tls_segment = false;
649 		am->ls_mode = get_type_mode(get_entity_type(entity));
650 		am->pinned = op_pin_state_floats;
	/* Proj(Load) case: reuse the load's address, mode and memory */
654 	load = get_Proj_pred(node);
655 	ptr = get_Load_ptr(load);
656 	mem = get_Load_mem(load);
657 	new_mem = be_transform_node(mem);
658 	am->pinned = get_irn_pinned(load);
659 	am->ls_mode = get_Load_mode(load);
660 	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
663 	/* construct load address */
664 	ia32_create_address_mode(addr, ptr, flags);
666 	addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
667 	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy the fields of an ia32_address_t onto an ia32 node's attributes.
 * NOTE(review): line 679 of the original is missing from this listing --
 * presumably an `if (addr->use_frame)` guard before set_ia32_use_frame()
 * (addr->use_frame exists, see am_has_immediates()) -- confirm against
 * the full source. */
671 static void set_address(ir_node *node, const ia32_address_t *addr)
673 	set_ia32_am_scale(node, addr->scale);
674 	set_ia32_am_sc(node, addr->symconst_ent);
675 	set_ia32_am_offs_int(node, addr->offset);
676 	set_ia32_am_tls_segment(node, addr->tls_segment);
677 	if (addr->symconst_sign)
678 		set_ia32_am_sc_sign(node);
680 	set_ia32_use_frame(node);
681 	set_ia32_frame_ent(node, addr->frame_entity);
/* NOTE(review): lines 697-698 of the original are missing from this
 * listing -- presumably a closing brace and an `if (am->commutative)`
 * guard before set_ia32_commutative() (the am struct has a commutative
 * bit) -- confirm against the full source. */
685 * Apply attributes of a given address mode to a node.
687 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
689 	set_address(node, &am->addr);
691 	set_ia32_op_type(node, am->op_type);
692 	set_ia32_ls_mode(node, am->ls_mode);
693 	if (am->pinned == op_pin_state_pinned) {
694 		/* beware: some nodes are already pinned and did not allow to change the state */
695 		if (get_irn_pinned(node) != op_pin_state_pinned)
696 			set_irn_pinned(node, op_pin_state_pinned);
699 		set_ia32_commutative(node);
/* NOTE(review): listing is missing lines (local declarations of
 * src_mode/dest_mode, the is_Conv() pre-check and its early return,
 * and the `return` keyword before line 727). */
703 * Check, if a given node is a Down-Conv, i.e. an integer Conv
704 * from a mode with more bits to a mode with fewer bits.
705 * Moreover, we return only true if the node has not more than 1 user.
707 * @param node the node
708 * @return non-zero if node is a Down-Conv
710 static int is_downconv(const ir_node *node)
718 	/* we only want to skip the conv when we're the only user
719 	 * (because this test is used in the context of address-mode selection
720 	 * and we don't want to use address mode for multiple users) */
721 	if (get_irn_n_edges(node) > 1)
724 	src_mode = get_irn_mode(get_Conv_op(node));
725 	dest_mode = get_irn_mode(node);
727 		ia32_mode_needs_gp_reg(src_mode) &&
728 		ia32_mode_needs_gp_reg(dest_mode) &&
729 		get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
732 /** Skip all Down-Conv's on a given node and return the resulting node. */
733 ir_node *ia32_skip_downconv(ir_node *node)
735 while (is_downconv(node))
736 node = get_Conv_op(node);
/* Checks for a single-user integer Conv between modes of equal bit size
 * (a pure signedness change).
 * NOTE(review): listing is missing lines (local declarations, the
 * is_Conv() pre-check, and the `return` keyword before line 758). */
741 static bool is_sameconv(ir_node *node)
749 	/* we only want to skip the conv when we're the only user
750 	 * (because this test is used in the context of address-mode selection
751 	 * and we don't want to use address mode for multiple users) */
752 	if (get_irn_n_edges(node) > 1)
755 	src_mode = get_irn_mode(get_Conv_op(node));
756 	dest_mode = get_irn_mode(node);
758 		ia32_mode_needs_gp_reg(src_mode) &&
759 		ia32_mode_needs_gp_reg(dest_mode) &&
760 		get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
763 /** Skip all signedness convs */
764 static ir_node *ia32_skip_sameconv(ir_node *node)
766 while (is_sameconv(node))
767 node = get_Conv_op(node);
/* Widen a small integer node to 32 bit via create_I2I_Conv.
 * NOTE(review): listing is missing lines (declarations of block/dbgi/
 * tgt_mode and the tgt_mode assignments in the signed/unsigned branches). */
772 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
774 	ir_mode *mode = get_irn_mode(node);
779 	if (mode_is_signed(mode)) {
784 	block = get_nodes_block(node);
785 	dbgi = get_irn_dbg_info(node);
787 	return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/* NOTE(review): listing is missing many lines in this function (flag
 * parameter line, several local declarations, else-branches, the
 * immediate-swap logic around line 851, branch braces). Incomplete as
 * shown -- do not infer the full control flow from this fragment. */
791 * matches operands of a node into ia32 addressing/operand modes. This covers
792 * usage of source address mode, immediates, operations with non 32-bit modes,
794 * The resulting data is filled into the @p am struct. block is the block
795 * of the node whose arguments are matched. op1, op2 are the first and second
796 * input that are matched (op1 may be NULL). other_op is another unrelated
797 * input that is not matched! but which is needed sometimes to check if AM
798 * for op1/op2 is legal.
799 * @p flags describes the supported modes of the operation in detail.
801 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
802 		ir_node *op1, ir_node *op2, ir_node *other_op,
805 	ia32_address_t *addr = &am->addr;
806 	ir_mode *mode = get_irn_mode(op2);
807 	int mode_bits = get_mode_size_bits(mode);
808 	ir_node *new_op1, *new_op2;
810 	unsigned commutative;
811 	int use_am_and_immediates;
814 	memset(am, 0, sizeof(am[0]));
	/* decode the supported matching options from the flags */
816 	commutative = (flags & match_commutative) != 0;
817 	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
818 	use_am = (flags & match_am) != 0;
819 	use_immediate = (flags & match_immediate) != 0;
820 	assert(!use_am_and_immediates || use_immediate);
823 	assert(!commutative || op1 != NULL);
824 	assert(use_am || !(flags & match_8bit_am));
825 	assert(use_am || !(flags & match_16bit_am));
827 	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
828 	    (mode_bits == 16 && !(flags & match_16bit_am))) {
832 	/* we can simply skip downconvs for mode neutral nodes: the upper bits
833 	 * can be random for these operations */
834 	if (flags & match_mode_neutral) {
835 		op2 = ia32_skip_downconv(op2);
837 			op1 = ia32_skip_downconv(op1);
840 		op2 = ia32_skip_sameconv(op2);
842 			op1 = ia32_skip_sameconv(op1);
846 	/* match immediates. firm nodes are normalized: constants are always on the
849 	if (!(flags & match_try_am) && use_immediate) {
850 		new_op2 = ia32_try_create_Immediate(op2, 0);
	/* try source address mode for op2 first, then (if commutative) op1 */
853 	if (new_op2 == NULL &&
854 	    use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
855 		build_address(am, op2, ia32_create_am_normal);
856 		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
857 		if (mode_is_float(mode)) {
858 			new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
862 		am->op_type = ia32_AddrModeS;
863 	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
865 	           ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
867 		build_address(am, op1, ia32_create_am_normal);
869 		if (mode_is_float(mode)) {
870 			noreg = ia32_new_NoReg_vfp(current_ir_graph);
875 		if (new_op2 != NULL) {
878 			new_op1 = be_transform_node(op2);
880 			am->ins_permuted = true;
882 		am->op_type = ia32_AddrModeS;
884 		am->op_type = ia32_Normal;
886 		if (flags & match_try_am) {
892 		mode = get_irn_mode(op2);
893 		if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
894 			new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
896 			new_op2 = create_upconv(op2, NULL);
897 			am->ls_mode = mode_Iu;
899 			new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
901 			new_op2 = be_transform_node(op2);
902 			am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
	/* fall back to noreg/nomem for unset address parts */
905 	if (addr->base == NULL)
906 		addr->base = noreg_GP;
907 	if (addr->index == NULL)
908 		addr->index = noreg_GP;
909 	if (addr->mem == NULL)
912 	am->new_op1 = new_op1;
913 	am->new_op2 = new_op2;
914 	am->commutative = commutative;
/* NOTE(review): listing is missing lines here (local declarations of
 * mode/load, the early `return node;` and the else-branch return). */
918 * "Fixes" a node that uses address mode by turning it into mode_T
919 * and returning a pn_ia32_res Proj.
921 * @param node the node
922 * @param am its address mode
924 * @return a Proj(pn_ia32_res) if a memory address mode is used,
927 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
932 	if (am->mem_proj == NULL)
935 	/* we have to create a mode_T so the old MemProj can attach to us */
936 	mode = get_irn_mode(node);
937 	load = get_Proj_pred(am->mem_proj);
	/* redirect users of the old load to the new AM node */
939 	be_set_transformed_node(load, node);
941 	if (mode != mode_T) {
942 		set_irn_mode(node, mode_T);
943 		return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/* NOTE(review): listing is missing lines here (dbg_info declaration,
 * the final `return new_node;`, closing brace). */
950 * Construct a standard binary operation, set AM and immediate if required.
952 * @param node The original node for which the binop is created
953 * @param op1 The first operand
954 * @param op2 The second operand
955 * @param func The node constructor function
956 * @return The constructed ia32 node.
958 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
959 		construct_binop_func *func, match_flags_t flags)
962 	ir_node *block, *new_block, *new_node;
963 	ia32_address_mode_t am;
964 	ia32_address_t *addr = &am.addr;
966 	block = get_nodes_block(node);
967 	match_arguments(&am, block, op1, op2, NULL, flags);
969 	dbgi = get_irn_dbg_info(node);
970 	new_block = be_transform_node(block);
971 	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
972 			am.new_op1, am.new_op2);
973 	set_am_attributes(new_node, &am);
974 	/* we can't use source address mode anymore when using immediates */
975 	if (!(flags & match_am_and_immediates) &&
976 	    (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
977 		set_ia32_am_support(new_node, ia32_am_none);
978 	SET_IA32_ORIG_NODE(new_node, node);
980 	new_node = fix_mem_proj(new_node, &am);
/* Generic input indices shared by the lowered ia32 binary ops; the
 * compile-time asserts below pin them to the generated Adc/Sbb indices.
 * NOTE(review): the `enum {` opener and closing `};` lines are missing
 * from this listing. */
986 * Generic names for the inputs of an ia32 binary op.
989 	n_ia32_l_binop_left, /**< ia32 left input */
990 	n_ia32_l_binop_right, /**< ia32 right input */
991 	n_ia32_l_binop_eflags /**< ia32 eflags input */
993 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
994 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
995 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
996 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
997 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
998 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/* NOTE(review): listing is missing lines here (dbg_info declaration, the
 * final `return new_node;`, closing brace). */
1001 * Construct a binary operation which also consumes the eflags.
1003 * @param node The node to transform
1004 * @param func The node constructor function
1005 * @param flags The match flags
1006 * @return The constructor ia32 node
1008 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1009 		match_flags_t flags)
1011 	ir_node *src_block = get_nodes_block(node);
1012 	ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1013 	ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1014 	ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1016 	ir_node *block, *new_node, *new_eflags;
1017 	ia32_address_mode_t am;
1018 	ia32_address_t *addr = &am.addr;
	/* eflags acts as the unrelated "other_op" for the AM check */
1020 	match_arguments(&am, src_block, op1, op2, eflags, flags);
1022 	dbgi = get_irn_dbg_info(node);
1023 	block = be_transform_node(src_block);
1024 	new_eflags = be_transform_node(eflags);
1025 	new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1026 			am.new_op1, am.new_op2, new_eflags);
1027 	set_am_attributes(new_node, &am);
1028 	/* we can't use source address mode anymore when using immediates */
1029 	if (!(flags & match_am_and_immediates) &&
1030 	    (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1031 		set_ia32_am_support(new_node, ia32_am_none);
1032 	SET_IA32_ORIG_NODE(new_node, node);
1034 	new_node = fix_mem_proj(new_node, &am);
1039 static ir_node *get_fpcw(void)
1041 if (initial_fpcw != NULL)
1042 return initial_fpcw;
1044 initial_fpcw = be_transform_node(old_initial_fpcw);
1045 return initial_fpcw;
/* NOTE(review): listing is missing lines here (dbg_info declaration, the
 * flags |= match_am update around line 1079, branch braces, the final
 * return). */
1049 * Construct a standard binary operation, set AM and immediate if required.
1051 * @param op1 The first operand
1052 * @param op2 The second operand
1053 * @param func The node constructor function
1054 * @return The constructed ia32 node.
1056 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1057 		construct_binop_float_func *func)
1059 	ir_mode *mode = get_irn_mode(node);
1061 	ir_node *block, *new_block, *new_node;
1062 	ia32_address_mode_t am;
1063 	ia32_address_t *addr = &am.addr;
1064 	ia32_x87_attr_t *attr;
1065 	/* All operations are considered commutative, because there are reverse
1067 	match_flags_t flags = match_commutative;
1069 	/* happens for div nodes... */
1070 	if (mode == mode_T) {
1072 		mode = get_Div_resmode(node);
1074 		panic("can't determine mode");
1077 	/* cannot use address mode with long double on x87 */
1078 	if (get_mode_size_bits(mode) <= 64)
1081 	block = get_nodes_block(node);
1082 	match_arguments(&am, block, op1, op2, NULL, flags);
1084 	dbgi = get_irn_dbg_info(node);
1085 	new_block = be_transform_node(block);
	/* x87 ops additionally consume the fp control word */
1086 	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1087 			am.new_op1, am.new_op2, get_fpcw());
1088 	set_am_attributes(new_node, &am);
	/* record operand permutation so the emitter can pick the reverse op */
1090 	attr = get_ia32_x87_attr(new_node);
1091 	attr->attr.data.ins_permuted = am.ins_permuted;
1093 	SET_IA32_ORIG_NODE(new_node, node);
1095 	new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): listing is missing lines here (dbg_info declaration, the
 * `op2 = op;` step inside the Conv-skipping loop, else-branch braces and
 * the final return). */
1101 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1103 * @param op1 The first operand
1104 * @param op2 The second operand
1105 * @param func The node constructor function
1106 * @return The constructed ia32 node.
1108 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1109 		construct_shift_func *func,
1110 		match_flags_t flags)
1113 	ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1114 	ir_mode *mode = get_irn_mode(node);
1116 	assert(! mode_is_float(mode));
1117 	assert(flags & match_immediate);
1118 	assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1120 	if (get_mode_modulo_shift(mode) != 32)
1121 		panic("modulo shift!=32 not supported by ia32 backend");
1123 	if (flags & match_mode_neutral) {
1124 		op1 = ia32_skip_downconv(op1);
1125 		new_op1 = be_transform_node(op1);
1126 	} else if (get_mode_size_bits(mode) != 32) {
1127 		new_op1 = create_upconv(op1, node);
1129 		new_op1 = be_transform_node(op1);
1132 	/* the shift amount can be any mode that is bigger than 5 bits, since all
1133 	 * other bits are ignored anyway */
1134 	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1135 		ir_node *const op = get_Conv_op(op2);
1136 		if (mode_is_float(get_irn_mode(op)))
1139 		assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1141 	new_op2 = create_immediate_or_transform(op2, 0);
1143 	dbgi = get_irn_dbg_info(node);
1144 	block = get_nodes_block(node);
1145 	new_block = be_transform_node(block);
1146 	new_node = func(dbgi, new_block, new_op1, new_op2);
1147 	SET_IA32_ORIG_NODE(new_node, node);
1149 	/* lowered shift instruction may have a dependency operand, handle it here */
1150 	if (get_irn_arity(node) == 3) {
1151 		/* we have a dependency */
1152 		ir_node* dep = get_irn_n(node, 2);
1153 		if (get_irn_n_edges(dep) > 1) {
1154 			/* ... which has at least one user other than 'node' */
1155 			ir_node *new_dep = be_transform_node(dep);
1156 			add_irn_dep(new_node, new_dep);
1165 * Construct a standard unary operation, set AM and immediate if required.
1167 * @param op The operand
1168 * @param func The node constructor function
1169 * @return The constructed ia32 node.
1171 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1172 match_flags_t flags)
1175 ir_node *block, *new_block, *new_op, *new_node;
1177 assert(flags == 0 || flags == match_mode_neutral);
1178 if (flags & match_mode_neutral) {
1179 op = ia32_skip_downconv(op);
1182 new_op = be_transform_node(op);
1183 dbgi = get_irn_dbg_info(node);
1184 block = get_nodes_block(node);
1185 new_block = be_transform_node(block);
1186 new_node = func(dbgi, new_block, new_op);
1188 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from a filled address struct.
 * NOTE(review): listing is missing lines here (declarations of base/idx/
 * res, the NULL->noreg handling before the transforms, the `base = tls_base`
 * branch, and the return). */
1193 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1194 		ia32_address_t *addr)
1204 	base = be_transform_node(base);
1211 	idx = be_transform_node(idx);
1214 	/* segment overrides are ineffective for Leas :-( so we have to patch
1216 	if (addr->tls_segment) {
1217 		ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1218 		assert(addr->symconst_ent != NULL);
1219 		if (base == noreg_GP)
1222 			base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
		/* segment handled via the extra Lea, drop the flag */
1223 		addr->tls_segment = false;
1226 	res = new_bd_ia32_Lea(dbgi, block, base, idx);
1227 	set_address(res, addr);
1233 * Returns non-zero if a given address mode has a symbolic or
1234 * numerical offset != 0.
/* True if the address carries any immediate component: a numeric
 * offset, a symconst entity, or a frame entity / frame usage. */
1236 static int am_has_immediates(const ia32_address_t *addr)
1238 return addr->offset != 0 || addr->symconst_ent != NULL
1239 || addr->frame_entity || addr->use_frame;
1242 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1243 ir_node *high, ir_node *low,
1247 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1248 * op1 - target to be shifted
1249 * op2 - contains bits to be shifted into target
1251 * Only op3 can be an immediate.
1253 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1254 ir_node *high, ir_node *low, ir_node *count,
1255 new_shiftd_func func)
1257 ir_node *new_block = be_transform_node(block);
1258 ir_node *new_high = be_transform_node(high);
1259 ir_node *new_low = be_transform_node(low);
1263 /* the shift amount can be any mode that is bigger than 5 bits, since all
1264 * other bits are ignored anyway */
1265 while (is_Conv(count) &&
1266 get_irn_n_edges(count) == 1 &&
1267 mode_is_int(get_irn_mode(count))) {
1268 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
/* skip the Conv: only the low 5 bits of the count are used */
1269 count = get_Conv_op(count);
1271 new_count = create_immediate_or_transform(count, 0);
/* func is one of the ShlD/ShrD constructors (see new_shiftd_func) */
1273 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1278 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
/* Returns true iff value1 and value2 are integer constants v1, v2 with
 * 0 <= v1 <= v2 and v2 == 32 - v1 (the complementary pair needed to
 * recognize a 64bit shift built from Shl/Shr). */
1281 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1283 if (is_Const(value1) && is_Const(value2)) {
1284 ir_tarval *tv1 = get_Const_tarval(value1);
1285 ir_tarval *tv2 = get_Const_tarval(value2);
1286 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1287 long v1 = get_tarval_long(tv1);
1288 long v2 = get_tarval_long(tv2);
/* v1 <= v2 also excludes negative v1, since then v2 = 32-v1 > 32 > v1 */
1289 return v1 <= v2 && v2 == 32-v1;
/* Try to recognize an Or/Add of a Shl and a Shr as a double-width shift
 * (ShlD/ShrD), as produced by lower_dw. Returns the generated ia32 node
 * or NULL if the pattern does not match. */
1295 static ir_node *match_64bit_shift(ir_node *node)
1297 ir_node *op1 = get_binop_left(node);
1298 ir_node *op2 = get_binop_right(node);
1299 assert(is_Or(node) || is_Add(node));
1307 /* match ShlD operation */
1308 if (is_Shl(op1) && is_Shr(op2)) {
1309 ir_node *shl_right = get_Shl_right(op1);
1310 ir_node *shl_left = get_Shl_left(op1);
1311 ir_node *shr_right = get_Shr_right(op2);
1312 ir_node *shr_left = get_Shr_left(op2);
1313 /* constant ShlD operation */
/* Shl(a,c) | Shr(b,32-c)  ==>  ShlD(a,b,c) */
1314 if (is_complementary_shifts(shl_right, shr_right)) {
1315 dbg_info *dbgi = get_irn_dbg_info(node);
1316 ir_node *block = get_nodes_block(node);
1317 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1320 /* constant ShrD operation */
/* Shr(a,c) | Shl(b,32-c)  ==>  ShrD(a,b,c) */
1321 if (is_complementary_shifts(shr_right, shl_right)) {
1322 dbg_info *dbgi = get_irn_dbg_info(node);
1323 ir_node *block = get_nodes_block(node);
1324 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1327 /* lower_dw produces the following for ShlD:
1328 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1329 if (is_Shr(shr_left) && is_Not(shr_right)
1330 && is_Const_1(get_Shr_right(shr_left))
1331 && get_Not_op(shr_right) == shl_right) {
1332 dbg_info *dbgi = get_irn_dbg_info(node);
1333 ir_node *block = get_nodes_block(node);
1334 ir_node *val_h = get_Shr_left(shr_left);
1335 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1338 /* lower_dw produces the following for ShrD:
1339 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1340 if (is_Shl(shl_left) && is_Not(shl_right)
1341 && is_Const_1(get_Shl_right(shl_left))
1342 && get_Not_op(shl_right) == shr_right) {
1343 dbg_info *dbgi = get_irn_dbg_info(node);
1344 ir_node *block = get_nodes_block(node);
1345 ir_node *val_h = get_Shl_left(shl_left);
1346 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1355 * Creates an ia32 Add.
1357 * @return the created ia32 Add node
1359 static ir_node *gen_Add(ir_node *node)
1361 ir_mode *mode = get_irn_mode(node);
1362 ir_node *op1 = get_Add_left(node);
1363 ir_node *op2 = get_Add_right(node);
1365 ir_node *block, *new_block, *new_node, *add_immediate_op;
1366 ia32_address_t addr;
1367 ia32_address_mode_t am;
/* an Add may actually be half of a 64bit shift lowered by lower_dw */
1369 new_node = match_64bit_shift(node);
1370 if (new_node != NULL)
1373 if (mode_is_float(mode)) {
1374 if (ia32_cg_config.use_sse2)
1375 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1376 match_commutative | match_am);
/* no SSE2: fall back to the x87 floating point unit */
1378 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1381 ia32_mark_non_am(node);
1383 op2 = ia32_skip_downconv(op2);
1384 op1 = ia32_skip_downconv(op1);
/* strategy (in order of preference):
1388 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1389 * 1. Add with immediate -> Lea
1390 * 2. Add with possible source address mode -> Add
1391 * 3. Otherwise -> Lea
 */
1393 memset(&addr, 0, sizeof(addr));
1394 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1395 add_immediate_op = NULL;
1397 dbgi = get_irn_dbg_info(node);
1398 block = get_nodes_block(node);
1399 new_block = be_transform_node(block);
/* case 0: everything folded into immediates -> emit a Const */
1402 if (addr.base == NULL && addr.index == NULL) {
1403 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1404 addr.symconst_sign, 0, addr.offset);
1405 SET_IA32_ORIG_NODE(new_node, node);
1408 /* add with immediate? */
1409 if (addr.index == NULL) {
1410 add_immediate_op = addr.base;
1411 } else if (addr.base == NULL && addr.scale == 0) {
1412 add_immediate_op = addr.index;
1415 if (add_immediate_op != NULL) {
1416 if (!am_has_immediates(&addr)) {
/* degenerate Add x,0: should have been folded by the middle-end */
1417 #ifdef DEBUG_libfirm
1418 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1421 return be_transform_node(add_immediate_op);
/* case 1: reg + immediate -> Lea */
1424 new_node = create_lea_from_address(dbgi, new_block, &addr);
1425 SET_IA32_ORIG_NODE(new_node, node);
1429 /* test if we can use source address mode */
1430 match_arguments(&am, block, op1, op2, NULL, match_commutative
1431 | match_mode_neutral | match_am | match_immediate | match_try_am);
1433 /* construct an Add with source address mode */
1434 if (am.op_type == ia32_AddrModeS) {
1435 ia32_address_t *am_addr = &am.addr;
1436 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1437 am_addr->index, am_addr->mem, am.new_op1,
1439 set_am_attributes(new_node, &am);
1440 SET_IA32_ORIG_NODE(new_node, node);
1442 new_node = fix_mem_proj(new_node, &am);
1447 /* otherwise construct a lea */
1448 new_node = create_lea_from_address(dbgi, new_block, &addr);
1449 SET_IA32_ORIG_NODE(new_node, node);
1454 * Creates an ia32 Mul.
1456 * @return the created ia32 Mul node
1458 static ir_node *gen_Mul(ir_node *node)
1460 ir_node *op1 = get_Mul_left(node);
1461 ir_node *op2 = get_Mul_right(node);
1462 ir_mode *mode = get_irn_mode(node);
1464 if (mode_is_float(mode)) {
1465 if (ia32_cg_config.use_sse2)
1466 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1467 match_commutative | match_am);
/* no SSE2: use the x87 multiply */
1469 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul supports AM, immediates and is mode-neutral */
1471 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1472 match_commutative | match_am | match_mode_neutral |
1473 match_immediate | match_am_and_immediates);
1477 * Creates an ia32 Mulh.
1478 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1479 * this result while Mul returns the lower 32 bit.
1481 * @return the created ia32 Mulh node
1483 static ir_node *gen_Mulh(ir_node *node)
1485 dbg_info *dbgi = get_irn_dbg_info(node);
1486 ir_node *op1 = get_Mulh_left(node);
1487 ir_node *op2 = get_Mulh_right(node);
1488 ir_mode *mode = get_irn_mode(node);
1490 ir_node *proj_res_high;
/* only 32bit Mulh is implemented here */
1492 if (get_mode_size_bits(mode) != 32) {
1493 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* choose signed (IMul1OP) or unsigned (Mul) widening multiply and
 * project out the high 32 bits of the 64bit product */
1496 if (mode_is_signed(mode)) {
1497 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1498 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1500 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1501 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1503 return proj_res_high;
1507 * Creates an ia32 And.
1509 * @return The created ia32 And node
1511 static ir_node *gen_And(ir_node *node)
1513 ir_node *op1 = get_And_left(node);
1514 ir_node *op2 = get_And_right(node);
1515 assert(! mode_is_float(get_irn_mode(node)));
1517 /* is it a zero extension? */
1518 if (is_Const(op2)) {
1519 ir_tarval *tv = get_Const_tarval(op2);
1520 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is a zero extension from 8/16 bit -> use a
 * Conv (movzx) instead of an And instruction */
1522 if (v == 0xFF || v == 0xFFFF) {
1523 dbg_info *dbgi = get_irn_dbg_info(node);
1524 ir_node *block = get_nodes_block(node);
1531 assert(v == 0xFFFF);
1534 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1539 return gen_binop(node, op1, op2, new_bd_ia32_And,
1540 match_commutative | match_mode_neutral | match_am | match_immediate);
1544 * Creates an ia32 Or.
1546 * @return The created ia32 Or node
1548 static ir_node *gen_Or(ir_node *node)
1550 ir_node *op1 = get_Or_left(node);
1551 ir_node *op2 = get_Or_right(node);
/* an Or may combine the two halves of a lowered 64bit shift */
1554 res = match_64bit_shift(node);
1558 assert (! mode_is_float(get_irn_mode(node)));
1559 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1560 | match_mode_neutral | match_am | match_immediate);
1566 * Creates an ia32 Eor.
1568 * @return The created ia32 Eor node
1570 static ir_node *gen_Eor(ir_node *node)
1572 ir_node *op1 = get_Eor_left(node);
1573 ir_node *op2 = get_Eor_right(node);
/* firm Eor == ia32 Xor; floats are handled elsewhere (Minus/Abs) */
1575 assert(! mode_is_float(get_irn_mode(node)));
1576 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1577 | match_mode_neutral | match_am | match_immediate);
1582 * Creates an ia32 Sub.
1584 * @return The created ia32 Sub node
1586 static ir_node *gen_Sub(ir_node *node)
1588 ir_node *op1 = get_Sub_left(node);
1589 ir_node *op2 = get_Sub_right(node);
1590 ir_mode *mode = get_irn_mode(node);
1592 if (mode_is_float(mode)) {
1593 if (ia32_cg_config.use_sse2)
1594 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1596 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,C should have been normalized to Add x,-C by the middle-end */
1599 if (is_Const(op2)) {
1600 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: Sub is not commutative, so no match_commutative here */
1604 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1605 | match_am | match_immediate);
/* Compute the memory input for a node that consumed a Load via address
 * mode (am_mem) while the original node had its own memory input
 * (src_mem). Care is taken not to create a memory cycle through the
 * consumed load; results may be combined with a Sync. */
1608 static ir_node *transform_AM_mem(ir_node *const block,
1609 ir_node *const src_val,
1610 ir_node *const src_mem,
1611 ir_node *const am_mem)
1613 if (is_NoMem(am_mem)) {
1614 return be_transform_node(src_mem);
1615 } else if (is_Proj(src_val) &&
1617 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1618 /* avoid memory loop */
1620 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1621 ir_node *const ptr_pred = get_Proj_pred(src_val);
1622 int const arity = get_Sync_n_preds(src_mem);
/* collect the Sync predecessors, skipping the one that would loop */
1627 NEW_ARR_A(ir_node*, ins, arity + 1);
1629 /* NOTE: This sometimes produces dead-code because the old sync in
1630 * src_mem might not be used anymore, we should detect this case
1631 * and kill the sync... */
1632 for (i = arity - 1; i >= 0; --i) {
1633 ir_node *const pred = get_Sync_pred(src_mem, i);
1635 /* avoid memory loop */
1636 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1639 ins[n++] = be_transform_node(pred);
1642 if (n==1 && ins[0] == am_mem) {
1644 /* creating a new Sync and relying on CSE may fail,
1645 * if am_mem is a ProjM, which does not yet verify. */
1649 return new_r_Sync(block, n, ins);
/* fallback: Sync the transformed source memory with the AM memory */
1653 ins[0] = be_transform_node(src_mem);
1655 return new_r_Sync(block, 2, ins);
1660 * Create a 32bit to 64bit signed extension.
1662 * @param dbgi debug info
1663 * @param block the block where node nodes should be placed
1664 * @param val the value to extend
1665 * @param orig the original node
1667 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1668 ir_node *val, const ir_node *orig)
1673 if (ia32_cg_config.use_short_sex_eax) {
/* use cltd (cdq): shorter encoding, but value must live in eax */
1674 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1675 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* otherwise: arithmetic shift right by 31 replicates the sign bit */
1677 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1678 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1680 SET_IA32_ORIG_NODE(res, orig);
1685 * Generates an ia32 Div with additional infrastructure for the
1686 * register allocator if needed.
/* Shared transformation for firm Div and Mod nodes: both map onto the
 * same ia32 Div/IDiv instruction; callers project out quotient or
 * remainder. */
1688 static ir_node *create_Div(ir_node *node)
1690 dbg_info *dbgi = get_irn_dbg_info(node);
1691 ir_node *block = get_nodes_block(node);
1692 ir_node *new_block = be_transform_node(block);
1693 int throws_exception = ir_throws_exception(node);
1700 ir_node *sign_extension;
1701 ia32_address_mode_t am;
1702 ia32_address_t *addr = &am.addr;
1704 /* the upper bits have random contents for smaller modes */
1705 switch (get_irn_opcode(node)) {
1707 op1 = get_Div_left(node);
1708 op2 = get_Div_right(node);
1709 mem = get_Div_mem(node);
1710 mode = get_Div_resmode(node);
1713 op1 = get_Mod_left(node);
1714 op2 = get_Mod_right(node);
1715 mem = get_Mod_mem(node);
1716 mode = get_Mod_resmode(node);
1719 panic("invalid divmod node %+F", node);
/* division needs full-width operands -> match_upconv_32 */
1722 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1724 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1725 is the memory of the consumed address. We can have only the second op as address
1726 in Div nodes, so check only op2. */
1727 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1729 if (mode_is_signed(mode)) {
/* idiv needs edx:eax; sign-extend the dividend into edx */
1730 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1731 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1732 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned div: the upper half is simply zero */
1734 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1736 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1737 addr->index, new_mem, am.new_op2,
1738 am.new_op1, sign_extension);
1740 ir_set_throws_exception(new_node, throws_exception);
1742 set_irn_pinned(new_node, get_irn_pinned(node));
1744 set_am_attributes(new_node, &am);
1745 SET_IA32_ORIG_NODE(new_node, node);
1747 new_node = fix_mem_proj(new_node, &am);
1753 * Generates an ia32 Mod.
/* Mod shares the Div/IDiv instruction; see create_Div() */
1755 static ir_node *gen_Mod(ir_node *node)
1757 return create_Div(node);
1761 * Generates an ia32 Div.
1763 static ir_node *gen_Div(ir_node *node)
1765 ir_mode *mode = get_Div_resmode(node);
/* float division: SSE2 xDiv or x87 vfdiv */
1766 if (mode_is_float(mode)) {
1767 ir_node *op1 = get_Div_left(node);
1768 ir_node *op2 = get_Div_right(node);
1770 if (ia32_cg_config.use_sse2) {
1771 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1773 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
/* integer division shares its implementation with Mod */
1777 return create_Div(node);
1781 * Creates an ia32 Shl.
1783 * @return The created ia32 Shl node
1785 static ir_node *gen_Shl(ir_node *node)
1787 ir_node *left = get_Shl_left(node);
1788 ir_node *right = get_Shl_right(node);
/* left shift is mode neutral: low bits are independent of upper bits */
1790 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1791 match_mode_neutral | match_immediate);
1795 * Creates an ia32 Shr.
1797 * @return The created ia32 Shr node
1799 static ir_node *gen_Shr(ir_node *node)
1801 ir_node *left = get_Shr_left(node);
1802 ir_node *right = get_Shr_right(node);
/* logical right shift depends on the upper bits -> not mode neutral */
1804 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1810 * Creates an ia32 Sar.
1812 * @return The created ia32 Shrs node
1814 static ir_node *gen_Shrs(ir_node *node)
1816 ir_node *left = get_Shrs_left(node);
1817 ir_node *right = get_Shrs_right(node);
1819 if (is_Const(right)) {
1820 ir_tarval *tv = get_Const_tarval(right);
1821 long val = get_tarval_long(tv);
1823 /* this is a sign extension */
1824 dbg_info *dbgi = get_irn_dbg_info(node);
1825 ir_node *block = be_transform_node(get_nodes_block(node));
1826 ir_node *new_op = be_transform_node(left);
/* Shrs by 31 extracts the sign bit -> use cltd/Sar-by-31 helper */
1828 return create_sex_32_64(dbgi, block, new_op, node);
1832 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x,C),C) with C in {16,24} is a sign extension from
 * 16/8 bit -> emit a Conv (movsx) instead of two shifts */
1833 if (is_Const(right) && is_Shl(left)) {
1834 ir_node *shl_left = get_Shl_left(left);
1835 ir_node *shl_right = get_Shl_right(left);
1836 if (is_Const(shl_right)) {
1837 ir_tarval *tv1 = get_Const_tarval(right);
1838 ir_tarval *tv2 = get_Const_tarval(shl_right);
1839 if (tv1 == tv2 && tarval_is_long(tv1)) {
1840 long val = get_tarval_long(tv1);
1841 if (val == 16 || val == 24) {
1842 dbg_info *dbgi = get_irn_dbg_info(node);
1843 ir_node *block = get_nodes_block(node);
1853 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain arithmetic shift right */
1862 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1868 * Creates an ia32 Rol.
1870 * @param op1 The first operator
1871 * @param op2 The second operator
1872 * @return The created ia32 RotL node
1874 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1876 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1882 * Creates an ia32 Ror.
1883 * NOTE: There is no RotR with immediate because this would always be a RotL
1884 * "imm-mode_size_bits" which can be pre-calculated.
1886 * @param op1 The first operator
1887 * @param op2 The second operator
1888 * @return The created ia32 RotR node
1890 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1892 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1898 * Creates an ia32 RotR or RotL (depending on the found pattern).
1900 * @return The created ia32 RotL or RotR node
1902 static ir_node *gen_Rotl(ir_node *node)
1904 ir_node *op1 = get_Rotl_left(node);
1905 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -y) is the same as Ror(x, y) -> fold the Minus away */
1907 if (is_Minus(op2)) {
1908 return gen_Ror(node, op1, get_Minus_op(op2));
1911 return gen_Rol(node, op1, op2);
1917 * Transforms a Minus node.
1919 * @return The created ia32 Minus node
1921 static ir_node *gen_Minus(ir_node *node)
1923 ir_node *op = get_Minus_op(node);
1924 ir_node *block = be_transform_node(get_nodes_block(node));
1925 dbg_info *dbgi = get_irn_dbg_info(node);
1926 ir_mode *mode = get_irn_mode(node);
1931 if (mode_is_float(mode)) {
1932 ir_node *new_op = be_transform_node(op);
1933 if (ia32_cg_config.use_sse2) {
1934 /* TODO: non-optimal... if we have many xXors, then we should
1935 * rather create a load for the const and use that instead of
1936 * several AM nodes... */
/* SSE negate: xor with the sign-bit mask constant (SFP/DFP_SIGN)
 * loaded via address mode */
1937 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1939 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1940 noreg_GP, nomem, new_op, noreg_xmm);
1942 size = get_mode_size_bits(mode);
1943 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1945 set_ia32_am_sc(new_node, ent);
1946 set_ia32_op_type(new_node, ia32_AddrModeS);
1947 set_ia32_ls_mode(new_node, mode);
/* x87: dedicated change-sign instruction */
1949 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1952 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1955 SET_IA32_ORIG_NODE(new_node, node);
1961 * Transforms a Not node.
1963 * @return The created ia32 Not node
1965 static ir_node *gen_Not(ir_node *node)
1967 ir_node *op = get_Not_op(node);
1969 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1970 assert (! mode_is_float(get_irn_mode(node)));
1972 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Create a float Abs (or -Abs if 'negate') for 'op'.
 * SSE: mask out the sign bit via And with SABS/DABS constant.
 * x87: vfabs, followed by vfchs when negation is requested. */
1975 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1976 bool negate, ir_node *node)
1978 ir_node *new_block = be_transform_node(block);
1979 ir_mode *mode = get_irn_mode(op);
1980 ir_node *new_op = be_transform_node(op);
1985 assert(mode_is_float(mode));
1987 if (ia32_cg_config.use_sse2) {
1988 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1989 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1990 noreg_GP, nomem, new_op, noreg_fp);
1992 size = get_mode_size_bits(mode);
/* pick the single/double precision abs-mask constant */
1993 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1995 set_ia32_am_sc(new_node, ent);
1997 SET_IA32_ORIG_NODE(new_node, node);
1999 set_ia32_op_type(new_node, ia32_AddrModeS);
2000 set_ia32_ls_mode(new_node, mode);
2002 /* TODO, implement -Abs case */
2005 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
2006 SET_IA32_ORIG_NODE(new_node, node);
/* negate the absolute value when -Abs was requested (x87 path) */
2008 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
2009 SET_IA32_ORIG_NODE(new_node, node);
2017 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* The bt instruction copies bit n of x into the carry flag, which the
 * caller then tests with a below/above_equal condition code. */
2019 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2021 dbg_info *dbgi = get_irn_dbg_info(cmp);
2022 ir_node *block = get_nodes_block(cmp);
2023 ir_node *new_block = be_transform_node(block);
2024 ir_node *op1 = be_transform_node(x);
2025 ir_node *op2 = be_transform_node(n);
2027 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm ir_relation onto the matching ia32 condition code.
 * Three cases: float compares (parity-aware unordered handling),
 * signed integer compares, and unsigned/pointer compares.
 * ir_relation_false/true have no condition code and panic. */
2030 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2033 if (mode_is_float(mode)) {
2035 case ir_relation_equal: return ia32_cc_float_equal;
2036 case ir_relation_less: return ia32_cc_float_below;
2037 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2038 case ir_relation_greater: return ia32_cc_float_above;
2039 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2040 case ir_relation_less_greater: return ia32_cc_not_equal;
2041 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2042 case ir_relation_unordered: return ia32_cc_parity;
2043 case ir_relation_unordered_equal: return ia32_cc_equal;
2044 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2045 case ir_relation_unordered_less_equal:
2046 return ia32_cc_float_unordered_below_equal;
2047 case ir_relation_unordered_greater:
2048 return ia32_cc_float_unordered_above;
2049 case ir_relation_unordered_greater_equal:
2050 return ia32_cc_float_unordered_above_equal;
2051 case ir_relation_unordered_less_greater:
2052 return ia32_cc_float_not_equal;
2053 case ir_relation_false:
2054 case ir_relation_true:
2055 /* should we introduce a jump always/jump never? */
2058 panic("Unexpected float pnc");
2059 } else if (mode_is_signed(mode)) {
/* signed compares use less/greater condition codes; the unordered
 * variants are impossible for integers and map to the same codes */
2061 case ir_relation_unordered_equal:
2062 case ir_relation_equal: return ia32_cc_equal;
2063 case ir_relation_unordered_less:
2064 case ir_relation_less: return ia32_cc_less;
2065 case ir_relation_unordered_less_equal:
2066 case ir_relation_less_equal: return ia32_cc_less_equal;
2067 case ir_relation_unordered_greater:
2068 case ir_relation_greater: return ia32_cc_greater;
2069 case ir_relation_unordered_greater_equal:
2070 case ir_relation_greater_equal: return ia32_cc_greater_equal;
2071 case ir_relation_unordered_less_greater:
2072 case ir_relation_less_greater: return ia32_cc_not_equal;
2073 case ir_relation_less_equal_greater:
2074 case ir_relation_unordered:
2075 case ir_relation_false:
2076 case ir_relation_true:
2077 /* introduce jump always/jump never? */
2080 panic("Unexpected pnc");
/* unsigned/pointer compares use below/above condition codes */
2083 case ir_relation_unordered_equal:
2084 case ir_relation_equal: return ia32_cc_equal;
2085 case ir_relation_unordered_less:
2086 case ir_relation_less: return ia32_cc_below;
2087 case ir_relation_unordered_less_equal:
2088 case ir_relation_less_equal: return ia32_cc_below_equal;
2089 case ir_relation_unordered_greater:
2090 case ir_relation_greater: return ia32_cc_above;
2091 case ir_relation_unordered_greater_equal:
2092 case ir_relation_greater_equal: return ia32_cc_above_equal;
2093 case ir_relation_unordered_less_greater:
2094 case ir_relation_less_greater: return ia32_cc_not_equal;
2095 case ir_relation_less_equal_greater:
2096 case ir_relation_unordered:
2097 case ir_relation_false:
2098 case ir_relation_true:
2099 /* introduce jump always/jump never? */
2102 panic("Unexpected pnc");
/* Transform a Cmp into a flags-producing ia32 node and report the
 * condition code to test in *cc_out. Recognizes the bit-test pattern
 * (x & (1 << n)) ==/!= 0 and emits a Bt instead of a Cmp for it. */
2106 static ir_node *get_flags_node_cmp(ir_node *cmp, ia32_condition_code_t *cc_out)
2108 /* must have a Cmp as input */
2109 ir_relation relation = get_Cmp_relation(cmp);
2110 ir_relation possible;
2111 ir_node *l = get_Cmp_left(cmp);
2112 ir_node *r = get_Cmp_right(cmp);
2113 ir_mode *mode = get_irn_mode(l);
2116 /* check for bit-test */
2117 if (ia32_cg_config.use_bt && (relation == ir_relation_equal
2118 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2119 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2121 ir_node *la = get_And_left(l);
2122 ir_node *ra = get_And_right(l);
2129 ir_node *c = get_Shl_left(la);
2130 if (is_Const_1(c) && is_Const_0(r)) {
2131 /* (1 << n) & ra) */
2132 ir_node *n = get_Shl_right(la);
2133 flags = gen_bt(cmp, ra, n);
2134 /* the bit is copied into the CF flag */
2135 if (relation & ir_relation_equal)
2136 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2138 *cc_out = ia32_cc_below; /* test for CF=1 */
2144 /* the middle-end tries to eliminate impossible relations, so a ptr != 0
2145 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2146 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2147 * a predecessor node). So add the < bit */
2148 possible = ir_get_possible_cmp_relations(l, r);
2149 if (((relation & ir_relation_less) && !(possible & ir_relation_greater))
2150 || ((relation & ir_relation_greater) && !(possible & ir_relation_less)))
2151 relation |= ir_relation_less_greater;
2153 /* just do a normal transformation of the Cmp */
2154 *cc_out = relation_to_condition_code(relation, mode);
2155 flags = be_transform_node(cmp);
2160 * Transform a node returning a "flag" result.
2162 * @param node the node to transform
2163 * @param cc_out the compare mode to use
2165 static ir_node *get_flags_node(ir_node *node, ia32_condition_code_t *cc_out)
/* currently only Cmp nodes can produce flags here */
2167 assert(is_Cmp(node));
2168 return get_flags_node_cmp(node, cc_out);
2172 * Transforms a Load.
2174 * @return the created ia32 Load node
2176 static ir_node *gen_Load(ir_node *node)
2178 ir_node *old_block = get_nodes_block(node);
2179 ir_node *block = be_transform_node(old_block);
2180 ir_node *ptr = get_Load_ptr(node);
2181 ir_node *mem = get_Load_mem(node);
2182 ir_node *new_mem = be_transform_node(mem);
2183 dbg_info *dbgi = get_irn_dbg_info(node);
2184 ir_mode *mode = get_Load_mode(node);
2185 int throws_exception = ir_throws_exception(node);
2189 ia32_address_t addr;
2191 /* construct load address */
2192 memset(&addr, 0, sizeof(addr));
2193 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2200 base = be_transform_node(base);
2206 idx = be_transform_node(idx);
2209 if (mode_is_float(mode)) {
2210 if (ia32_cg_config.use_sse2) {
2211 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2214 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2218 assert(mode != mode_b);
2220 /* create a conv node with address mode for smaller modes */
2221 if (get_mode_size_bits(mode) < 32) {
2222 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2223 new_mem, noreg_GP, mode);
2225 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2228 ir_set_throws_exception(new_node, throws_exception);
2230 set_irn_pinned(new_node, get_irn_pinned(node));
2231 set_ia32_op_type(new_node, ia32_AddrModeS);
2232 set_ia32_ls_mode(new_node, mode);
2233 set_address(new_node, &addr);
/* a floating (unpinned) load may be duplicated by the spiller */
2235 if (get_irn_pinned(node) == op_pin_state_floats) {
2236 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2237 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2238 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2239 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2242 SET_IA32_ORIG_NODE(new_node, node);
/* Decide whether a Load result ('node', a Proj of a Load) may be folded
 * into a destination-address-mode instruction that stores back through
 * 'ptr'. 'other' is the second operand of the computation (may be NULL);
 * it must not depend on the load or AM folding would create a cycle. */
2247 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2248 ir_node *ptr, ir_node *other)
2255 /* we only use address mode if we're the only user of the load */
2256 if (get_irn_n_edges(node) > 1)
2259 load = get_Proj_pred(node);
2262 if (get_nodes_block(load) != block)
2265 /* store should have the same pointer as the load */
2266 if (get_Load_ptr(load) != ptr)
2269 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2270 if (other != NULL &&
2271 get_nodes_block(other) == block &&
2272 heights_reachable_in_block(ia32_heights, other, load)) {
2276 if (ia32_prevents_AM(block, load, mem))
2278 /* Store should be attached to the load via mem */
2279 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Try to create a destination-address-mode binop (e.g. AddMem) for a
 * Store(binop(Load(p), x), p) pattern. Returns the new ia32 node or
 * NULL when the pattern does not apply. func8bit is used for 8bit
 * modes, func otherwise. */
2284 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2285 ir_node *mem, ir_node *ptr, ir_mode *mode,
2286 construct_binop_dest_func *func,
2287 construct_binop_dest_func *func8bit,
2288 match_flags_t flags)
2290 ir_node *src_block = get_nodes_block(node);
2298 ia32_address_mode_t am;
2299 ia32_address_t *addr = &am.addr;
2300 memset(&am, 0, sizeof(am));
2302 assert(flags & match_immediate); /* there is no destam node without... */
2303 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is the matching load; for commutative ops
 * either side may be the memory operand */
2305 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2306 build_address(&am, op1, ia32_create_am_double_use);
2307 new_op = create_immediate_or_transform(op2, 0);
2308 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2309 build_address(&am, op2, ia32_create_am_double_use);
2310 new_op = create_immediate_or_transform(op1, 0);
2315 if (addr->base == NULL)
2316 addr->base = noreg_GP;
2317 if (addr->index == NULL)
2318 addr->index = noreg_GP;
2319 if (addr->mem == NULL)
2322 dbgi = get_irn_dbg_info(node);
2323 block = be_transform_node(src_block);
2324 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2326 if (get_mode_size_bits(mode) == 8) {
2327 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2329 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2331 set_address(new_node, addr);
2332 set_ia32_op_type(new_node, ia32_AddrModeD);
2333 set_ia32_ls_mode(new_node, mode);
2334 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the load's and store's memory users to the combined node */
2336 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2337 mem_proj = be_transform_node(am.mem_proj);
2338 be_set_transformed_node(am.mem_proj, new_node);
2339 be_set_transformed_node(mem_proj, new_node);
/* Try to create a destination-address-mode unop (e.g. IncMem/DecMem)
 * for a Store(unop(Load(p)), p) pattern. Returns the new ia32 node or
 * NULL when the pattern does not apply. */
2344 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2345 ir_node *ptr, ir_mode *mode,
2346 construct_unop_dest_func *func)
2348 ir_node *src_block = get_nodes_block(node);
2354 ia32_address_mode_t am;
2355 ia32_address_t *addr = &am.addr;
2357 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2360 memset(&am, 0, sizeof(am));
2361 build_address(&am, op, ia32_create_am_double_use);
2363 dbgi = get_irn_dbg_info(node);
2364 block = be_transform_node(src_block);
2365 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2366 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2367 set_address(new_node, addr);
2368 set_ia32_op_type(new_node, ia32_AddrModeD);
2369 set_ia32_ls_mode(new_node, mode);
2370 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the load's and store's memory users to the combined node */
2372 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2373 mem_proj = be_transform_node(am.mem_proj);
2374 be_set_transformed_node(am.mem_proj, new_node);
2375 be_set_transformed_node(mem_proj, new_node);
/* Try to turn Store(Mux(cond, 1, 0)) (or the inverted 0/1 form) into a
 * SetccMem instruction that writes the condition byte directly to
 * memory. Returns the new node or NULL if the pattern does not apply. */
2380 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2382 ir_mode *mode = get_irn_mode(node);
2383 ir_node *mux_true = get_Mux_true(node);
2384 ir_node *mux_false = get_Mux_false(node);
2392 ia32_condition_code_t cc;
2393 ia32_address_t addr;
/* setcc only produces a byte */
2395 if (get_mode_size_bits(mode) != 8)
2398 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2400 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2406 cond = get_Mux_sel(node);
2407 flags = get_flags_node(cond, &cc);
2408 /* we can't handle the float special cases with SetM */
2409 if (cc & ia32_cc_additional_float_cases)
/* NOTE(review): the excerpt elides lines here; presumably negation
 * is applied only in the inverted (0/1) branch -- confirm in full file */
2412 cc = ia32_negate_condition_code(cc);
2414 build_address_ptr(&addr, ptr, mem);
2416 dbgi = get_irn_dbg_info(node);
2417 block = get_nodes_block(node);
2418 new_block = be_transform_node(block);
2419 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2420 addr.index, addr.mem, flags, cc);
2421 set_address(new_node, &addr);
2422 set_ia32_op_type(new_node, ia32_AddrModeD);
2423 set_ia32_ls_mode(new_node, mode);
2424 SET_IA32_ORIG_NODE(new_node, node);
/* try_create_dest_am(): attempt to fuse a Store(op(Load(p), x), p) pattern
 * into a single ia32 destination-address-mode instruction (AddMem, SubMem,
 * AndMem, OrMem, XorMem, shift-to-memory, Inc/Dec/Neg/NotMem, SetccMem).
 * Returns the fused node, or a failure path when the pattern does not match.
 * NOTE(review): elided listing — case labels, breaks and returns of the
 * switch are not all visible here. */
2429 static ir_node *try_create_dest_am(ir_node *node)
2431 	ir_node *val = get_Store_value(node);
2432 	ir_node *mem = get_Store_mem(node);
2433 	ir_node *ptr = get_Store_ptr(node);
2434 	ir_mode *mode = get_irn_mode(val);
2435 	unsigned bits = get_mode_size_bits(mode);
2440 	/* handle only GP modes for now... */
2441 	if (!ia32_mode_needs_gp_reg(mode))
2445 	/* store must be the only user of the val node */
2446 	if (get_irn_n_edges(val) > 1)
2448 	/* skip pointless convs */
2450 	ir_node *conv_op = get_Conv_op(val);
2451 	ir_mode *pred_mode = get_irn_mode(conv_op);
2452 	if (!ia32_mode_needs_gp_reg(pred_mode))
2454 	if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2462 	/* value must be in the same block */
2463 	if (get_nodes_block(node) != get_nodes_block(val))
/* Dispatch on the operation producing the stored value. */
2466 	switch (get_irn_opcode(val)) {
/* Add: prefer inc/dec for +1/-1 when the target config allows it. */
2468 	op1 = get_Add_left(val);
2469 	op2 = get_Add_right(val);
2470 	if (ia32_cg_config.use_incdec) {
2471 	if (is_Const_1(op2)) {
2472 	new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2474 	} else if (is_Const_Minus_1(op2)) {
2475 	new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2479 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2480 	new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2481 	match_commutative | match_immediate);
/* Sub: a Sub by constant should have been normalized to Add earlier. */
2484 	op1 = get_Sub_left(val);
2485 	op2 = get_Sub_right(val);
2486 	if (is_Const(op2)) {
2487 	ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2489 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2490 	new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2494 	op1 = get_And_left(val);
2495 	op2 = get_And_right(val);
2496 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2497 	new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2498 	match_commutative | match_immediate);
2501 	op1 = get_Or_left(val);
2502 	op2 = get_Or_right(val);
2503 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2504 	new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2505 	match_commutative | match_immediate);
2508 	op1 = get_Eor_left(val);
2509 	op2 = get_Eor_right(val);
2510 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2511 	new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2512 	match_commutative | match_immediate);
/* Shifts: no 8-bit variant exists, same constructor passed twice. */
2515 	op1 = get_Shl_left(val);
2516 	op2 = get_Shl_right(val);
2517 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2518 	new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2522 	op1 = get_Shr_left(val);
2523 	op2 = get_Shr_right(val);
2524 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2525 	new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2529 	op1 = get_Shrs_left(val);
2530 	op2 = get_Shrs_right(val);
2531 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2532 	new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2536 	op1 = get_Rotl_left(val);
2537 	op2 = get_Rotl_right(val);
2538 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2539 	new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2542 	/* TODO: match ROR patterns... */
/* Mux: may become a SetccMem (see try_create_SetMem above). */
2544 	new_node = try_create_SetMem(val, ptr, mem);
2548 	op1 = get_Minus_op(val);
2549 	new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2552 	/* should be lowered already */
2553 	assert(mode != mode_b);
2554 	op1 = get_Not_op(val);
2555 	new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* Propagate pinned state: a pinned Store must yield a pinned replacement. */
2561 	if (new_node != NULL) {
2562 	if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2563 	get_irn_pinned(node) == op_pin_state_pinned) {
2564 	set_irn_pinned(new_node, op_pin_state_pinned);
/* possible_int_mode_for_fp(): true iff 'mode' is a signed 16- or 32-bit
 * integer mode, i.e. a mode the x87 fist family can store directly.
 * NOTE(review): elided listing — the return statements are not shown. */
2571 static bool possible_int_mode_for_fp(ir_mode *mode)
2575 	if (!mode_is_signed(mode))
2577 	size = get_mode_size_bits(mode);
2578 	if (size != 16 && size != 32)
/* is_float_to_int_conv(): does 'node' convert a float value to an integer
 * mode that fist can handle (see possible_int_mode_for_fp)?
 * NOTE(review): elided listing — the is_Conv check/returns are not shown. */
2583 static int is_float_to_int_conv(const ir_node *node)
2585 	ir_mode *mode = get_irn_mode(node);
2589 	if (!possible_int_mode_for_fp(mode))
2594 	conv_op = get_Conv_op(node);
2595 	conv_mode = get_irn_mode(conv_op);
2597 	if (!mode_is_float(conv_mode))
2604 * Transform a Store(floatConst) into a sequence of
 * integer stores of the constant's raw bit pattern (32 bits at a time).
 * Multiple partial stores are joined by a Sync; a single store returns
 * its ia32 Store directly.
2607 * @return the created ia32 Store node
2609 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2611 	ir_mode *mode = get_irn_mode(cns);
2612 	unsigned size = get_mode_size_bytes(mode);
2613 	ir_tarval *tv = get_Const_tarval(cns);
2614 	ir_node *block = get_nodes_block(node);
2615 	ir_node *new_block = be_transform_node(block);
2616 	ir_node *ptr = get_Store_ptr(node);
2617 	ir_node *mem = get_Store_mem(node);
2618 	dbg_info *dbgi = get_irn_dbg_info(node);
2621 	int throws_exception = ir_throws_exception(node);
2623 	ia32_address_t addr;
/* Constant is emitted in 32-bit chunks, so the size must be a multiple. */
2625 	assert(size % 4 == 0);
2628 	build_address_ptr(&addr, ptr, mem);
/* Assemble one little-endian 32-bit word of the constant at offset 'ofs'. */
2632 	get_tarval_sub_bits(tv, ofs) |
2633 	(get_tarval_sub_bits(tv, ofs + 1) << 8) |
2634 	(get_tarval_sub_bits(tv, ofs + 2) << 16) |
2635 	(get_tarval_sub_bits(tv, ofs + 3) << 24);
2636 	ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2638 	ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2639 	addr.index, addr.mem, imm);
2640 	ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2642 	ir_set_throws_exception(new_node, throws_exception);
2643 	set_irn_pinned(new_node, get_irn_pinned(node));
2644 	set_ia32_op_type(new_node, ia32_AddrModeD);
2645 	set_ia32_ls_mode(new_node, mode_Iu);
2646 	set_address(new_node, &addr);
2647 	SET_IA32_ORIG_NODE(new_node, node);
2655 	} while (size != 0);
/* More than one partial store: join their memory Projs with a Sync. */
2658 	return new_rd_Sync(dbgi, new_block, i, ins);
2660 	return get_Proj_pred(ins[0]);
2665 * Generate a vfist or vfisttp instruction.
 * vfisttp (SSE3) truncates without fiddling the FPU control word but always
 * pops the x87 top-of-stack, so the popped value is kept alive via be_new_Keep;
 * the classic vfist path needs an explicit truncating FPU control word.
2667 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2668 	ir_node *index, ir_node *mem, ir_node *val)
2670 	if (ia32_cg_config.use_fisttp) {
2671 	/* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2672 	if other users exists */
2673 	ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2674 	ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
2675 	be_new_Keep(block, 1, &value);
2679 	ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2682 	ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2688 * Transforms a general (no special case) Store.
 * Tries destination address mode first, then dispatches on the stored
 * value's mode: SSE/x87 float store, float->int store via fist, or a
 * plain integer Store (8-bit variant for byte modes).
2690 * @return the created ia32 Store node
2692 static ir_node *gen_general_Store(ir_node *node)
2694 	ir_node *val = get_Store_value(node);
2695 	ir_mode *mode = get_irn_mode(val);
2696 	ir_node *block = get_nodes_block(node);
2697 	ir_node *new_block = be_transform_node(block);
2698 	ir_node *ptr = get_Store_ptr(node);
2699 	ir_node *mem = get_Store_mem(node);
2700 	dbg_info *dbgi = get_irn_dbg_info(node);
2701 	int throws_exception = ir_throws_exception(node);
2704 	ia32_address_t addr;
2706 	/* check for destination address mode */
2707 	new_node = try_create_dest_am(node);
2708 	if (new_node != NULL)
2711 	/* construct store address */
2712 	memset(&addr, 0, sizeof(addr));
2713 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2715 	if (addr.base == NULL) {
2716 	addr.base = noreg_GP;
2718 	addr.base = be_transform_node(addr.base);
2721 	if (addr.index == NULL) {
2722 	addr.index = noreg_GP;
2724 	addr.index = be_transform_node(addr.index);
2726 	addr.mem = be_transform_node(mem);
2728 	if (mode_is_float(mode)) {
2729 	/* Convs (and strict-Convs) before stores are unnecessary if the mode
2731 	while (is_Conv(val) && mode == get_irn_mode(val)) {
2732 	ir_node *op = get_Conv_op(val);
2733 	if (!mode_is_float(get_irn_mode(op)))
2737 	new_val = be_transform_node(val);
2738 	if (ia32_cg_config.use_sse2) {
2739 	new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2740 	addr.index, addr.mem, new_val);
2742 	new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2743 	addr.index, addr.mem, new_val, mode);
/* x87 only: Store(Conv(float)) with int mode becomes a fist store. */
2745 	} else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2746 	val = get_Conv_op(val);
2748 	/* TODO: is this optimisation still necessary at all (middleend)? */
2749 	/* We can skip ALL float->float up-Convs (and strict-up-Convs) before
2751 	while (is_Conv(val)) {
2752 	ir_node *op = get_Conv_op(val);
2753 	if (!mode_is_float(get_irn_mode(op)))
2755 	if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2759 	new_val = be_transform_node(val);
2760 	new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
/* Plain integer store; value may fold into an immediate. */
2762 	new_val = create_immediate_or_transform(val, 0);
2763 	assert(mode != mode_b);
2765 	if (get_mode_size_bits(mode) == 8) {
2766 	new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2767 	addr.index, addr.mem, new_val);
2769 	new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2770 	addr.index, addr.mem, new_val);
2773 	ir_set_throws_exception(new_node, throws_exception);
2775 	set_irn_pinned(new_node, get_irn_pinned(node));
2776 	set_ia32_op_type(new_node, ia32_AddrModeD);
2777 	set_ia32_ls_mode(new_node, mode);
2779 	set_address(new_node, &addr);
2780 	SET_IA32_ORIG_NODE(new_node, node);
2786 * Transforms a Store.
 * Dispatcher: float-constant stores become integer stores of the raw bits
 * (gen_float_const_Store); everything else goes to gen_general_Store.
2788 * @return the created ia32 Store node
2790 static ir_node *gen_Store(ir_node *node)
2792 	ir_node *val = get_Store_value(node);
2793 	ir_mode *mode = get_irn_mode(val);
2795 	if (mode_is_float(mode) && is_Const(val)) {
2796 	/* We can transform every floating const store
2797 	into a sequence of integer stores.
2798 	If the constant is already in a register,
2799 	it would be better to use it, but we don't
2800 	have this information here. */
2801 	return gen_float_const_Store(node, val);
2803 	return gen_general_Store(node);
2807 * Transforms a Switch.
 * Builds an indexed jump through a private, constant jump table entity
 * (scale 2 => 4-byte entries addressed via the selector).
2809 * @return the created ia32 SwitchJmp node
2811 static ir_node *create_Switch(ir_node *node)
2813 	dbg_info *dbgi = get_irn_dbg_info(node);
2814 	ir_node *block = be_transform_node(get_nodes_block(node));
2815 	ir_node *sel = get_Cond_selector(node);
2816 	ir_node *new_sel = be_transform_node(sel);
2817 	long default_pn = get_Cond_default_proj(node);
2821 	assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2823 	entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2824 	set_entity_visibility(entity, ir_visibility_private);
2825 	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2827 	/* TODO: we could perform some more matching here to also use the base
2828 	* register of the address mode */
2830 	= new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, default_pn);
2831 	set_ia32_am_scale(new_node, 2);
2832 	set_ia32_am_sc(new_node, entity);
2833 	set_ia32_op_type(new_node, ia32_AddrModeS);
2834 	set_ia32_ls_mode(new_node, mode_Iu);
2835 	SET_IA32_ORIG_NODE(new_node, node);
2836 	// FIXME This seems wrong. GCC uses PIC for switch on OS X.
2837 	get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2843 * Transform a Cond node.
 * A non-boolean selector means a switch; a boolean selector becomes a
 * conditional jump (Jcc) fed by the flags of the compare.
2845 static ir_node *gen_Cond(ir_node *node)
2847 	ir_node *block = get_nodes_block(node);
2848 	ir_node *new_block = be_transform_node(block);
2849 	dbg_info *dbgi = get_irn_dbg_info(node);
2850 	ir_node *sel = get_Cond_selector(node);
2851 	ir_mode *sel_mode = get_irn_mode(sel);
2852 	ir_node *flags = NULL;
2854 	ia32_condition_code_t cc;
2856 	if (sel_mode != mode_b) {
2857 	return create_Switch(node);
2860 	/* we get flags from a Cmp */
2861 	flags = get_flags_node(sel, &cc);
2863 	new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2864 	SET_IA32_ORIG_NODE(new_node, node);
2870 * Transform a be_Copy.
 * Duplicates the node and normalizes GP-register modes to mode_Iu.
2872 static ir_node *gen_be_Copy(ir_node *node)
2874 	ir_node *new_node = be_duplicate_node(node);
2875 	ir_mode *mode = get_irn_mode(new_node);
2877 	if (ia32_mode_needs_gp_reg(mode)) {
2878 	set_irn_mode(new_node, mode_Iu);
/* create_Fucom(): build an x87 float compare.  With fucomi the flags are
 * produced directly; otherwise the FPU status word is fetched via
 * FtstFnstsw (compare against 0) or FucomFnstsw and transferred to the
 * CPU flags with Sahf.
 * NOTE(review): elided listing — some lines of this function are missing. */
2884 static ir_node *create_Fucom(ir_node *node)
2886 	dbg_info *dbgi = get_irn_dbg_info(node);
2887 	ir_node *block = get_nodes_block(node);
2888 	ir_node *new_block = be_transform_node(block);
2889 	ir_node *left = get_Cmp_left(node);
2890 	ir_node *new_left = be_transform_node(left);
2891 	ir_node *right = get_Cmp_right(node);
2895 	if (ia32_cg_config.use_fucomi) {
2896 	new_right = be_transform_node(right);
2897 	new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2899 	set_ia32_commutative(new_node);
2900 	SET_IA32_ORIG_NODE(new_node, node);
/* ftst is a cheaper compare against 0.0 when available. */
2902 	if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2903 	new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2905 	new_right = be_transform_node(right);
2906 	new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2909 	set_ia32_commutative(new_node);
2911 	SET_IA32_ORIG_NODE(new_node, node);
/* Sahf copies AH (the fetched FPU status) into the CPU flags register. */
2913 	new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2914 	SET_IA32_ORIG_NODE(new_node, node);
/* create_Ucomi(): build an SSE2 float compare (ucomiss/ucomisd) with
 * source address mode matching; the result node carries the flags. */
2920 static ir_node *create_Ucomi(ir_node *node)
2922 	dbg_info *dbgi = get_irn_dbg_info(node);
2923 	ir_node *src_block = get_nodes_block(node);
2924 	ir_node *new_block = be_transform_node(src_block);
2925 	ir_node *left = get_Cmp_left(node);
2926 	ir_node *right = get_Cmp_right(node);
2928 	ia32_address_mode_t am;
2929 	ia32_address_t *addr = &am.addr;
2931 	match_arguments(&am, src_block, left, right, NULL,
2932 	match_commutative | match_am);
2934 	new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2935 	addr->mem, am.new_op1, am.new_op2,
2937 	set_am_attributes(new_node, &am);
2939 	SET_IA32_ORIG_NODE(new_node, node);
2941 	new_node = fix_mem_proj(new_node, &am);
2947 * returns true if it is assured, that the upper bits of a node are "clean"
2948 * which means for a 16 or 8 bit value, that the upper bits in the register
2949 * are 0 for unsigned and a copy of the last significant bit for signed
 * Used to decide whether a narrow compare can be widened to 32 bit.
 * NOTE(review): elided listing — several case labels/returns are missing.
2952 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2954 	assert(ia32_mode_needs_gp_reg(mode));
/* 32-bit values fill the register; nothing above them to be dirty. */
2955 	if (get_mode_size_bits(mode) >= 32)
2958 	if (is_Proj(transformed_node))
2959 	return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2961 	switch (get_ia32_irn_opcode(transformed_node)) {
2962 	case iro_ia32_Conv_I2I:
2963 	case iro_ia32_Conv_I2I8Bit: {
2964 	ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* A conversion only cleans the bits if signedness and size agree. */
2965 	if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2967 	if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2974 	if (mode_is_signed(mode)) {
2975 	return false; /* TODO handle signed modes */
/* Shr by a large-enough constant guarantees zeroed upper bits. */
2977 	ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2978 	if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2979 	const ia32_immediate_attr_t *attr
2980 	= get_ia32_immediate_attr_const(right);
2981 	if (attr->symconst == 0 &&
2982 	(unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2986 	return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2990 	/* TODO too conservative if shift amount is constant */
2991 	return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned it suffices if either operand is clean. */
2994 	if (!mode_is_signed(mode)) {
2996 	upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2997 	upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2999 	/* TODO if one is known to be zero extended, then || is sufficient */
3004 	upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
3005 	upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
3007 	case iro_ia32_Const:
3008 	case iro_ia32_Immediate: {
3009 	const ia32_immediate_attr_t *attr =
3010 	get_ia32_immediate_attr_const(transformed_node);
/* Constant is clean if its upper bits are all-0 (unsigned) or a sign copy. */
3011 	if (mode_is_signed(mode)) {
3012 	long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
3013 	return shifted == 0 || shifted == -1;
3015 	unsigned long shifted = (unsigned long)attr->offset;
3016 	shifted >>= get_mode_size_bits(mode)-1;
3018 	return shifted == 0;
3028 * Generate code for a Cmp.
 * Floats are delegated to SSE (Ucomi) or x87 (Fucom).  For integers, the
 * pattern (x & y) ==/!= 0 with a single user becomes Test; otherwise a
 * plain Cmp is emitted.  Narrow compares are widened to 32 bit when the
 * operands' upper bits are known clean, since the 32-bit opcode is smaller.
3030 static ir_node *gen_Cmp(ir_node *node)
3032 	dbg_info *dbgi = get_irn_dbg_info(node);
3033 	ir_node *block = get_nodes_block(node);
3034 	ir_node *new_block = be_transform_node(block);
3035 	ir_node *left = get_Cmp_left(node);
3036 	ir_node *right = get_Cmp_right(node);
3037 	ir_mode *cmp_mode = get_irn_mode(left);
3039 	ia32_address_mode_t am;
3040 	ia32_address_t *addr = &am.addr;
3042 	if (mode_is_float(cmp_mode)) {
3043 	if (ia32_cg_config.use_sse2) {
3044 	return create_Ucomi(node);
3046 	return create_Fucom(node);
3050 	assert(ia32_mode_needs_gp_reg(cmp_mode));
3052 	/* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3053 	if (is_Const_0(right) &&
3055 	get_irn_n_edges(left) == 1) {
3056 	/* Test(and_left, and_right) */
3057 	ir_node *and_left = get_And_left(left);
3058 	ir_node *and_right = get_And_right(left);
3060 	/* matze: code here used mode instead of cmd_mode, I think it is always
3061 	* the same as cmp_mode, but I leave this here to see if this is really
3064 	assert(get_irn_mode(and_left) == cmp_mode);
3066 	match_arguments(&am, block, and_left, and_right, NULL,
3068 	match_am | match_8bit_am | match_16bit_am |
3069 	match_am_and_immediates | match_immediate);
3071 	/* use 32bit compare mode if possible since the opcode is smaller */
3072 	if (upper_bits_clean(am.new_op1, cmp_mode) &&
3073 	upper_bits_clean(am.new_op2, cmp_mode)) {
3074 	cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3077 	if (get_mode_size_bits(cmp_mode) == 8) {
3078 	new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3079 	addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3081 	new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
3082 	addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3085 	/* Cmp(left, right) */
3086 	match_arguments(&am, block, left, right, NULL,
3087 	match_commutative | match_am | match_8bit_am |
3088 	match_16bit_am | match_am_and_immediates |
3090 	/* use 32bit compare mode if possible since the opcode is smaller */
3091 	if (upper_bits_clean(am.new_op1, cmp_mode) &&
3092 	upper_bits_clean(am.new_op2, cmp_mode)) {
3093 	cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3096 	if (get_mode_size_bits(cmp_mode) == 8) {
3097 	new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3098 	addr->index, addr->mem, am.new_op1,
3099 	am.new_op2, am.ins_permuted);
3101 	new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3102 	addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3105 	set_am_attributes(new_node, &am);
3106 	set_ia32_ls_mode(new_node, cmp_mode);
3108 	SET_IA32_ORIG_NODE(new_node, node);
3110 	new_node = fix_mem_proj(new_node, &am);
/* create_CMov(): build a CMovcc selecting val_false/val_true on 'cc'.
 * Requires CMOV support and GP-register operands; if argument matching
 * permuted the operands the condition code is negated to compensate. */
3115 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3116 	ia32_condition_code_t cc)
3118 	dbg_info *dbgi = get_irn_dbg_info(node);
3119 	ir_node *block = get_nodes_block(node);
3120 	ir_node *new_block = be_transform_node(block);
3121 	ir_node *val_true = get_Mux_true(node);
3122 	ir_node *val_false = get_Mux_false(node);
3124 	ia32_address_mode_t am;
3125 	ia32_address_t *addr;
3127 	assert(ia32_cg_config.use_cmov);
3128 	assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3132 	match_arguments(&am, block, val_false, val_true, flags,
3133 	match_commutative | match_am | match_16bit_am | match_mode_neutral);
3135 	if (am.ins_permuted)
3136 	cc = ia32_negate_condition_code(cc);
3138 	new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3139 	addr->mem, am.new_op1, am.new_op2, new_flags,
3141 	set_am_attributes(new_node, &am);
3143 	SET_IA32_ORIG_NODE(new_node, node);
3145 	new_node = fix_mem_proj(new_node, &am);
3151 * Creates a ia32 Setcc instruction.
 * Setcc produces a byte; for result modes wider than 8 bit the byte is
 * zero-extended (Conv_I2I8Bit with mode_Bu) to fill the register.
3153 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3154 	ir_node *flags, ia32_condition_code_t cc,
3157 	ir_mode *mode = get_irn_mode(orig_node);
3160 	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3161 	SET_IA32_ORIG_NODE(new_node, orig_node);
3163 	/* we might need to conv the result up */
3164 	if (get_mode_size_bits(mode) > 8) {
3165 	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3166 	nomem, new_node, mode_Bu);
3167 	SET_IA32_ORIG_NODE(new_node, orig_node);
3174 * Create instruction for an unsigned Difference or Zero.
 * Computes (a - b) & ~(borrow mask): Sub produces the flags, Sbb0 turns
 * the carry into an all-ones/all-zero mask, Not inverts it, And masks
 * the difference — yielding a-b if a>=b else 0, branch-free.
3176 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3178 	ir_mode *mode = get_irn_mode(psi);
3188 	new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3189 	match_mode_neutral | match_am | match_immediate | match_two_users);
3191 	block = get_nodes_block(new_node);
3193 	if (is_Proj(new_node)) {
3194 	sub = get_Proj_pred(new_node);
3197 	set_irn_mode(sub, mode_T);
3198 	new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3200 	assert(is_ia32_Sub(sub));
3201 	eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3203 	dbgi = get_irn_dbg_info(psi);
3204 	sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3205 	notn = new_bd_ia32_Not(dbgi, block, sbb);
3207 	new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3208 	set_ia32_commutative(new_node);
3213 * Create an const array of two float consts.
3215 * @param c0 the first constant
3216 * @param c1 the second constant
3217 * @param new_mode IN/OUT for the mode of the constants, if NULL
3218 * smallest possible mode will be used
 * The two tarvals are narrowed to the smallest float mode representing
 * both losslessly, then emitted as a private constant global entity.
3220 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3223 	ir_mode *mode = *new_mode;
3225 	ir_initializer_t *initializer;
3226 	ir_tarval *tv0 = get_Const_tarval(c0);
3227 	ir_tarval *tv1 = get_Const_tarval(c1);
3230 	/* detect the best mode for the constants */
3231 	mode = get_tarval_mode(tv0);
/* Try single precision first, then double, keeping only lossless choices. */
3233 	if (mode != mode_F) {
3234 	if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3235 	tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3237 	tv0 = tarval_convert_to(tv0, mode);
3238 	tv1 = tarval_convert_to(tv1, mode);
3239 	} else if (mode != mode_D) {
3240 	if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3241 	tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3243 	tv0 = tarval_convert_to(tv0, mode);
3244 	tv1 = tarval_convert_to(tv1, mode);
3251 	tp = ia32_create_float_type(mode, 4);
3252 	tp = ia32_create_float_array(tp);
3254 	ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3256 	set_entity_ld_ident(ent, get_entity_ident(ent));
3257 	set_entity_visibility(ent, ir_visibility_private);
3258 	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3260 	initializer = create_initializer_compound(2);
3262 	set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3263 	set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3265 	set_entity_initializer(ent, initializer);
3272 * Possible transformations for creating a Setcc.
 * A setcc_transform_t describes a small instruction recipe (up to a few
 * steps) that maps the 0/1 result of Setcc onto two arbitrary constants.
3274 enum setcc_transform_insn {
3287 typedef struct setcc_transform {
3289 	ia32_condition_code_t cc;
3291 	enum setcc_transform_insn transform;
3295 } setcc_transform_t;
3298 * Setcc can only handle 0 and 1 result.
3299 * Find a transformation that creates 0 and 1 from
 * arbitrary constants t/f: normalize so t > f and f == 0, then choose a
 * recipe (Set, Neg, Lea/Shl scaling, And, Add of an offset) that scales
 * the 0/1 up to the requested values.
 * NOTE(review): elided listing — several case labels and breaks missing.
3302 static void find_const_transform(ia32_condition_code_t cc,
3303 	ir_tarval *t, ir_tarval *f,
3304 	setcc_transform_t *res)
/* Normalize: make t the non-zero / bigger constant, negating cc to match. */
3310 	if (tarval_is_null(t)) {
3314 	cc = ia32_negate_condition_code(cc);
3315 	} else if (tarval_cmp(t, f) == ir_relation_less) {
3316 	// now, t is the bigger one
3320 	cc = ia32_negate_condition_code(cc);
/* If f != 0, emit a final ADD f and continue with t-f and f=0. */
3324 	if (! tarval_is_null(f)) {
3325 	ir_tarval *t_sub = tarval_sub(t, f, NULL);
3328 	res->steps[step].transform = SETCC_TR_ADD;
3330 	if (t == tarval_bad)
3331 	panic("constant subtract failed");
3332 	if (! tarval_is_long(f))
3333 	panic("tarval is not long");
3335 	res->steps[step].val = get_tarval_long(f);
3337 	f = tarval_sub(f, f, NULL);
3338 	assert(tarval_is_null(f));
/* t == 1: plain Setcc suffices. */
3341 	if (tarval_is_one(t)) {
3342 	res->steps[step].transform = SETCC_TR_SET;
3343 	res->num_steps = ++step;
/* t == -1: negate the 0/1 byte. */
3347 	if (tarval_is_minus_one(t)) {
3348 	res->steps[step].transform = SETCC_TR_NEG;
3350 	res->steps[step].transform = SETCC_TR_SET;
3351 	res->num_steps = ++step;
/* Other small longs: scale 0/1 via LEA / SHL recipes (9,8,5,4,3,2,...). */
3354 	if (tarval_is_long(t)) {
3355 	long v = get_tarval_long(t);
3357 	res->steps[step].val = 0;
3360 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3362 	res->steps[step].transform = SETCC_TR_LEAxx;
3363 	res->steps[step].scale = 3; /* (a << 3) + a */
3366 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3368 	res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3369 	res->steps[step].scale = 3; /* (a << 3) */
3372 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3374 	res->steps[step].transform = SETCC_TR_LEAxx;
3375 	res->steps[step].scale = 2; /* (a << 2) + a */
3378 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3380 	res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3381 	res->steps[step].scale = 2; /* (a << 2) */
3384 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3386 	res->steps[step].transform = SETCC_TR_LEAxx;
3387 	res->steps[step].scale = 1; /* (a << 1) + a */
3390 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3392 	res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3393 	res->steps[step].scale = 1; /* (a << 1) */
3396 	res->num_steps = step;
/* Not a power of 2: mask with AND; single bit: shift by bit position. */
3399 	if (! tarval_is_single_bit(t)) {
3400 	res->steps[step].transform = SETCC_TR_AND;
3401 	res->steps[step].val = v;
3403 	res->steps[step].transform = SETCC_TR_NEG;
3405 	int val = get_tarval_lowest_bit(t);
3408 	res->steps[step].transform = SETCC_TR_SHL;
3409 	res->steps[step].scale = val;
3413 	res->steps[step].transform = SETCC_TR_SET;
3414 	res->num_steps = ++step;
3417 	panic("tarval is not long");
3421 * Transforms a Mux node into some code sequence.
 * Handles, in order: float abs patterns; SSE min/max patterns; float
 * const/const selection via a 2-element constant table indexed by Setcc;
 * integer doz (difference-or-zero); integer const/const via the
 * find_const_transform recipes; and the general case via CMov.
3423 * @return The transformed node.
3425 static ir_node *gen_Mux(ir_node *node)
3427 	dbg_info *dbgi = get_irn_dbg_info(node);
3428 	ir_node *block = get_nodes_block(node);
3429 	ir_node *new_block = be_transform_node(block);
3430 	ir_node *mux_true = get_Mux_true(node);
3431 	ir_node *mux_false = get_Mux_false(node);
3432 	ir_node *sel = get_Mux_sel(node);
3433 	ir_mode *mode = get_irn_mode(node);
3437 	ia32_condition_code_t cc;
3439 	assert(get_irn_mode(sel) == mode_b);
3441 	is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3443 	if (ia32_mode_needs_gp_reg(mode)) {
3444 	ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3447 	ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3448 	return create_float_abs(dbgi, block, op, is_abs < 0, node);
3452 	/* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3453 	if (mode_is_float(mode)) {
3454 	ir_node *cmp_left = get_Cmp_left(sel);
3455 	ir_node *cmp_right = get_Cmp_right(sel);
3456 	ir_relation relation = get_Cmp_relation(sel);
3458 	if (ia32_cg_config.use_sse2) {
3459 	if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3460 	if (cmp_left == mux_true && cmp_right == mux_false) {
3461 	/* Mux(a <= b, a, b) => MIN */
3462 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3463 	match_commutative | match_am | match_two_users);
3464 	} else if (cmp_left == mux_false && cmp_right == mux_true) {
3465 	/* Mux(a <= b, b, a) => MAX */
3466 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3467 	match_commutative | match_am | match_two_users);
3469 	} else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3470 	if (cmp_left == mux_true && cmp_right == mux_false) {
3471 	/* Mux(a >= b, a, b) => MAX */
3472 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3473 	match_commutative | match_am | match_two_users);
3474 	} else if (cmp_left == mux_false && cmp_right == mux_true) {
3475 	/* Mux(a >= b, b, a) => MIN */
3476 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3477 	match_commutative | match_am | match_two_users);
/* Float Mux of two constants: load table[setcc] from a 2-entry array. */
3482 	if (is_Const(mux_true) && is_Const(mux_false)) {
3483 	ia32_address_mode_t am;
3488 	flags = get_flags_node(sel, &cc);
3489 	new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3491 	if (ia32_cg_config.use_sse2) {
3492 	/* cannot load from different mode on SSE */
3495 	/* x87 can load any mode */
3499 	am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* Scale the 0/1 index by the element size (Lea-based, 16 unsupported). */
3501 	switch (get_mode_size_bytes(new_mode)) {
3511 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3512 	set_ia32_am_scale(new_node, 2);
3517 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3518 	set_ia32_am_scale(new_node, 1);
3521 	/* arg, shift 16 NOT supported */
3523 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3526 	panic("Unsupported constant size");
3529 	am.ls_mode = new_mode;
3530 	am.addr.base = get_symconst_base();
3531 	am.addr.index = new_node;
3532 	am.addr.mem = nomem;
3534 	am.addr.scale = scale;
3535 	am.addr.use_frame = 0;
3536 	am.addr.tls_segment = false;
3537 	am.addr.frame_entity = NULL;
3538 	am.addr.symconst_sign = 0;
3539 	am.mem_proj = am.addr.mem;
3540 	am.op_type = ia32_AddrModeS;
3543 	am.pinned = op_pin_state_floats;
3545 	am.ins_permuted = false;
3547 	if (ia32_cg_config.use_sse2)
3548 	load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3550 	load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3551 	set_am_attributes(load, &am);
3553 	return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3555 	panic("cannot transform floating point Mux");
/* ----- integer Mux from here on ----- */
3558 	assert(ia32_mode_needs_gp_reg(mode));
3561 	ir_node *cmp_left = get_Cmp_left(sel);
3562 	ir_node *cmp_right = get_Cmp_right(sel);
3563 	ir_relation relation = get_Cmp_relation(sel);
3564 	ir_node *val_true = mux_true;
3565 	ir_node *val_false = mux_false;
/* Canonicalize so the zero constant sits in val_false. */
3567 	if (is_Const(val_true) && is_Const_null(val_true)) {
3568 	ir_node *tmp = val_false;
3569 	val_false = val_true;
3571 	relation = get_negated_relation(relation);
/* Mux(a >/< b, a-b / b-a, 0) => branch-free difference-or-zero. */
3573 	if (is_Const_0(val_false) && is_Sub(val_true)) {
3574 	if ((relation & ir_relation_greater)
3575 	&& get_Sub_left(val_true) == cmp_left
3576 	&& get_Sub_right(val_true) == cmp_right) {
3577 	return create_doz(node, cmp_left, cmp_right);
3579 	if ((relation & ir_relation_less)
3580 	&& get_Sub_left(val_true) == cmp_right
3581 	&& get_Sub_right(val_true) == cmp_left) {
3582 	return create_doz(node, cmp_right, cmp_left);
3587 	flags = get_flags_node(sel, &cc);
3589 	if (is_Const(mux_true) && is_Const(mux_false)) {
3590 	/* both are const, good */
3591 	ir_tarval *tv_true = get_Const_tarval(mux_true);
3592 	ir_tarval *tv_false = get_Const_tarval(mux_false);
3593 	setcc_transform_t res;
3596 	find_const_transform(cc, tv_true, tv_false, &res);
/* Emit the recipe back to front (last step first in the node chain). */
3598 	for (step = (int)res.num_steps - 1; step >= 0; --step) {
3601 	switch (res.steps[step].transform) {
3603 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3604 	add_ia32_am_offs_int(new_node, res.steps[step].val);
3606 	case SETCC_TR_ADDxx:
3607 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3610 	new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3611 	set_ia32_am_scale(new_node, res.steps[step].scale);
3612 	set_ia32_am_offs_int(new_node, res.steps[step].val);
3614 	case SETCC_TR_LEAxx:
3615 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3616 	set_ia32_am_scale(new_node, res.steps[step].scale);
3617 	set_ia32_am_offs_int(new_node, res.steps[step].val);
3620 	imm = ia32_immediate_from_long(res.steps[step].scale);
3621 	new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3624 	new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3627 	new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3630 	imm = ia32_immediate_from_long(res.steps[step].val);
3631 	new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3634 	new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3637 	new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3640 	panic("unknown setcc transform");
/* General case: conditional move. */
3644 	new_node = create_CMov(node, sel, flags, cc);
/* gen_ia32_l_Setcc(): lower the backend's l_Setcc pseudo-node to a real
 * ia32 Setcc fed by the flags of its condition. */
3650 static ir_node *gen_ia32_l_Setcc(ir_node *node)
3652 	ia32_condition_code_t cc;
3653 	dbg_info *dbgi = get_irn_dbg_info(node);
3654 	ir_node *block = get_nodes_block(node);
3655 	ir_node *new_block = be_transform_node(block);
3656 	ir_node *cond = get_irn_n(node, n_ia32_l_Setcc_cond);
3657 	ir_node *flags = get_flags_node(cond, &cc);
3658 	ir_node *new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3659 	SET_IA32_ORIG_NODE(new_node, node);
3664 * Create a conversion from x87 state register to general purpose.
 * Stores the x87 value to the frame via fist(tp) and reloads it as an
 * integer Load; unsigned 32-bit values go through a 64-bit signed store
 * and only the low 32 bits are loaded back.
3666 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3668 	ir_node *block = be_transform_node(get_nodes_block(node));
3669 	ir_node *op = get_Conv_op(node);
3670 	ir_node *new_op = be_transform_node(op);
3671 	ir_graph *irg = current_ir_graph;
3672 	dbg_info *dbgi = get_irn_dbg_info(node);
3673 	ir_mode *mode = get_irn_mode(node);
3674 	ir_node *frame = get_irg_frame(irg);
3675 	ir_node *fist, *load, *mem;
3677 	fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3678 	set_irn_pinned(fist, op_pin_state_floats);
3679 	set_ia32_use_frame(fist);
3680 	set_ia32_op_type(fist, ia32_AddrModeD);
/* vfist and vfisttp must agree on the memory Proj number we use here. */
3682 	assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3683 	mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3685 	assert(get_mode_size_bits(mode) <= 32);
3686 	/* exception we can only store signed 32 bit integers, so for unsigned
3687 	we store a 64bit (signed) integer and load the lower bits */
3688 	if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3689 	set_ia32_ls_mode(fist, mode_Ls);
3691 	set_ia32_ls_mode(fist, mode_Is);
3693 	SET_IA32_ORIG_NODE(fist, node);
3696 	load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3698 	set_irn_pinned(load, op_pin_state_floats);
3699 	set_ia32_use_frame(load);
3700 	set_ia32_op_type(load, ia32_AddrModeS);
3701 	set_ia32_ls_mode(load, mode_Is);
/* The spill slot must be sized to match the fist's store width. */
3702 	if (get_ia32_ls_mode(fist) == mode_Ls) {
3703 	ia32_attr_t *attr = get_ia32_attr(load);
3704 	attr->data.need_64bit_stackent = 1;
3706 	ia32_attr_t *attr = get_ia32_attr(load);
3707 	attr->data.need_32bit_stackent = 1;
3709 	SET_IA32_ORIG_NODE(load, node);
3711 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3715 * Creates a x87 strict Conv by placing a Store and a Load
/* Create a strict x87 conversion by spilling the value to the frame with a
 * vfst in tgt_mode and reloading it with a vfld, which rounds the value to
 * the target precision. Returns the result Proj of the load. */
3717 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3719 ir_node *block = get_nodes_block(node);
3720 ir_graph *irg = get_Block_irg(block);
3721 dbg_info *dbgi = get_irn_dbg_info(node);
3722 ir_node *frame = get_irg_frame(irg);
3724 ir_node *store, *load;
/* store in target precision (this performs the actual rounding) */
3727 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3728 set_ia32_use_frame(store);
3729 set_ia32_op_type(store, ia32_AddrModeD);
3730 SET_IA32_ORIG_NODE(store, node);
3732 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
/* reload the rounded value */
3734 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3735 set_ia32_use_frame(load);
3736 set_ia32_op_type(load, ia32_AddrModeS);
3737 SET_IA32_ORIG_NODE(load, node);
3739 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
3743 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3744 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3746 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3748 func = get_mode_size_bits(mode) == 8 ?
3749 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3750 return func(dbgi, block, base, index, mem, val, mode);
3754 * Create a conversion from general purpose to x87 register
/* Convert a general-purpose integer to an x87 float value. Tries fild with
 * source address mode first; otherwise spills the integer to the frame and
 * filds it from there (64bit spill for 32bit unsigned values). */
3756 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3758 ir_node *src_block = get_nodes_block(node);
3759 ir_node *block = be_transform_node(src_block);
3760 ir_graph *irg = get_Block_irg(block);
3761 dbg_info *dbgi = get_irn_dbg_info(node);
3762 ir_node *op = get_Conv_op(node);
3763 ir_node *new_op = NULL;
3765 ir_mode *store_mode;
3771 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3772 if (possible_int_mode_for_fp(src_mode)) {
3773 ia32_address_mode_t am;
3775 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3776 if (am.op_type == ia32_AddrModeS) {
3777 ia32_address_t *addr = &am.addr;
/* the operand is loaded directly from memory by the fild */
3779 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3780 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3782 set_am_attributes(fild, &am);
3783 SET_IA32_ORIG_NODE(fild, node);
3785 fix_mem_proj(fild, &am);
/* no source AM possible: transform the operand normally */
3790 if (new_op == NULL) {
3791 new_op = be_transform_node(op);
3794 mode = get_irn_mode(op);
3796 /* first convert to 32 bit signed if necessary */
3797 if (get_mode_size_bits(src_mode) < 32) {
3798 if (!upper_bits_clean(new_op, src_mode)) {
3799 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3800 SET_IA32_ORIG_NODE(new_op, node);
3805 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so the fild can read it */
3808 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3810 set_ia32_use_frame(store);
3811 set_ia32_op_type(store, ia32_AddrModeD);
3812 set_ia32_ls_mode(store, mode_Iu);
3814 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3816 /* exception for 32bit unsigned, do a 64bit spill+load */
3817 if (!mode_is_signed(mode)) {
/* write a zero high word so the 64bit value is the unsigned 32bit value */
3820 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3822 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3823 noreg_GP, nomem, zero_const);
3824 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3826 set_ia32_use_frame(zero_store);
3827 set_ia32_op_type(zero_store, ia32_AddrModeD);
3828 add_ia32_am_offs_int(zero_store, 4);
3829 set_ia32_ls_mode(zero_store, mode_Iu);
/* the fild must be ordered after both stores */
3831 in[0] = zero_store_mem;
3834 store_mem = new_rd_Sync(dbgi, block, 2, in);
3835 store_mode = mode_Ls;
3837 store_mode = mode_Is;
/* load the spilled integer into the x87 stack */
3841 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3843 set_ia32_use_frame(fild);
3844 set_ia32_op_type(fild, ia32_AddrModeS);
3845 set_ia32_ls_mode(fild, store_mode);
3847 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3853 * Create a conversion from one integer mode into another one
/* Create a conversion from one integer mode into another. The conversion is
 * performed in the smaller of the two modes; address mode for the operand is
 * matched when possible. */
3855 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3856 dbg_info *dbgi, ir_node *block, ir_node *op,
3859 ir_node *new_block = be_transform_node(block);
3861 ir_mode *smaller_mode;
3862 ia32_address_mode_t am;
3863 ia32_address_t *addr = &am.addr;
/* the conversion only has to preserve the bits of the smaller mode */
3866 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3867 smaller_mode = src_mode;
3869 smaller_mode = tgt_mode;
3872 #ifdef DEBUG_libfirm
3874 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3879 match_arguments(&am, block, NULL, op, NULL,
3880 match_am | match_8bit_am | match_16bit_am);
/* if the operand already has clean upper bits the conv is a no-op */
3882 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3883 /* unnecessary conv. in theory it shouldn't have been AM */
3884 assert(is_ia32_NoReg_GP(addr->base));
3885 assert(is_ia32_NoReg_GP(addr->index));
3886 assert(is_NoMem(addr->mem));
3887 assert(am.addr.offset == 0);
3888 assert(am.addr.symconst_ent == NULL);
3892 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3893 addr->mem, am.new_op2, smaller_mode);
3894 set_am_attributes(new_node, &am);
3895 /* match_arguments assume that out-mode = in-mode, this isn't true here
3897 set_ia32_ls_mode(new_node, smaller_mode);
3898 SET_IA32_ORIG_NODE(new_node, node);
3899 new_node = fix_mem_proj(new_node, &am);
3904 * Transforms a Conv node.
3906 * @return The created ia32 Conv node
/* Transform a Conv node, dispatching on the float/int combination of source
 * and target mode and on whether SSE2 or x87 is in use.
 * @return The created ia32 Conv node (or the transformed operand when the
 *         conversion is a no-op). */
3908 static ir_node *gen_Conv(ir_node *node)
3910 ir_node *block = get_nodes_block(node);
3911 ir_node *new_block = be_transform_node(block);
3912 ir_node *op = get_Conv_op(node);
3913 ir_node *new_op = NULL;
3914 dbg_info *dbgi = get_irn_dbg_info(node);
3915 ir_mode *src_mode = get_irn_mode(op);
3916 ir_mode *tgt_mode = get_irn_mode(node);
3917 int src_bits = get_mode_size_bits(src_mode);
3918 int tgt_bits = get_mode_size_bits(tgt_mode);
3919 ir_node *res = NULL;
/* 64bit integers must have been lowered before the backend */
3921 assert(!mode_is_int(src_mode) || src_bits <= 32);
3922 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3924 /* modeB -> X should already be lowered by the lower_mode_b pass */
3925 if (src_mode == mode_b) {
3926 panic("ConvB not lowered %+F", node);
/* same-mode conversion: only strict x87 convs have an effect */
3929 if (src_mode == tgt_mode) {
3930 if (get_Conv_strict(node)) {
3931 if (ia32_cg_config.use_sse2) {
3932 /* when we are in SSE mode, we can kill all strict no-op conversion */
3933 return be_transform_node(op);
3936 /* this should be optimized already, but who knows... */
3937 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3938 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3939 return be_transform_node(op);
3943 if (mode_is_float(src_mode)) {
3944 new_op = be_transform_node(op);
3945 /* we convert from float ... */
3946 if (mode_is_float(tgt_mode)) {
3948 if (ia32_cg_config.use_sse2) {
3949 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3950 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3952 set_ia32_ls_mode(res, tgt_mode);
3954 if (get_Conv_strict(node)) {
3955 /* if fp_no_float_fold is not set then we assume that we
3956 * don't have any float operations in a non
3957 * mode_float_arithmetic mode and can skip strict upconvs */
3958 if (src_bits < tgt_bits) {
3959 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* strict x87 downconv: round via store/load */
3962 res = gen_x87_strict_conv(tgt_mode, new_op);
3963 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3967 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* ... to int */
3972 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3973 if (ia32_cg_config.use_sse2) {
3974 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3976 set_ia32_ls_mode(res, src_mode);
3978 return gen_x87_fp_to_gp(node);
3982 /* we convert from int ... */
3983 if (mode_is_float(tgt_mode)) {
3985 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3986 if (ia32_cg_config.use_sse2) {
3987 new_op = be_transform_node(op);
3988 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3990 set_ia32_ls_mode(res, tgt_mode);
3992 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3993 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3994 res = gen_x87_gp_to_fp(node, src_mode);
3996 /* we need a strict-Conv, if the int mode has more bits than the
3998 if (float_mantissa < int_mantissa) {
3999 res = gen_x87_strict_conv(tgt_mode, res);
4000 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
4004 } else if (tgt_mode == mode_b) {
4005 /* mode_b lowering already took care that we only have 0/1 values */
4006 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
4007 src_mode, tgt_mode));
4008 return be_transform_node(op);
/* int -> int */
4011 if (src_bits == tgt_bits) {
4012 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
4013 src_mode, tgt_mode));
4014 return be_transform_node(op);
4017 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
4025 static ir_node *create_immediate_or_transform(ir_node *node,
4026 char immediate_constraint_type)
4028 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
4029 if (new_node == NULL) {
4030 new_node = be_transform_node(node);
4036 * Transforms a FrameAddr into an ia32 Add.
4038 static ir_node *gen_be_FrameAddr(ir_node *node)
4040 ir_node *block = be_transform_node(get_nodes_block(node));
4041 ir_node *op = be_get_FrameAddr_frame(node);
4042 ir_node *new_op = be_transform_node(op);
4043 dbg_info *dbgi = get_irn_dbg_info(node);
4046 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
4047 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
4048 set_ia32_use_frame(new_node);
4050 SET_IA32_ORIG_NODE(new_node, node);
4056 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Transform a be_Return. In the SSE2 case a float return value must be moved
 * from XMM0 to the x87 TOS (via a store/load through the frame) before the
 * return, since the ABI returns floats in st(0). */
4058 static ir_node *gen_be_Return(ir_node *node)
4060 ir_graph *irg = current_ir_graph;
4061 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
4062 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
4063 ir_node *new_ret_val = be_transform_node(ret_val);
4064 ir_node *new_ret_mem = be_transform_node(ret_mem);
4065 ir_entity *ent = get_irg_entity(irg);
4066 ir_type *tp = get_entity_type(ent);
4067 dbg_info *dbgi = get_irn_dbg_info(node);
4068 ir_node *block = be_transform_node(get_nodes_block(node));
4082 assert(ret_val != NULL);
/* the XMM0 -> x87 fixup is only needed for SSE2 float returns */
4083 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4084 return be_duplicate_node(node);
4087 res_type = get_method_res_type(tp, 0);
4089 if (! is_Primitive_type(res_type)) {
4090 return be_duplicate_node(node);
4093 mode = get_type_mode(res_type);
4094 if (! mode_is_float(mode)) {
4095 return be_duplicate_node(node);
4098 assert(get_method_n_ress(tp) == 1);
4100 frame = get_irg_frame(irg);
4102 /* store xmm0 onto stack */
4103 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4104 new_ret_mem, new_ret_val);
4105 set_ia32_ls_mode(sse_store, mode);
4106 set_ia32_op_type(sse_store, ia32_AddrModeD);
4107 set_ia32_use_frame(sse_store);
4108 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4110 /* load into x87 register */
4111 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4112 set_ia32_op_type(fld, ia32_AddrModeS);
4113 set_ia32_use_frame(fld);
4115 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4116 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4118 /* create a new return */
4119 arity = get_irn_arity(node);
4120 in = ALLOCAN(ir_node*, arity);
4121 pop = be_Return_get_pop(node);
/* rebuild the Return inputs, substituting the x87 value and the new memory */
4122 for (i = 0; i < arity; ++i) {
4123 ir_node *op = get_irn_n(node, i);
4124 if (op == ret_val) {
4126 } else if (op == ret_mem) {
4129 in[i] = be_transform_node(op);
4132 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4133 copy_node_attr(irg, node, new_node);
4139 * Transform a be_AddSP into an ia32_SubSP.
4141 static ir_node *gen_be_AddSP(ir_node *node)
4143 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4144 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4146 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4147 match_am | match_immediate);
4148 assert(is_ia32_SubSP(new_node));
4149 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4150 &ia32_registers[REG_ESP]);
4155 * Transform a be_SubSP into an ia32_AddSP
4157 static ir_node *gen_be_SubSP(ir_node *node)
4159 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4160 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4162 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4163 match_am | match_immediate);
4164 assert(is_ia32_AddSP(new_node));
4165 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4166 &ia32_registers[REG_ESP]);
4171 * Change some phi modes
/* Transform a Phi: keep the node but attach the register requirement of the
 * ia32 register class matching its mode (gp, xmm or vfp). */
4173 static ir_node *gen_Phi(ir_node *node)
4175 const arch_register_req_t *req;
4176 ir_node *block = be_transform_node(get_nodes_block(node));
4177 ir_graph *irg = current_ir_graph;
4178 dbg_info *dbgi = get_irn_dbg_info(node);
4179 ir_mode *mode = get_irn_mode(node);
4182 if (ia32_mode_needs_gp_reg(mode)) {
4183 /* we shouldn't have any 64bit stuff around anymore */
4184 assert(get_mode_size_bits(mode) <= 32);
4185 /* all integer operations are on 32bit registers now */
4187 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4188 } else if (mode_is_float(mode)) {
4189 if (ia32_cg_config.use_sse2) {
4191 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4194 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* e.g. memory Phis need no register */
4197 req = arch_no_register_req;
4200 /* phi nodes allow loops, so we use the old arguments for now
4201 * and fix this later */
4202 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4203 get_irn_in(node) + 1);
4204 copy_node_attr(irg, node, phi);
4205 be_duplicate_deps(node, phi);
4207 arch_set_irn_register_req_out(phi, 0, req);
/* make sure the predecessor blocks get transformed too */
4209 be_enqueue_preds(node);
4214 static ir_node *gen_Jmp(ir_node *node)
4216 ir_node *block = get_nodes_block(node);
4217 ir_node *new_block = be_transform_node(block);
4218 dbg_info *dbgi = get_irn_dbg_info(node);
4221 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4222 SET_IA32_ORIG_NODE(new_node, node);
4230 static ir_node *gen_IJmp(ir_node *node)
4232 ir_node *block = get_nodes_block(node);
4233 ir_node *new_block = be_transform_node(block);
4234 dbg_info *dbgi = get_irn_dbg_info(node);
4235 ir_node *op = get_IJmp_target(node);
4237 ia32_address_mode_t am;
4238 ia32_address_t *addr = &am.addr;
4240 assert(get_irn_mode(op) == mode_P);
4242 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4244 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4245 addr->mem, am.new_op2);
4246 set_am_attributes(new_node, &am);
4247 SET_IA32_ORIG_NODE(new_node, node);
4249 new_node = fix_mem_proj(new_node, &am);
4254 static ir_node *gen_ia32_l_Add(ir_node *node)
4256 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4257 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4258 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4259 match_commutative | match_am | match_immediate |
4260 match_mode_neutral);
4262 if (is_Proj(lowered)) {
4263 lowered = get_Proj_pred(lowered);
4265 assert(is_ia32_Add(lowered));
4266 set_irn_mode(lowered, mode_T);
4272 static ir_node *gen_ia32_l_Adc(ir_node *node)
4274 return gen_binop_flags(node, new_bd_ia32_Adc,
4275 match_commutative | match_am | match_immediate |
4276 match_mode_neutral);
4280 * Transforms a l_MulS into a "real" MulS node.
4282 * @return the created ia32 Mul node
4284 static ir_node *gen_ia32_l_Mul(ir_node *node)
4286 ir_node *left = get_binop_left(node);
4287 ir_node *right = get_binop_right(node);
4289 return gen_binop(node, left, right, new_bd_ia32_Mul,
4290 match_commutative | match_am | match_mode_neutral);
4294 * Transforms a l_IMulS into a "real" IMul1OPS node.
4296 * @return the created ia32 IMul1OP node
4298 static ir_node *gen_ia32_l_IMul(ir_node *node)
4300 ir_node *left = get_binop_left(node);
4301 ir_node *right = get_binop_right(node);
4303 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4304 match_commutative | match_am | match_mode_neutral);
4307 static ir_node *gen_ia32_l_Sub(ir_node *node)
4309 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4310 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4311 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4312 match_am | match_immediate | match_mode_neutral);
4314 if (is_Proj(lowered)) {
4315 lowered = get_Proj_pred(lowered);
4317 assert(is_ia32_Sub(lowered));
4318 set_irn_mode(lowered, mode_T);
4324 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4326 return gen_binop_flags(node, new_bd_ia32_Sbb,
4327 match_am | match_immediate | match_mode_neutral);
4330 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4332 ir_node *src_block = get_nodes_block(node);
4333 ir_node *block = be_transform_node(src_block);
4334 ir_graph *irg = current_ir_graph;
4335 dbg_info *dbgi = get_irn_dbg_info(node);
4336 ir_node *frame = get_irg_frame(irg);
4337 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4338 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4339 ir_node *new_val_low = be_transform_node(val_low);
4340 ir_node *new_val_high = be_transform_node(val_high);
4342 ir_node *sync, *fild, *res;
4344 ir_node *store_high;
4348 if (ia32_cg_config.use_sse2) {
4349 panic("ia32_l_LLtoFloat not implemented for SSE2");
4353 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4355 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4357 SET_IA32_ORIG_NODE(store_low, node);
4358 SET_IA32_ORIG_NODE(store_high, node);
4360 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4361 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4363 set_ia32_use_frame(store_low);
4364 set_ia32_use_frame(store_high);
4365 set_ia32_op_type(store_low, ia32_AddrModeD);
4366 set_ia32_op_type(store_high, ia32_AddrModeD);
4367 set_ia32_ls_mode(store_low, mode_Iu);
4368 set_ia32_ls_mode(store_high, mode_Is);
4369 add_ia32_am_offs_int(store_high, 4);
4373 sync = new_rd_Sync(dbgi, block, 2, in);
4376 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4378 set_ia32_use_frame(fild);
4379 set_ia32_op_type(fild, ia32_AddrModeS);
4380 set_ia32_ls_mode(fild, mode_Ls);
4382 SET_IA32_ORIG_NODE(fild, node);
4384 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4386 if (! mode_is_signed(get_irn_mode(val_high))) {
4387 ia32_address_mode_t am;
4389 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4392 am.addr.base = get_symconst_base();
4393 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4394 am.addr.mem = nomem;
4397 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4398 am.addr.tls_segment = false;
4399 am.addr.use_frame = 0;
4400 am.addr.frame_entity = NULL;
4401 am.addr.symconst_sign = 0;
4402 am.ls_mode = mode_F;
4403 am.mem_proj = nomem;
4404 am.op_type = ia32_AddrModeS;
4406 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4407 am.pinned = op_pin_state_floats;
4409 am.ins_permuted = false;
4411 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4412 am.new_op1, am.new_op2, get_fpcw());
4413 set_am_attributes(fadd, &am);
4415 set_irn_mode(fadd, mode_T);
4416 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
4421 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4423 ir_node *src_block = get_nodes_block(node);
4424 ir_node *block = be_transform_node(src_block);
4425 ir_graph *irg = get_Block_irg(block);
4426 dbg_info *dbgi = get_irn_dbg_info(node);
4427 ir_node *frame = get_irg_frame(irg);
4428 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4429 ir_node *new_val = be_transform_node(val);
4432 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4433 SET_IA32_ORIG_NODE(fist, node);
4434 set_ia32_use_frame(fist);
4435 set_ia32_op_type(fist, ia32_AddrModeD);
4436 set_ia32_ls_mode(fist, mode_Ls);
4438 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4439 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
4442 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4444 ir_node *block = be_transform_node(get_nodes_block(node));
4445 ir_graph *irg = get_Block_irg(block);
4446 ir_node *pred = get_Proj_pred(node);
4447 ir_node *new_pred = be_transform_node(pred);
4448 ir_node *frame = get_irg_frame(irg);
4449 dbg_info *dbgi = get_irn_dbg_info(node);
4450 long pn = get_Proj_proj(node);
4455 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4456 SET_IA32_ORIG_NODE(load, node);
4457 set_ia32_use_frame(load);
4458 set_ia32_op_type(load, ia32_AddrModeS);
4459 set_ia32_ls_mode(load, mode_Iu);
4460 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4461 * 32 bit from it with this particular load */
4462 attr = get_ia32_attr(load);
4463 attr->data.need_64bit_stackent = 1;
4465 if (pn == pn_ia32_l_FloattoLL_res_high) {
4466 add_ia32_am_offs_int(load, 4);
4468 assert(pn == pn_ia32_l_FloattoLL_res_low);
4471 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4477 * Transform the Projs of an AddSP.
/* Transform the Projs of a be_AddSP: map them onto the Projs of the
 * ia32_SubSP that gen_be_AddSP created (AddSP became SubSP because the
 * stack grows downwards). */
4479 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4481 ir_node *pred = get_Proj_pred(node);
4482 ir_node *new_pred = be_transform_node(pred);
4483 dbg_info *dbgi = get_irn_dbg_info(node);
4484 long proj = get_Proj_proj(node);
4486 if (proj == pn_be_AddSP_sp) {
4487 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4488 pn_ia32_SubSP_stack);
/* the stack pointer result must stay in ESP */
4489 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4491 } else if (proj == pn_be_AddSP_res) {
4492 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4493 pn_ia32_SubSP_addr);
4494 } else if (proj == pn_be_AddSP_M) {
4495 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4498 panic("No idea how to transform proj->AddSP");
4502 * Transform the Projs of a SubSP.
/* Transform the Projs of a be_SubSP: map them onto the Projs of the
 * ia32_AddSP that gen_be_SubSP created. */
4504 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4506 ir_node *pred = get_Proj_pred(node);
4507 ir_node *new_pred = be_transform_node(pred);
4508 dbg_info *dbgi = get_irn_dbg_info(node);
4509 long proj = get_Proj_proj(node);
4511 if (proj == pn_be_SubSP_sp) {
4512 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4513 pn_ia32_AddSP_stack);
/* the stack pointer result must stay in ESP */
4514 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4516 } else if (proj == pn_be_SubSP_M) {
4517 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4520 panic("No idea how to transform proj->SubSP");
4524 * Transform and renumber the Projs from a Load.
/* Transform and renumber the Projs of a Load, dispatching on what the Load
 * was transformed into (ia32 Load, a Conv with folded load, SSE xLoad, x87
 * vfld, or something else via source address mode). */
4526 static ir_node *gen_Proj_Load(ir_node *node)
4529 ir_node *pred = get_Proj_pred(node);
4530 dbg_info *dbgi = get_irn_dbg_info(node);
4531 long proj = get_Proj_proj(node);
4533 /* loads might be part of source address mode matches, so we don't
4534 * transform the ProjMs yet (with the exception of loads whose result is
4537 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4540 /* this is needed, because sometimes we have loops that are only
4541 reachable through the ProjM */
4542 be_enqueue_preds(node);
4543 /* do it in 2 steps, to silence firm verifier */
4544 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4545 set_Proj_proj(res, pn_ia32_mem);
4549 /* renumber the proj */
4550 new_pred = be_transform_node(pred);
4551 if (is_ia32_Load(new_pred)) {
4552 switch ((pn_Load)proj) {
4554 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4556 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4557 case pn_Load_X_except:
4558 /* This Load might raise an exception. Mark it. */
4559 set_ia32_exc_label(new_pred, 1);
4560 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4561 case pn_Load_X_regular:
4562 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* the Load was folded into a Conv as source address mode */
4564 } else if (is_ia32_Conv_I2I(new_pred) ||
4565 is_ia32_Conv_I2I8Bit(new_pred)) {
4566 set_irn_mode(new_pred, mode_T);
4567 switch ((pn_Load)proj) {
4569 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4571 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4572 case pn_Load_X_except:
4573 /* This Load might raise an exception. Mark it. */
4574 set_ia32_exc_label(new_pred, 1);
4575 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4576 case pn_Load_X_regular:
4577 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
/* SSE load */
4579 } else if (is_ia32_xLoad(new_pred)) {
4580 switch ((pn_Load)proj) {
4582 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4584 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4585 case pn_Load_X_except:
4586 /* This Load might raise an exception. Mark it. */
4587 set_ia32_exc_label(new_pred, 1);
4588 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4589 case pn_Load_X_regular:
4590 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
/* x87 load */
4592 } else if (is_ia32_vfld(new_pred)) {
4593 switch ((pn_Load)proj) {
4595 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4597 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4598 case pn_Load_X_except:
4599 /* This Load might raise an exception. Mark it. */
4600 set_ia32_exc_label(new_pred, 1);
4601 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4602 case pn_Load_X_regular:
4603 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4606 /* can happen for ProJMs when source address mode happened for the
4609 /* however it should not be the result proj, as that would mean the
4610 load had multiple users and should not have been used for
4612 if (proj != pn_Load_M) {
4613 panic("internal error: transformed node not a Load");
4615 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4618 panic("No idea how to transform Proj(Load) %+F", node);
4621 static ir_node *gen_Proj_Store(ir_node *node)
4623 ir_node *pred = get_Proj_pred(node);
4624 ir_node *new_pred = be_transform_node(pred);
4625 dbg_info *dbgi = get_irn_dbg_info(node);
4626 long pn = get_Proj_proj(node);
4628 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4629 switch ((pn_Store)pn) {
4631 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4632 case pn_Store_X_except:
4633 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4634 case pn_Store_X_regular:
4635 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4637 } else if (is_ia32_vfist(new_pred)) {
4638 switch ((pn_Store)pn) {
4640 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4641 case pn_Store_X_except:
4642 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4643 case pn_Store_X_regular:
4644 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4646 } else if (is_ia32_vfisttp(new_pred)) {
4647 switch ((pn_Store)pn) {
4649 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4650 case pn_Store_X_except:
4651 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4652 case pn_Store_X_regular:
4653 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4655 } else if (is_ia32_vfst(new_pred)) {
4656 switch ((pn_Store)pn) {
4658 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4659 case pn_Store_X_except:
4660 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4661 case pn_Store_X_regular:
4662 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4664 } else if (is_ia32_xStore(new_pred)) {
4665 switch ((pn_Store)pn) {
4667 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4668 case pn_Store_X_except:
4669 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4670 case pn_Store_X_regular:
4671 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4673 } else if (is_Sync(new_pred)) {
4674 /* hack for the case that gen_float_const_Store produced a Sync */
4675 if (pn == pn_Store_M) {
4678 panic("exception control flow for gen_float_const_Store not implemented yet");
4679 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4680 /* destination address mode */
4681 if (pn == pn_Store_M) {
4684 panic("exception control flow for destination AM not implemented yet");
4687 panic("No idea how to transform Proj(Store) %+F", node);
4691 * Transform and renumber the Projs from a Div or Mod instruction.
/* Transform and renumber the Projs of a Div, dispatching on whether it
 * became an integer Div/IDiv, an SSE xDiv or an x87 vfdiv. */
4693 static ir_node *gen_Proj_Div(ir_node *node)
4695 ir_node *pred = get_Proj_pred(node);
4696 ir_node *new_pred = be_transform_node(pred);
4697 dbg_info *dbgi = get_irn_dbg_info(node);
4698 long proj = get_Proj_proj(node);
/* Div and IDiv share the proj numbers used below */
4700 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4701 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4703 switch ((pn_Div)proj) {
4705 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4706 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4707 } else if (is_ia32_xDiv(new_pred)) {
4708 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4709 } else if (is_ia32_vfdiv(new_pred)) {
4710 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4712 panic("Div transformed to unexpected thing %+F", new_pred);
4715 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4716 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4717 } else if (is_ia32_xDiv(new_pred)) {
4718 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4719 } else if (is_ia32_vfdiv(new_pred)) {
4720 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4722 panic("Div transformed to unexpected thing %+F", new_pred);
4724 case pn_Div_X_except:
4725 set_ia32_exc_label(new_pred, 1);
4726 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4727 case pn_Div_X_regular:
4728 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4731 panic("No idea how to transform proj->Div");
4735 * Transform and renumber the Projs from a Div or Mod instruction.
/* Transform and renumber the Projs of a Mod. A Mod is always transformed to
 * an integer Div/IDiv; the remainder is its mod_res output. */
4737 static ir_node *gen_Proj_Mod(ir_node *node)
4739 ir_node *pred = get_Proj_pred(node);
4740 ir_node *new_pred = be_transform_node(pred);
4741 dbg_info *dbgi = get_irn_dbg_info(node);
4742 long proj = get_Proj_proj(node);
4744 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* Div and IDiv share the proj numbers used below */
4745 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4746 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4748 switch ((pn_Mod)proj) {
4750 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4752 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4753 case pn_Mod_X_except:
4754 set_ia32_exc_label(new_pred, 1);
4755 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4756 case pn_Mod_X_regular:
4757 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4759 panic("No idea how to transform proj->Mod");
4763 * Transform and renumber the Projs from a CopyB.
/* Transform and renumber the Projs of a CopyB, dispatching on whether it
 * became a CopyB_i (constant size) or a generic CopyB. */
4765 static ir_node *gen_Proj_CopyB(ir_node *node)
4767 ir_node *pred = get_Proj_pred(node);
4768 ir_node *new_pred = be_transform_node(pred);
4769 dbg_info *dbgi = get_irn_dbg_info(node);
4770 long proj = get_Proj_proj(node);
4772 switch ((pn_CopyB)proj) {
4774 if (is_ia32_CopyB_i(new_pred)) {
4775 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4776 } else if (is_ia32_CopyB(new_pred)) {
4777 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4780 case pn_CopyB_X_regular:
4781 if (is_ia32_CopyB_i(new_pred)) {
4782 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4783 } else if (is_ia32_CopyB(new_pred)) {
4784 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4787 case pn_CopyB_X_except:
4788 if (is_ia32_CopyB_i(new_pred)) {
4789 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4790 } else if (is_ia32_CopyB(new_pred)) {
4791 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4796 panic("No idea how to transform proj->CopyB");
/**
 * Transform a be_Call into an ia32_Call, matching the callee address as
 * address mode / immediate where possible and routing the register
 * parameters (eax/ecx/edx) and the fpu control word explicitly.
 */
4799 static ir_node *gen_be_Call(ir_node *node)
4801 dbg_info *const dbgi = get_irn_dbg_info(node);
4802 ir_node *const src_block = get_nodes_block(node);
4803 ir_node *const block = be_transform_node(src_block);
4804 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4805 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4806 ir_node *const sp = be_transform_node(src_sp);
4807 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4808 ia32_address_mode_t am;
4809 ia32_address_t *const addr = &am.addr;
4814 ir_node * eax = noreg_GP;
4815 ir_node * ecx = noreg_GP;
4816 ir_node * edx = noreg_GP;
4817 unsigned const pop = be_Call_get_pop(node);
4818 ir_type *const call_tp = be_Call_get_type(node);
4819 int old_no_pic_adjust;
4820 int throws_exception = ir_throws_exception(node);
4822 /* Run the x87 simulator if the call returns a float value */
4823 if (get_method_n_ress(call_tp) > 0) {
4824 ir_type *const res_type = get_method_res_type(call_tp, 0);
4825 ir_mode *const res_mode = get_type_mode(res_type);
4827 if (res_mode != NULL && mode_is_float(res_mode)) {
4828 ir_graph *irg = current_ir_graph;
4829 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4830 irg_data->do_x87_sim = 1;
4834 /* We do not want be_Call direct calls */
4835 assert(be_Call_get_entity(node) == NULL);
4837 /* special case for PIC trampoline calls */
4838 old_no_pic_adjust = ia32_no_pic_adjust;
4839 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
/* match the call target as address mode or immediate; PIC adjustment is
 * temporarily suppressed around this so the trampoline address is taken
 * verbatim, then the old setting is restored */
4841 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4842 match_am | match_immediate);
4844 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input of the be_Call is the fpu control word, the remaining
 * trailing inputs are the register parameters */
4846 i = get_irn_arity(node) - 1;
4847 fpcw = be_transform_node(get_irn_n(node, i--));
4848 for (; i >= n_be_Call_first_arg; --i) {
4849 arch_register_req_t const *const req
4850 = arch_get_irn_register_req_in(node, i);
4851 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4853 assert(req->type == arch_register_req_type_limited);
4854 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* only eax/ecx/edx are supported as register parameters; each may be
 * used at most once (checked by the asserts) */
4856 switch (*req->limited) {
4857 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4858 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4859 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4860 default: panic("Invalid GP register for register parameter");
4864 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4865 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4866 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4867 ir_set_throws_exception(call, throws_exception);
4868 set_am_attributes(call, &am);
4869 call = fix_mem_proj(call, &am);
4871 if (get_irn_pinned(node) == op_pin_state_pinned)
4872 set_irn_pinned(call, op_pin_state_pinned);
4874 SET_IA32_ORIG_NODE(call, node);
4876 if (ia32_cg_config.use_sse2) {
4877 /* remember this call for post-processing */
4878 ARR_APP1(ir_node *, call_list, call);
4879 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4886 * Transform Builtin trap
/* Emits the x86 UD2 instruction (guaranteed invalid opcode). */
4888 static ir_node *gen_trap(ir_node *node)
4890 dbg_info *dbgi = get_irn_dbg_info(node);
4891 ir_node *block = be_transform_node(get_nodes_block(node));
4892 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4894 return new_bd_ia32_UD2(dbgi, block, mem);
4898 * Transform Builtin debugbreak
/* Emits the x86 breakpoint instruction (int3). */
4900 static ir_node *gen_debugbreak(ir_node *node)
4902 dbg_info *dbgi = get_irn_dbg_info(node);
4903 ir_node *block = be_transform_node(get_nodes_block(node));
4904 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4906 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4910 * Transform Builtin return_address
/* Builtin param 0 is the (constant) number of frames to walk up,
 * param 1 is the current frame pointer. */
4912 static ir_node *gen_return_address(ir_node *node)
4914 ir_node *param = get_Builtin_param(node, 0);
4915 ir_node *frame = get_Builtin_param(node, 1);
4916 dbg_info *dbgi = get_irn_dbg_info(node);
4917 ir_tarval *tv = get_Const_tarval(param);
4918 ir_graph *irg = get_irn_irg(node);
4919 unsigned long value = get_tarval_long(tv);
4921 ir_node *block = be_transform_node(get_nodes_block(node));
4922 ir_node *ptr = be_transform_node(frame);
/* walk 'value' frames up the dynamic link chain
 * (NOTE(review): the guard for value > 0 is presumably on the lines not
 * visible in this excerpt — confirm against the full file) */
4926 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4927 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4928 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4931 /* load the return address from this frame */
4932 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4934 set_irn_pinned(load, get_irn_pinned(node));
4935 set_ia32_op_type(load, ia32_AddrModeS);
4936 set_ia32_ls_mode(load, mode_Iu);
/* address the return-address slot via the frame entity, offset 0 */
4938 set_ia32_am_offs_int(load, 0);
4939 set_ia32_use_frame(load);
4940 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4942 if (get_irn_pinned(node) == op_pin_state_floats) {
4943 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4944 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4945 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4946 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4949 SET_IA32_ORIG_NODE(load, node);
4950 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4954 * Transform Builtin frame_address
/* Builtin param 0 is the (constant) number of frames to walk up,
 * param 1 is the current frame pointer. */
4956 static ir_node *gen_frame_address(ir_node *node)
4958 ir_node *param = get_Builtin_param(node, 0);
4959 ir_node *frame = get_Builtin_param(node, 1);
4960 dbg_info *dbgi = get_irn_dbg_info(node);
4961 ir_tarval *tv = get_Const_tarval(param);
4962 ir_graph *irg = get_irn_irg(node);
4963 unsigned long value = get_tarval_long(tv);
4965 ir_node *block = be_transform_node(get_nodes_block(node));
4966 ir_node *ptr = be_transform_node(frame);
/* walk 'value' frames up the dynamic link chain */
4971 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4972 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4973 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4976 /* load the frame address from this frame */
4977 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4979 set_irn_pinned(load, get_irn_pinned(node));
4980 set_ia32_op_type(load, ia32_AddrModeS);
4981 set_ia32_ls_mode(load, mode_Iu);
/* if the backend provides no frame-address entity we still emit the load
 * at offset 0 (mirrors gcc behaviour, see comment below) */
4983 ent = ia32_get_frame_address_entity(irg);
4985 set_ia32_am_offs_int(load, 0);
4986 set_ia32_use_frame(load);
4987 set_ia32_frame_ent(load, ent);
4989 /* will fail anyway, but gcc does this: */
4990 set_ia32_am_offs_int(load, 0);
4993 if (get_irn_pinned(node) == op_pin_state_floats) {
4994 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4995 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4996 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4997 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
5000 SET_IA32_ORIG_NODE(load, node);
5001 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
5005 * Transform Builtin prefetch.
/* (fixed copy-pasted "frame_address" header comment)
 * Builtin param 0 is the address, param 1 the read/write flag,
 * param 2 the locality hint (SSE variant only). */
5007 static ir_node *gen_prefetch(ir_node *node)
5010 ir_node *ptr, *block, *mem, *base, *idx;
5011 ir_node *param, *new_node;
5014 ia32_address_t addr;
5016 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
5017 /* no prefetch at all, route memory */
5018 return be_transform_node(get_Builtin_mem(node));
5021 param = get_Builtin_param(node, 1);
5022 tv = get_Const_tarval(param);
5023 rw = get_tarval_long(tv);
5025 /* construct load address */
5026 memset(&addr, 0, sizeof(addr));
5027 ptr = get_Builtin_param(node, 0);
5028 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5035 base = be_transform_node(base);
5041 idx = be_transform_node(idx);
5044 dbgi = get_irn_dbg_info(node);
5045 block = be_transform_node(get_nodes_block(node));
5046 mem = be_transform_node(get_Builtin_mem(node));
5048 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
5049 /* we have 3DNow!, this was already checked above */
5050 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
5051 } else if (ia32_cg_config.use_sse_prefetch) {
5052 /* note: rw == 1 is IGNORED in that case */
5053 param = get_Builtin_param(node, 2);
5054 tv = get_Const_tarval(param);
5055 locality = get_tarval_long(tv);
5057 /* SSE style prefetch */
/* locality hint selects prefetchnta/t2/t1/t0 (case labels not visible
 * in this excerpt) */
5060 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
5063 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
5066 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
5069 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
5073 assert(ia32_cg_config.use_3dnow_prefetch);
5074 /* 3DNow! style prefetch */
5075 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
5078 set_irn_pinned(new_node, get_irn_pinned(node));
5079 set_ia32_op_type(new_node, ia32_AddrModeS);
5080 set_ia32_ls_mode(new_node, mode_Bu);
5081 set_address(new_node, &addr);
5083 SET_IA32_ORIG_NODE(new_node, node);
5085 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5089 * Transform bsf like node
/* Helper: build a unary destination-register instruction (bsf/bsr/popcnt
 * style) whose single operand may be folded as address mode. */
5091 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5093 ir_node *param = get_Builtin_param(node, 0);
5094 dbg_info *dbgi = get_irn_dbg_info(node);
5096 ir_node *block = get_nodes_block(node);
5097 ir_node *new_block = be_transform_node(block);
5099 ia32_address_mode_t am;
5100 ia32_address_t *addr = &am.addr;
5103 match_arguments(&am, block, NULL, param, NULL, match_am);
5105 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5106 set_am_attributes(cnt, &am);
5107 set_ia32_ls_mode(cnt, get_irn_mode(param));
5109 SET_IA32_ORIG_NODE(cnt, node);
5110 return fix_mem_proj(cnt, &am);
5114 * Transform builtin ffs.
/* ffs(x) = bsf(x) + 1, except ffs(0) = 0. Since bsf leaves the result
 * undefined for input 0 (but sets ZF), the zero case is fixed up with a
 * branch-free mask: result = (bsf | -(x == 0)) + 1. */
5116 static ir_node *gen_ffs(ir_node *node)
5118 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5119 ir_node *real = skip_Proj(bsf);
5120 dbg_info *dbgi = get_irn_dbg_info(real);
5121 ir_node *block = get_nodes_block(real);
5122 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* make the Bsf a mode_T node so we can additionally project its flags */
5125 if (get_irn_mode(real) != mode_T) {
5126 set_irn_mode(real, mode_T);
5127 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5130 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = 1 iff the input was zero (bsf set ZF) */
5133 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5134 SET_IA32_ORIG_NODE(set, node);
5137 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5138 SET_IA32_ORIG_NODE(conv, node);
/* neg = 0 for nonzero input, 0xFFFFFFFF for zero input */
5141 neg = new_bd_ia32_Neg(dbgi, block, conv);
/* orn = bsf result, or -1 when the input was zero */
5144 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5145 set_ia32_commutative(orn);
/* final +1 via lea: -1 + 1 = 0 for the zero case, bsf + 1 otherwise */
5148 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5149 add_ia32_am_offs_int(add, 1);
5154 * Transform builtin clz.
/* bsr yields the index of the highest set bit; xor with 31 turns that
 * into the leading-zero count for 32-bit values. Like bsr itself this
 * leaves input 0 undefined. */
5156 static ir_node *gen_clz(ir_node *node)
5158 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5159 ir_node *real = skip_Proj(bsr);
5160 dbg_info *dbgi = get_irn_dbg_info(real);
5161 ir_node *block = get_nodes_block(real);
5162 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5164 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5168 * Transform builtin ctz.
/* ctz maps directly to bsf (index of the lowest set bit). */
5170 static ir_node *gen_ctz(ir_node *node)
5172 return gen_unop_AM(node, new_bd_ia32_Bsf);
5176 * Transform builtin parity.
5178 static ir_node *gen_parity(ir_node *node)
5180 dbg_info *dbgi = get_irn_dbg_info(node);
5181 ir_node *block = get_nodes_block(node);
5182 ir_node *new_block = be_transform_node(block);
5183 ir_node *param = get_Builtin_param(node, 0);
5184 ir_node *new_param = be_transform_node(param);
5187 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5188 * so we have to do complicated xoring first.
5189 * (we should also better lower this before the backend so we still have a
5190 * chance for CSE, constant folding and other goodies for some of these
/* fold the upper 16 bits into the lower 16, then the two remaining bytes
 * into one via XorHighLow, so the hardware parity flag sees all 32 bits */
5193 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5194 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5195 ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5197 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
5200 set_ia32_commutative(xor);
5202 set_irn_mode(xor2, mode_T);
5203 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
/* PF is set on EVEN parity, so test the inverted condition */
5206 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5207 SET_IA32_ORIG_NODE(new_node, node);
5210 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5211 nomem, new_node, mode_Bu);
5212 SET_IA32_ORIG_NODE(new_node, node);
5217 * Transform builtin popcount
/* Uses the hardware popcnt instruction when available, otherwise the
 * classic bit-twiddling reduction (pairs -> nibbles -> bytes -> words),
 * with additions expressed as Lea nodes. */
5219 static ir_node *gen_popcount(ir_node *node)
5221 ir_node *param = get_Builtin_param(node, 0);
5222 dbg_info *dbgi = get_irn_dbg_info(node);
5224 ir_node *block = get_nodes_block(node);
5225 ir_node *new_block = be_transform_node(block);
5228 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5230 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5231 if (ia32_cg_config.use_popcnt) {
5232 ia32_address_mode_t am;
5233 ia32_address_t *addr = &am.addr;
5236 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5238 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5239 set_am_attributes(cnt, &am);
5240 set_ia32_ls_mode(cnt, get_irn_mode(param));
5242 SET_IA32_ORIG_NODE(cnt, node);
5243 return fix_mem_proj(cnt, &am);
5246 new_param = be_transform_node(param);
5248 /* do the standard popcount algo */
5249 /* TODO: This is stupid, we should transform this before the backend,
5250 * to get CSE, localopts, etc. for the operations
5251 * TODO: This is also not the optimal algorithm (it is just the starting
5252 * example in hackers delight, they optimize it more on the following page)
5253 * But I'm too lazy to fix this now, as the code should get lowered before
5254 * the backend anyway.
5257 /* m1 = x & 0x55555555 */
5258 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5259 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5262 simm = ia32_create_Immediate(NULL, 0, 1);
5263 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5265 /* m2 = s1 & 0x55555555 */
5266 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (2-bit partial sums) */
5269 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5271 /* m4 = m3 & 0x33333333 */
5272 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5273 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5276 simm = ia32_create_Immediate(NULL, 0, 2);
5277 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5279 /* m5 = s2 & 0x33333333 */
5280 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 (4-bit partial sums) */
5283 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5285 /* m7 = m6 & 0x0F0F0F0F */
5286 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5287 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5290 simm = ia32_create_Immediate(NULL, 0, 4);
5291 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5293 /* m8 = s3 & 0x0F0F0F0F */
5294 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 (byte partial sums) */
5297 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5299 /* m10 = m9 & 0x00FF00FF */
5300 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5301 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5304 simm = ia32_create_Immediate(NULL, 0, 8);
5305 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5307 /* m11 = s4 & 0x00FF00FF */
5308 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5310 /* m12 = m10 + m11 */
5311 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5313 /* m13 = m12 & 0x0000FFFF */
5314 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5315 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5317 /* s5 = m12 >> 16 */
5318 simm = ia32_create_Immediate(NULL, 0, 16);
5319 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5321 /* res = m13 + s5 */
5322 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5326 * Transform builtin byte swap.
/* 32-bit: uses the bswap instruction on i486+, otherwise an explicit
 * shift/mask/add sequence. 16-bit: xchg-style Bswap16, always available. */
5328 static ir_node *gen_bswap(ir_node *node)
5330 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5331 dbg_info *dbgi = get_irn_dbg_info(node);
5333 ir_node *block = get_nodes_block(node);
5334 ir_node *new_block = be_transform_node(block);
5335 ir_mode *mode = get_irn_mode(param);
5336 unsigned size = get_mode_size_bits(mode);
5337 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5341 if (ia32_cg_config.use_i486) {
5342 /* swap available */
5343 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* pre-486 fallback: assemble the result byte by byte:
 * (x << 24) + ((x << 8) & 0xFF0000... folded via masks and Lea adds) */
5345 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5346 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5348 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5349 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5351 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5353 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5354 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5356 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5357 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5360 /* swap16 always available */
5361 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5364 panic("Invalid bswap size (%d)", size);
5369 * Transform builtin outport.
/* Writes a value to an I/O port (x86 `out`); the port number may be an
 * immediate, the ls mode carries the access width. */
5371 static ir_node *gen_outport(ir_node *node)
5373 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5374 ir_node *oldv = get_Builtin_param(node, 1);
5375 ir_mode *mode = get_irn_mode(oldv);
5376 ir_node *value = be_transform_node(oldv);
5377 ir_node *block = be_transform_node(get_nodes_block(node));
5378 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5379 dbg_info *dbgi = get_irn_dbg_info(node);
5381 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5382 set_ia32_ls_mode(res, mode);
5387 * Transform builtin inport.
/* Reads a value from an I/O port (x86 `in`); the access width comes from
 * the builtin's declared result type. */
5389 static ir_node *gen_inport(ir_node *node)
5391 ir_type *tp = get_Builtin_type(node);
5392 ir_type *rstp = get_method_res_type(tp, 0);
5393 ir_mode *mode = get_type_mode(rstp);
5394 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5395 ir_node *block = be_transform_node(get_nodes_block(node));
5396 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5397 dbg_info *dbgi = get_irn_dbg_info(node);
5399 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5400 set_ia32_ls_mode(res, mode);
5402 /* check for missing Result Proj */
5407 * Transform a builtin inner trampoline
/* Writes executable trampoline code to memory at 'ptr':
 *   B9 <env>      mov ecx, <env>
 *   E9 <rel>      jmp <callee>   (rel = callee - (trampoline + 10))
 * and returns (memory, trampoline address) as a Tuple. */
5409 static ir_node *gen_inner_trampoline(ir_node *node)
5411 ir_node *ptr = get_Builtin_param(node, 0);
5412 ir_node *callee = get_Builtin_param(node, 1);
5413 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5414 ir_node *mem = get_Builtin_mem(node);
5415 ir_node *block = get_nodes_block(node);
5416 ir_node *new_block = be_transform_node(block);
5420 ir_node *trampoline;
5422 dbg_info *dbgi = get_irn_dbg_info(node);
5423 ia32_address_t addr;
5425 /* construct store address */
5426 memset(&addr, 0, sizeof(addr));
5427 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5429 if (addr.base == NULL) {
5430 addr.base = noreg_GP;
5432 addr.base = be_transform_node(addr.base);
5435 if (addr.index == NULL) {
5436 addr.index = noreg_GP;
5438 addr.index = be_transform_node(addr.index);
5440 addr.mem = be_transform_node(mem);
5442 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5443 val = ia32_create_Immediate(NULL, 0, 0xB9);
5444 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5445 addr.index, addr.mem, val);
5446 set_irn_pinned(store, get_irn_pinned(node));
5447 set_ia32_op_type(store, ia32_AddrModeD);
5448 set_ia32_ls_mode(store, mode_Bu);
5449 set_address(store, &addr);
/* store the 32-bit environment pointer as the mov's immediate operand */
5453 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5454 addr.index, addr.mem, env);
5455 set_irn_pinned(store, get_irn_pinned(node));
5456 set_ia32_op_type(store, ia32_AddrModeD);
5457 set_ia32_ls_mode(store, mode_Iu);
5458 set_address(store, &addr);
5462 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5463 val = ia32_create_Immediate(NULL, 0, 0xE9);
5464 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5465 addr.index, addr.mem, val);
5466 set_irn_pinned(store, get_irn_pinned(node));
5467 set_ia32_op_type(store, ia32_AddrModeD);
5468 set_ia32_ls_mode(store, mode_Bu);
5469 set_address(store, &addr);
5473 trampoline = be_transform_node(ptr);
5475 /* the callee is typically an immediate */
/* rel32 is relative to the end of the jmp instruction, i.e. trampoline
 * start + 10 bytes -- hence the -10 adjustment in both branches */
5476 if (is_SymConst(callee)) {
5477 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5479 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5480 add_ia32_am_offs_int(rel, -10);
5482 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5484 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5485 addr.index, addr.mem, rel);
5486 set_irn_pinned(store, get_irn_pinned(node));
5487 set_ia32_op_type(store, ia32_AddrModeD);
5488 set_ia32_ls_mode(store, mode_Iu);
5489 set_address(store, &addr);
/* Tuple[0] = memory, Tuple[1] = trampoline address (see gen_Proj_Builtin) */
5494 return new_r_Tuple(new_block, 2, in);
5498 * Transform Builtin node.
/* Dispatches on the builtin kind to the specific transformer above. */
5500 static ir_node *gen_Builtin(ir_node *node)
5502 ir_builtin_kind kind = get_Builtin_kind(node);
5506 return gen_trap(node);
5507 case ir_bk_debugbreak:
5508 return gen_debugbreak(node);
5509 case ir_bk_return_address:
5510 return gen_return_address(node);
5511 case ir_bk_frame_address:
5512 return gen_frame_address(node);
5513 case ir_bk_prefetch:
5514 return gen_prefetch(node);
5516 return gen_ffs(node);
5518 return gen_clz(node);
5520 return gen_ctz(node);
5522 return gen_parity(node);
5523 case ir_bk_popcount:
5524 return gen_popcount(node);
5526 return gen_bswap(node);
5528 return gen_outport(node);
5530 return gen_inport(node);
5531 case ir_bk_inner_trampoline:
5532 return gen_inner_trampoline(node);
5534 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5538 * Transform Proj(Builtin) node.
/* Maps the generic Builtin result/memory projs to the projs (or Tuple
 * entries) of the ia32 node created by gen_Builtin. */
5540 static ir_node *gen_Proj_Builtin(ir_node *proj)
5542 ir_node *node = get_Proj_pred(proj);
5543 ir_node *new_node = be_transform_node(node);
5544 ir_builtin_kind kind = get_Builtin_kind(node);
5547 case ir_bk_return_address:
5548 case ir_bk_frame_address:
5553 case ir_bk_popcount:
/* these builtins were transformed into a plain value node */
5555 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5558 case ir_bk_debugbreak:
5559 case ir_bk_prefetch:
/* these builtins only produce memory */
5561 assert(get_Proj_proj(proj) == pn_Builtin_M);
5564 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5565 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5567 assert(get_Proj_proj(proj) == pn_Builtin_M);
5568 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5570 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned a Tuple: [0] memory, [1] address */
5571 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5572 return get_Tuple_pred(new_node, 1);
5574 assert(get_Proj_proj(proj) == pn_Builtin_M);
5575 return get_Tuple_pred(new_node, 0);
5578 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Duplicate a be_IncSP; the ia32 stack adjustment clobbers the flags
 * register, so mark the copy accordingly. */
5581 static ir_node *gen_be_IncSP(ir_node *node)
5583 ir_node *res = be_duplicate_node(node);
5584 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5590 * Transform the Projs from a be_Call.
5592 static ir_node *gen_Proj_be_Call(ir_node *node)
5594 ir_node *call = get_Proj_pred(node);
5595 ir_node *new_call = be_transform_node(call);
5596 dbg_info *dbgi = get_irn_dbg_info(node);
5597 long proj = get_Proj_proj(node);
5598 ir_mode *mode = get_irn_mode(node);
5601 if (proj == pn_be_Call_M) {
5602 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5604 /* transform call modes */
5605 if (mode_is_data(mode)) {
5606 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5610 /* Map from be_Call to ia32_Call proj number */
5611 if (proj == pn_be_Call_sp) {
5612 proj = pn_ia32_Call_stack;
5613 } else if (proj == pn_be_Call_M) {
5614 proj = pn_ia32_Call_M;
5615 } else if (proj == pn_be_Call_X_except) {
5616 proj = pn_ia32_Call_X_except;
5617 } else if (proj == pn_be_Call_X_regular) {
5618 proj = pn_ia32_Call_X_regular;
/* result projs: find the ia32_Call output with the same limited
 * register requirement as the be_Call result */
5620 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5621 int const n_outs = arch_get_irn_n_outs(new_call);
5624 assert(proj >= pn_be_Call_first_res);
5625 assert(req->type & arch_register_req_type_limited);
5627 for (i = 0; i < n_outs; ++i) {
5628 arch_register_req_t const *const new_req
5629 = arch_get_irn_register_req_out(new_call, i);
5631 if (!(new_req->type & arch_register_req_type_limited) ||
5632 new_req->cls != req->cls ||
5633 *new_req->limited != *req->limited)
5642 res = new_rd_Proj(dbgi, new_call, mode, proj);
5644 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5646 case pn_ia32_Call_stack:
5647 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5650 case pn_ia32_Call_fpcw:
5651 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5659 * Transform the Projs from a Cmp.
/* Cmp results must have been lowered to control flow / Setcc before the
 * backend runs; reaching this transformer is always an error. */
5661 static ir_node *gen_Proj_Cmp(ir_node *node)
5663 /* this probably means not all mode_b nodes were lowered... */
5664 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/* Transform a Proj from an ASM node: the memory proj is moved to the last
 * output slot of the new ASM node; int/reference and float results keep a
 * position chosen by the (not visible) branches below. */
5668 static ir_node *gen_Proj_ASM(ir_node *node)
5670 ir_mode *mode = get_irn_mode(node);
5671 ir_node *pred = get_Proj_pred(node);
5672 ir_node *new_pred = be_transform_node(pred);
5673 long pos = get_Proj_proj(node);
5675 if (mode == mode_M) {
5676 pos = arch_get_irn_n_outs(new_pred)-1;
5677 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5679 } else if (mode_is_float(mode)) {
5682 panic("unexpected proj mode at ASM");
5685 return new_r_Proj(new_pred, mode, pos);
5689 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: delegates to the per-opcode Proj transformers,
 * handles Start projs specially and falls back to node duplication. */
5691 static ir_node *gen_Proj(ir_node *node)
5693 ir_node *pred = get_Proj_pred(node);
5696 switch (get_irn_opcode(pred)) {
5698 return gen_Proj_Load(node);
5700 return gen_Proj_Store(node);
5702 return gen_Proj_ASM(node);
5704 return gen_Proj_Builtin(node);
5706 return gen_Proj_Div(node);
5708 return gen_Proj_Mod(node);
5710 return gen_Proj_CopyB(node);
5712 return gen_Proj_be_SubSP(node);
5714 return gen_Proj_be_AddSP(node);
5716 return gen_Proj_be_Call(node);
5718 return gen_Proj_Cmp(node);
5720 proj = get_Proj_proj(node);
5722 case pn_Start_X_initial_exec: {
5723 ir_node *block = get_nodes_block(pred);
5724 ir_node *new_block = be_transform_node(block);
5725 dbg_info *dbgi = get_irn_dbg_info(node);
5726 /* we exchange the ProjX with a jump */
5727 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5735 if (is_ia32_l_FloattoLL(pred)) {
5736 return gen_Proj_l_FloattoLL(node);
5738 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5742 ir_mode *mode = get_irn_mode(node);
5743 if (ia32_mode_needs_gp_reg(mode)) {
/* force gp results to mode_Iu; keep the original node number for
 * debugging purposes */
5744 ir_node *new_pred = be_transform_node(pred);
5745 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5746 get_Proj_proj(node));
5747 new_proj->node_nr = node->node_nr;
5752 return be_duplicate_node(node);
5756 * Enters all transform functions into the generic pointer
5758 static void register_transformers(void)
5760 /* first clear the generic function pointer for all ops */
5761 be_start_transform_setup();
5763 be_set_transform_function(op_Add, gen_Add);
5764 be_set_transform_function(op_And, gen_And);
5765 be_set_transform_function(op_ASM, ia32_gen_ASM);
5766 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5767 be_set_transform_function(op_be_Call, gen_be_Call);
5768 be_set_transform_function(op_be_Copy, gen_be_Copy);
5769 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5770 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5771 be_set_transform_function(op_be_Return, gen_be_Return);
5772 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5773 be_set_transform_function(op_Builtin, gen_Builtin);
5774 be_set_transform_function(op_Cmp, gen_Cmp);
5775 be_set_transform_function(op_Cond, gen_Cond);
5776 be_set_transform_function(op_Const, gen_Const);
5777 be_set_transform_function(op_Conv, gen_Conv);
5778 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5779 be_set_transform_function(op_Div, gen_Div);
5780 be_set_transform_function(op_Eor, gen_Eor);
5781 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5782 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
/* nodes already in ia32 form are simply duplicated */
5783 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5784 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5785 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5786 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5787 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5788 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5789 be_set_transform_function(op_ia32_l_Setcc, gen_ia32_l_Setcc);
5790 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5791 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5792 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5793 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5794 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5795 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5796 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5797 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5798 be_set_transform_function(op_IJmp, gen_IJmp);
5799 be_set_transform_function(op_Jmp, gen_Jmp);
5800 be_set_transform_function(op_Load, gen_Load);
5801 be_set_transform_function(op_Minus, gen_Minus);
5802 be_set_transform_function(op_Mod, gen_Mod);
5803 be_set_transform_function(op_Mul, gen_Mul);
5804 be_set_transform_function(op_Mulh, gen_Mulh);
5805 be_set_transform_function(op_Mux, gen_Mux);
5806 be_set_transform_function(op_Not, gen_Not);
5807 be_set_transform_function(op_Or, gen_Or);
5808 be_set_transform_function(op_Phi, gen_Phi);
5809 be_set_transform_function(op_Proj, gen_Proj);
5810 be_set_transform_function(op_Rotl, gen_Rotl);
5811 be_set_transform_function(op_Shl, gen_Shl);
5812 be_set_transform_function(op_Shr, gen_Shr);
5813 be_set_transform_function(op_Shrs, gen_Shrs);
5814 be_set_transform_function(op_Store, gen_Store);
5815 be_set_transform_function(op_Sub, gen_Sub);
5816 be_set_transform_function(op_SymConst, gen_SymConst);
5817 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5821 * Pre-transform all unknown and noreg nodes.
/* Also caches the graph's nomem and noreg_GP nodes in the file-static
 * variables used by the transformers above. */
5823 static void ia32_pretransform_node(void)
5825 ir_graph *irg = current_ir_graph;
5826 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5828 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5829 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5830 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5831 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5832 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5834 nomem = get_irg_no_mem(irg);
5835 noreg_GP = ia32_new_NoReg_gp(irg);
5839 * Post-process all calls if we are in SSE mode.
5840 * The ABI requires that the results are in st0, copy them
5841 * to a xmm register.
5843 static void postprocess_fp_call_results(void)
/* iterates over the calls collected by gen_be_Call (call_list/call_types
 * run parallel) */
5847 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5848 ir_node *call = call_list[i];
5849 ir_type *mtp = call_types[i];
5852 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5853 ir_type *res_tp = get_method_res_type(mtp, j);
5854 ir_node *res, *new_res;
5855 const ir_edge_t *edge, *next;
5858 if (! is_atomic_type(res_tp)) {
5859 /* no floating point return */
5862 res_mode = get_type_mode(res_tp);
5863 if (! mode_is_float(res_mode)) {
5864 /* no floating point return */
5868 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5871 /* now patch the users */
5872 foreach_out_edge_safe(res, edge, next) {
5873 ir_node *succ = get_edge_src_irn(edge);
5876 if (be_is_Keep(succ))
5879 if (is_ia32_xStore(succ)) {
5880 /* an xStore can be patched into an vfst */
5881 dbg_info *db = get_irn_dbg_info(succ);
5882 ir_node *block = get_nodes_block(succ);
5883 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5884 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5885 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5886 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5887 ir_mode *mode = get_ia32_ls_mode(succ);
5889 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5890 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5891 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5892 if (is_ia32_use_frame(succ))
5893 set_ia32_use_frame(st);
5894 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5895 set_irn_pinned(st, get_irn_pinned(succ));
5896 set_ia32_op_type(st, ia32_AddrModeD);
5898 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5899 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5900 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
/* general case: spill st(0) to the frame and reload it into an xmm
 * register, rerouting the call's memory users over the new load */
5907 if (new_res == NULL) {
5908 dbg_info *db = get_irn_dbg_info(call);
5909 ir_node *block = get_nodes_block(call);
5910 ir_node *frame = get_irg_frame(current_ir_graph);
5911 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5912 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5913 ir_node *vfst, *xld, *new_mem;
5916 /* store st(0) on stack */
5917 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5919 set_ia32_op_type(vfst, ia32_AddrModeD);
5920 set_ia32_use_frame(vfst);
5922 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5924 /* load into SSE register */
5925 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5927 set_ia32_op_type(xld, ia32_AddrModeS);
5928 set_ia32_use_frame(xld);
5930 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5931 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5933 if (old_mem != NULL) {
5934 edges_reroute(old_mem, new_mem);
5938 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5944 /* do the transformation */
/**
 * Transform the given graph from firm IR into ia32 backend nodes.
 *
 * @param irg  the graph to transform (modified in place)
 */
5945 void ia32_transform_graph(ir_graph *irg)
5949 register_transformers();
5950 initial_fpcw = NULL;
5951 ia32_no_pic_adjust = 0;
/* remember the initial value of the x87 fpu control word register */
5953 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* build the heights data structure consumed via ia32_heights
 * (computation is timed under T_HEIGHTS) */
5955 be_timer_push(T_HEIGHTS);
5956 ia32_heights = heights_new(irg);
5957 be_timer_pop(T_HEIGHTS);
5958 ia32_calculate_non_address_mode_nodes(irg);
5960 /* the transform phase is not safe for CSE (yet) because several nodes get
5961 * attributes set after their creation */
5962 cse_last = get_opt_cse();
/* collect calls and their method types during transformation so that
 * float call results can be fixed up afterwards */
5965 call_list = NEW_ARR_F(ir_node *, 0);
5966 call_types = NEW_ARR_F(ir_type *, 0);
5967 be_transform_graph(irg, ia32_pretransform_node);
/* in SSE mode, x87 float results must be copied into xmm registers */
5969 if (ia32_cg_config.use_sse2)
5970 postprocess_fp_call_results();
5971 DEL_ARR_F(call_types);
5972 DEL_ARR_F(call_list);
/* restore the caller's CSE setting */
5974 set_opt_cse(cse_last);
/* release per-graph helper data */
5976 ia32_free_non_address_mode_nodes();
5977 heights_free(ia32_heights);
5978 ia32_heights = NULL;
5981 void ia32_init_transform(void)
5983 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");