2 * Copyright (C) 1995-2011 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
71 /* define this to construct SSE constants instead of loading them */
72 #undef CONSTRUCT_SSE_CONST
75 #define SFP_SIGN "0x80000000"
76 #define DFP_SIGN "0x8000000000000000"
77 #define SFP_ABS "0x7FFFFFFF"
78 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
79 #define DFP_INTMAX "9223372036854775807"
80 #define ULL_BIAS "18446744073709551616"
82 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
83 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
84 #define ENT_SFP_ABS "C_ia32_sfp_abs"
85 #define ENT_DFP_ABS "C_ia32_dfp_abs"
86 #define ENT_ULL_BIAS "C_ia32_ull_bias"
88 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
89 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
91 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
93 static ir_node *old_initial_fpcw = NULL;
94 static ir_node *initial_fpcw = NULL;
95 int ia32_no_pic_adjust;
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* it is enough to create these once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero is a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero is a node represents the -1 constant. */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
155 static bool is_simple_x87_Const(ir_node *node)
157 ir_tarval *tv = get_Const_tarval(node);
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
166 * returns true if constant can be created with a simple float command
168 static bool is_simple_sse_Const(ir_node *node)
170 ir_tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
201 static ir_node *get_symconst_base(void)
203 ir_graph *irg = current_ir_graph;
205 if (be_get_irg_options(irg)->pic) {
206 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
207 return arch_env->impl->get_pic_base(irg);
214 * Transforms a Const.
216 static ir_node *gen_Const(ir_node *node)
218 ir_node *old_block = get_nodes_block(node);
219 ir_node *block = be_transform_node(old_block);
220 dbg_info *dbgi = get_irn_dbg_info(node);
221 ir_mode *mode = get_irn_mode(node);
223 assert(is_Const(node));
225 if (mode_is_float(mode)) {
230 if (ia32_cg_config.use_sse2) {
231 ir_tarval *tv = get_Const_tarval(node);
232 if (tarval_is_null(tv)) {
233 load = new_bd_ia32_xZero(dbgi, block);
234 set_ia32_ls_mode(load, mode);
236 #ifdef CONSTRUCT_SSE_CONST
237 } else if (tarval_is_one(tv)) {
238 int cnst = mode == mode_F ? 26 : 55;
239 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
240 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
241 ir_node *pslld, *psrld;
243 load = new_bd_ia32_xAllOnes(dbgi, block);
244 set_ia32_ls_mode(load, mode);
245 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
246 set_ia32_ls_mode(pslld, mode);
247 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
248 set_ia32_ls_mode(psrld, mode);
250 #endif /* CONSTRUCT_SSE_CONST */
251 } else if (mode == mode_F) {
252 /* we can place any 32bit constant by using a movd gp, sse */
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
257 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
258 load = new_bd_ia32_xMovd(dbgi, block, cnst);
259 set_ia32_ls_mode(load, mode);
263 #ifdef CONSTRUCT_SSE_CONST
264 if (mode == mode_D) {
265 unsigned val = get_tarval_sub_bits(tv, 0) |
266 (get_tarval_sub_bits(tv, 1) << 8) |
267 (get_tarval_sub_bits(tv, 2) << 16) |
268 (get_tarval_sub_bits(tv, 3) << 24);
270 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
271 ir_node *cnst, *psllq;
273 /* fine, lower 32bit are zero, produce 32bit value */
274 val = get_tarval_sub_bits(tv, 4) |
275 (get_tarval_sub_bits(tv, 5) << 8) |
276 (get_tarval_sub_bits(tv, 6) << 16) |
277 (get_tarval_sub_bits(tv, 7) << 24);
278 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
279 load = new_bd_ia32_xMovd(dbgi, block, cnst);
280 set_ia32_ls_mode(load, mode);
281 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
282 set_ia32_ls_mode(psllq, mode);
287 #endif /* CONSTRUCT_SSE_CONST */
288 floatent = ia32_create_float_const_entity(node);
290 base = get_symconst_base();
291 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
293 set_ia32_op_type(load, ia32_AddrModeS);
294 set_ia32_am_sc(load, floatent);
295 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
296 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
299 if (is_Const_null(node)) {
300 load = new_bd_ia32_vfldz(dbgi, block);
302 set_ia32_ls_mode(load, mode);
303 } else if (is_Const_one(node)) {
304 load = new_bd_ia32_vfld1(dbgi, block);
306 set_ia32_ls_mode(load, mode);
311 floatent = ia32_create_float_const_entity(node);
312 /* create_float_const_ent is smart and sometimes creates
314 ls_mode = get_type_mode(get_entity_type(floatent));
315 base = get_symconst_base();
316 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
318 set_ia32_op_type(load, ia32_AddrModeS);
319 set_ia32_am_sc(load, floatent);
320 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
321 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
324 #ifdef CONSTRUCT_SSE_CONST
326 #endif /* CONSTRUCT_SSE_CONST */
327 SET_IA32_ORIG_NODE(load, node);
329 } else { /* non-float mode */
331 ir_tarval *tv = get_Const_tarval(node);
334 tv = tarval_convert_to(tv, mode_Iu);
336 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
338 panic("couldn't convert constant tarval (%+F)", node);
340 val = get_tarval_long(tv);
342 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
343 SET_IA32_ORIG_NODE(cnst, node);
350 * Transforms a SymConst.
352 static ir_node *gen_SymConst(ir_node *node)
354 ir_node *old_block = get_nodes_block(node);
355 ir_node *block = be_transform_node(old_block);
356 dbg_info *dbgi = get_irn_dbg_info(node);
357 ir_mode *mode = get_irn_mode(node);
360 if (mode_is_float(mode)) {
361 if (ia32_cg_config.use_sse2)
362 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 set_ia32_am_sc(cnst, get_SymConst_entity(node));
366 set_ia32_use_frame(cnst);
370 if (get_SymConst_kind(node) != symconst_addr_ent) {
371 panic("backend only support symconst_addr_ent (at %+F)", node);
373 entity = get_SymConst_entity(node);
374 if (get_entity_owner(entity) == get_tls_type()) {
375 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
376 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
377 set_ia32_am_sc(lea, entity);
380 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
384 SET_IA32_ORIG_NODE(cnst, node);
390 * Create a float type for the given mode and cache it.
392 * @param mode the mode for the float type (might be integer mode for SSE2 types)
393 * @param align alignment
395 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
401 if (mode == mode_Iu) {
402 static ir_type *int_Iu[16] = {NULL, };
404 if (int_Iu[align] == NULL) {
405 int_Iu[align] = tp = new_type_primitive(mode);
406 /* set the specified alignment */
407 set_type_alignment_bytes(tp, align);
409 return int_Iu[align];
410 } else if (mode == mode_Lu) {
411 static ir_type *int_Lu[16] = {NULL, };
413 if (int_Lu[align] == NULL) {
414 int_Lu[align] = tp = new_type_primitive(mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return int_Lu[align];
419 } else if (mode == mode_F) {
420 static ir_type *float_F[16] = {NULL, };
422 if (float_F[align] == NULL) {
423 float_F[align] = tp = new_type_primitive(mode);
424 /* set the specified alignment */
425 set_type_alignment_bytes(tp, align);
427 return float_F[align];
428 } else if (mode == mode_D) {
429 static ir_type *float_D[16] = {NULL, };
431 if (float_D[align] == NULL) {
432 float_D[align] = tp = new_type_primitive(mode);
433 /* set the specified alignment */
434 set_type_alignment_bytes(tp, align);
436 return float_D[align];
438 static ir_type *float_E[16] = {NULL, };
440 if (float_E[align] == NULL) {
441 float_E[align] = tp = new_type_primitive(mode);
442 /* set the specified alignment */
443 set_type_alignment_bytes(tp, align);
445 return float_E[align];
450 * Create a float[2] array type for the given atomic type.
452 * @param tp the atomic type
454 static ir_type *ia32_create_float_array(ir_type *tp)
456 ir_mode *mode = get_type_mode(tp);
457 unsigned align = get_type_alignment_bytes(tp);
462 if (mode == mode_F) {
463 static ir_type *float_F[16] = {NULL, };
465 if (float_F[align] != NULL)
466 return float_F[align];
467 arr = float_F[align] = new_type_array(1, tp);
468 } else if (mode == mode_D) {
469 static ir_type *float_D[16] = {NULL, };
471 if (float_D[align] != NULL)
472 return float_D[align];
473 arr = float_D[align] = new_type_array(1, tp);
475 static ir_type *float_E[16] = {NULL, };
477 if (float_E[align] != NULL)
478 return float_E[align];
479 arr = float_E[align] = new_type_array(1, tp);
481 set_type_alignment_bytes(arr, align);
482 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
483 set_type_state(arr, layout_fixed);
487 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
488 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
490 static const struct {
491 const char *ent_name;
492 const char *cnst_str;
495 } names [ia32_known_const_max] = {
496 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
497 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
498 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
499 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
500 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
502 static ir_entity *ent_cache[ia32_known_const_max];
504 const char *ent_name, *cnst_str;
510 ent_name = names[kct].ent_name;
511 if (! ent_cache[kct]) {
512 cnst_str = names[kct].cnst_str;
514 switch (names[kct].mode) {
515 case 0: mode = mode_Iu; break;
516 case 1: mode = mode_Lu; break;
517 default: mode = mode_F; break;
519 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
520 tp = ia32_create_float_type(mode, names[kct].align);
522 if (kct == ia32_ULLBIAS)
523 tp = ia32_create_float_array(tp);
524 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
526 set_entity_ld_ident(ent, get_entity_ident(ent));
527 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
528 set_entity_visibility(ent, ir_visibility_private);
530 if (kct == ia32_ULLBIAS) {
531 ir_initializer_t *initializer = create_initializer_compound(2);
533 set_initializer_compound_value(initializer, 0,
534 create_initializer_tarval(get_mode_null(mode)));
535 set_initializer_compound_value(initializer, 1,
536 create_initializer_tarval(tv));
538 set_entity_initializer(ent, initializer);
540 set_entity_initializer(ent, create_initializer_tarval(tv));
543 /* cache the entry */
544 ent_cache[kct] = ent;
547 return ent_cache[kct];
551 * return true if the node is a Proj(Load) and could be used in source address
552 * mode for another node. Will return only true if the @p other node is not
553 * dependent on the memory of the Load (for binary operations use the other
554 * input here, for unary operations use NULL).
556 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
557 ir_node *other, ir_node *other2, match_flags_t flags)
562 /* float constants are always available */
563 if (is_Const(node)) {
564 ir_mode *mode = get_irn_mode(node);
565 if (mode_is_float(mode)) {
566 if (ia32_cg_config.use_sse2) {
567 if (is_simple_sse_Const(node))
570 if (is_simple_x87_Const(node))
573 if (get_irn_n_edges(node) > 1)
581 load = get_Proj_pred(node);
582 pn = get_Proj_proj(node);
583 if (!is_Load(load) || pn != pn_Load_res)
585 if (get_nodes_block(load) != block)
587 /* we only use address mode if we're the only user of the load */
588 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
590 /* in some edge cases with address mode we might reach the load normally
591 * and through some AM sequence, if it is already materialized then we
592 * can't create an AM node from it */
593 if (be_is_transformed(node))
596 /* don't do AM if other node inputs depend on the load (via mem-proj) */
597 if (other != NULL && ia32_prevents_AM(block, load, other))
600 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
606 typedef struct ia32_address_mode_t ia32_address_mode_t;
607 struct ia32_address_mode_t {
612 ia32_op_type_t op_type;
616 unsigned commutative : 1;
617 unsigned ins_permuted : 1;
620 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
622 /* construct load address */
623 memset(addr, 0, sizeof(addr[0]));
624 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
626 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
627 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
628 addr->mem = be_transform_node(mem);
631 static void build_address(ia32_address_mode_t *am, ir_node *node,
632 ia32_create_am_flags_t flags)
634 ia32_address_t *addr = &am->addr;
640 /* floating point immediates */
641 if (is_Const(node)) {
642 ir_entity *entity = ia32_create_float_const_entity(node);
643 addr->base = get_symconst_base();
644 addr->index = noreg_GP;
646 addr->symconst_ent = entity;
647 addr->tls_segment = false;
649 am->ls_mode = get_type_mode(get_entity_type(entity));
650 am->pinned = op_pin_state_floats;
654 load = get_Proj_pred(node);
655 ptr = get_Load_ptr(load);
656 mem = get_Load_mem(load);
657 new_mem = be_transform_node(mem);
658 am->pinned = get_irn_pinned(load);
659 am->ls_mode = get_Load_mode(load);
660 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
663 /* construct load address */
664 ia32_create_address_mode(addr, ptr, flags);
666 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
667 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
671 static void set_address(ir_node *node, const ia32_address_t *addr)
673 set_ia32_am_scale(node, addr->scale);
674 set_ia32_am_sc(node, addr->symconst_ent);
675 set_ia32_am_offs_int(node, addr->offset);
676 set_ia32_am_tls_segment(node, addr->tls_segment);
677 if (addr->symconst_sign)
678 set_ia32_am_sc_sign(node);
680 set_ia32_use_frame(node);
681 set_ia32_frame_ent(node, addr->frame_entity);
685 * Apply attributes of a given address mode to a node.
687 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
689 set_address(node, &am->addr);
691 set_ia32_op_type(node, am->op_type);
692 set_ia32_ls_mode(node, am->ls_mode);
693 if (am->pinned == op_pin_state_pinned) {
694 /* beware: some nodes are already pinned and did not allow to change the state */
695 if (get_irn_pinned(node) != op_pin_state_pinned)
696 set_irn_pinned(node, op_pin_state_pinned);
699 set_ia32_commutative(node);
703 * Check, if a given node is a Down-Conv, ie. a integer Conv
704 * from a mode with a mode with more bits to a mode with lesser bits.
705 * Moreover, we return only true if the node has not more than 1 user.
707 * @param node the node
708 * @return non-zero if node is a Down-Conv
710 static int is_downconv(const ir_node *node)
718 /* we only want to skip the conv when we're the only user
719 * (because this test is used in the context of address-mode selection
720 * and we don't want to use address mode for multiple users) */
721 if (get_irn_n_edges(node) > 1)
724 src_mode = get_irn_mode(get_Conv_op(node));
725 dest_mode = get_irn_mode(node);
727 ia32_mode_needs_gp_reg(src_mode) &&
728 ia32_mode_needs_gp_reg(dest_mode) &&
729 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
732 /** Skip all Down-Conv's on a given node and return the resulting node. */
733 ir_node *ia32_skip_downconv(ir_node *node)
735 while (is_downconv(node))
736 node = get_Conv_op(node);
741 static bool is_sameconv(ir_node *node)
749 /* we only want to skip the conv when we're the only user
750 * (because this test is used in the context of address-mode selection
751 * and we don't want to use address mode for multiple users) */
752 if (get_irn_n_edges(node) > 1)
755 src_mode = get_irn_mode(get_Conv_op(node));
756 dest_mode = get_irn_mode(node);
758 ia32_mode_needs_gp_reg(src_mode) &&
759 ia32_mode_needs_gp_reg(dest_mode) &&
760 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
763 /** Skip all signedness convs */
764 static ir_node *ia32_skip_sameconv(ir_node *node)
766 while (is_sameconv(node))
767 node = get_Conv_op(node);
772 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
774 ir_mode *mode = get_irn_mode(node);
779 if (mode_is_signed(mode)) {
784 block = get_nodes_block(node);
785 dbgi = get_irn_dbg_info(node);
787 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
791 * matches operands of a node into ia32 addressing/operand modes. This covers
792 * usage of source address mode, immediates, operations with non 32-bit modes,
794 * The resulting data is filled into the @p am struct. block is the block
795 * of the node whose arguments are matched. op1, op2 are the first and second
796 * input that are matched (op1 may be NULL). other_op is another unrelated
797 * input that is not matched! but which is needed sometimes to check if AM
798 * for op1/op2 is legal.
799 * @p flags describes the supported modes of the operation in detail.
801 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
802 ir_node *op1, ir_node *op2, ir_node *other_op,
805 ia32_address_t *addr = &am->addr;
806 ir_mode *mode = get_irn_mode(op2);
807 int mode_bits = get_mode_size_bits(mode);
808 ir_node *new_op1, *new_op2;
810 unsigned commutative;
811 int use_am_and_immediates;
814 memset(am, 0, sizeof(am[0]));
816 commutative = (flags & match_commutative) != 0;
817 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
818 use_am = (flags & match_am) != 0;
819 use_immediate = (flags & match_immediate) != 0;
820 assert(!use_am_and_immediates || use_immediate);
823 assert(!commutative || op1 != NULL);
824 assert(use_am || !(flags & match_8bit_am));
825 assert(use_am || !(flags & match_16bit_am));
827 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
828 (mode_bits == 16 && !(flags & match_16bit_am))) {
832 /* we can simply skip downconvs for mode neutral nodes: the upper bits
833 * can be random for these operations */
834 if (flags & match_mode_neutral) {
835 op2 = ia32_skip_downconv(op2);
837 op1 = ia32_skip_downconv(op1);
840 op2 = ia32_skip_sameconv(op2);
842 op1 = ia32_skip_sameconv(op1);
846 /* match immediates. firm nodes are normalized: constants are always on the
849 if (!(flags & match_try_am) && use_immediate) {
850 new_op2 = ia32_try_create_Immediate(op2, 0);
853 if (new_op2 == NULL &&
854 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
855 build_address(am, op2, ia32_create_am_normal);
856 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
857 if (mode_is_float(mode)) {
858 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
862 am->op_type = ia32_AddrModeS;
863 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
865 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
867 build_address(am, op1, ia32_create_am_normal);
869 if (mode_is_float(mode)) {
870 noreg = ia32_new_NoReg_vfp(current_ir_graph);
875 if (new_op2 != NULL) {
878 new_op1 = be_transform_node(op2);
880 am->ins_permuted = true;
882 am->op_type = ia32_AddrModeS;
884 am->op_type = ia32_Normal;
886 if (flags & match_try_am) {
892 mode = get_irn_mode(op2);
893 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
894 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
896 new_op2 = create_upconv(op2, NULL);
897 am->ls_mode = mode_Iu;
899 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
901 new_op2 = be_transform_node(op2);
902 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
905 if (addr->base == NULL)
906 addr->base = noreg_GP;
907 if (addr->index == NULL)
908 addr->index = noreg_GP;
909 if (addr->mem == NULL)
912 am->new_op1 = new_op1;
913 am->new_op2 = new_op2;
914 am->commutative = commutative;
918 * "Fixes" a node that uses address mode by turning it into mode_T
919 * and returning a pn_ia32_res Proj.
921 * @param node the node
922 * @param am its address mode
924 * @return a Proj(pn_ia32_res) if a memory address mode is used,
927 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
932 if (am->mem_proj == NULL)
935 /* we have to create a mode_T so the old MemProj can attach to us */
936 mode = get_irn_mode(node);
937 load = get_Proj_pred(am->mem_proj);
939 be_set_transformed_node(load, node);
941 if (mode != mode_T) {
942 set_irn_mode(node, mode_T);
943 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
950 * Construct a standard binary operation, set AM and immediate if required.
952 * @param node The original node for which the binop is created
953 * @param op1 The first operand
954 * @param op2 The second operand
955 * @param func The node constructor function
956 * @return The constructed ia32 node.
958 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
959 construct_binop_func *func, match_flags_t flags)
962 ir_node *block, *new_block, *new_node;
963 ia32_address_mode_t am;
964 ia32_address_t *addr = &am.addr;
966 block = get_nodes_block(node);
967 match_arguments(&am, block, op1, op2, NULL, flags);
969 dbgi = get_irn_dbg_info(node);
970 new_block = be_transform_node(block);
971 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
972 am.new_op1, am.new_op2);
973 set_am_attributes(new_node, &am);
974 /* we can't use source address mode anymore when using immediates */
975 if (!(flags & match_am_and_immediates) &&
976 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
977 set_ia32_am_support(new_node, ia32_am_none);
978 SET_IA32_ORIG_NODE(new_node, node);
980 new_node = fix_mem_proj(new_node, &am);
986 * Generic names for the inputs of an ia32 binary op.
989 n_ia32_l_binop_left, /**< ia32 left input */
990 n_ia32_l_binop_right, /**< ia32 right input */
991 n_ia32_l_binop_eflags /**< ia32 eflags input */
993 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
994 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
995 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
996 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
997 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
998 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
1001 * Construct a binary operation which also consumes the eflags.
1003 * @param node The node to transform
1004 * @param func The node constructor function
1005 * @param flags The match flags
1006 * @return The constructor ia32 node
1008 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1009 match_flags_t flags)
1011 ir_node *src_block = get_nodes_block(node);
1012 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1013 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1014 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1016 ir_node *block, *new_node, *new_eflags;
1017 ia32_address_mode_t am;
1018 ia32_address_t *addr = &am.addr;
1020 match_arguments(&am, src_block, op1, op2, eflags, flags);
1022 dbgi = get_irn_dbg_info(node);
1023 block = be_transform_node(src_block);
1024 new_eflags = be_transform_node(eflags);
1025 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1026 am.new_op1, am.new_op2, new_eflags);
1027 set_am_attributes(new_node, &am);
1028 /* we can't use source address mode anymore when using immediates */
1029 if (!(flags & match_am_and_immediates) &&
1030 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1031 set_ia32_am_support(new_node, ia32_am_none);
1032 SET_IA32_ORIG_NODE(new_node, node);
1034 new_node = fix_mem_proj(new_node, &am);
1039 static ir_node *get_fpcw(void)
1041 if (initial_fpcw != NULL)
1042 return initial_fpcw;
1044 initial_fpcw = be_transform_node(old_initial_fpcw);
1045 return initial_fpcw;
1049 * Construct a standard binary operation, set AM and immediate if required.
1051 * @param op1 The first operand
1052 * @param op2 The second operand
1053 * @param func The node constructor function
1054 * @return The constructed ia32 node.
1056 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1057 construct_binop_float_func *func)
1059 ir_mode *mode = get_irn_mode(node);
1061 ir_node *block, *new_block, *new_node;
1062 ia32_address_mode_t am;
1063 ia32_address_t *addr = &am.addr;
1064 ia32_x87_attr_t *attr;
1065 /* All operations are considered commutative, because there are reverse
1067 match_flags_t flags = match_commutative;
1069 /* happens for div nodes... */
1070 if (mode == mode_T) {
1072 mode = get_Div_resmode(node);
1074 panic("can't determine mode");
1077 /* cannot use address mode with long double on x87 */
1078 if (get_mode_size_bits(mode) <= 64)
1081 block = get_nodes_block(node);
1082 match_arguments(&am, block, op1, op2, NULL, flags);
1084 dbgi = get_irn_dbg_info(node);
1085 new_block = be_transform_node(block);
1086 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1087 am.new_op1, am.new_op2, get_fpcw());
1088 set_am_attributes(new_node, &am);
1090 attr = get_ia32_x87_attr(new_node);
1091 attr->attr.data.ins_permuted = am.ins_permuted;
1093 SET_IA32_ORIG_NODE(new_node, node);
1095 new_node = fix_mem_proj(new_node, &am);
1101 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1103 * @param op1 The first operand
1104 * @param op2 The second operand
1105 * @param func The node constructor function
1106 * @return The constructed ia32 node.
1108 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1109 construct_shift_func *func,
1110 match_flags_t flags)
1113 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1115 assert(! mode_is_float(get_irn_mode(node)));
1116 assert(flags & match_immediate);
1117 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1119 if (flags & match_mode_neutral) {
1120 op1 = ia32_skip_downconv(op1);
1121 new_op1 = be_transform_node(op1);
1122 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1123 new_op1 = create_upconv(op1, node);
1125 new_op1 = be_transform_node(op1);
1128 /* the shift amount can be any mode that is bigger than 5 bits, since all
1129 * other bits are ignored anyway */
1130 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1131 ir_node *const op = get_Conv_op(op2);
1132 if (mode_is_float(get_irn_mode(op)))
1135 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1137 new_op2 = create_immediate_or_transform(op2, 0);
1139 dbgi = get_irn_dbg_info(node);
1140 block = get_nodes_block(node);
1141 new_block = be_transform_node(block);
1142 new_node = func(dbgi, new_block, new_op1, new_op2);
1143 SET_IA32_ORIG_NODE(new_node, node);
1145 /* lowered shift instruction may have a dependency operand, handle it here */
1146 if (get_irn_arity(node) == 3) {
1147 /* we have a dependency */
1148 ir_node* dep = get_irn_n(node, 2);
1149 if (get_irn_n_edges(dep) > 1) {
1150 /* ... which has at least one user other than 'node' */
1151 ir_node *new_dep = be_transform_node(dep);
1152 add_irn_dep(new_node, new_dep);
1161 * Construct a standard unary operation, set AM and immediate if required.
1163 * @param op The operand
1164 * @param func The node constructor function
1165 * @return The constructed ia32 node.
1167 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1168 match_flags_t flags)
1171 ir_node *block, *new_block, *new_op, *new_node;
1173 assert(flags == 0 || flags == match_mode_neutral);
1174 if (flags & match_mode_neutral) {
1175 op = ia32_skip_downconv(op);
1178 new_op = be_transform_node(op);
1179 dbgi = get_irn_dbg_info(node);
1180 block = get_nodes_block(node);
1181 new_block = be_transform_node(block);
1182 new_node = func(dbgi, new_block, new_op);
1184 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from a filled-in ia32_address_t (base/index/offset). */
1189 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1190 ia32_address_t *addr)
1200 base = be_transform_node(base);
1207 idx = be_transform_node(idx);
1210 /* segment overrides are ineffective for Leas :-( so we have to patch
1212 if (addr->tls_segment) {
/* Materialize the TLS base explicitly and fold it into the base operand,
 * since a segment prefix cannot be attached to a Lea. */
1213 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1214 assert(addr->symconst_ent != NULL);
1215 if (base == noreg_GP)
1218 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
/* The segment has been folded in; clear the flag so set_address() below
 * does not apply it a second time. */
1219 addr->tls_segment = false;
1222 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1223 set_address(res, addr);
1229 * Returns non-zero if a given address mode has a symbolic or
1230 * numerical offset != 0.
1232 static int am_has_immediates(const ia32_address_t *addr)
/* A frame entity or use_frame also counts: the final offset is only known
 * after stack-frame layout, so it behaves like an immediate here. */
1234 return addr->offset != 0 || addr->symconst_ent != NULL
1235 || addr->frame_entity || addr->use_frame;
1239 * Creates an ia32 Add.
1241 * @return the created ia32 Add node
1243 static ir_node *gen_Add(ir_node *node)
1245 ir_mode *mode = get_irn_mode(node);
1246 ir_node *op1 = get_Add_left(node);
1247 ir_node *op2 = get_Add_right(node);
1249 ir_node *block, *new_block, *new_node, *add_immediate_op;
1250 ia32_address_t addr;
1251 ia32_address_mode_t am;
/* Float adds go to SSE2 (xAdd) or the x87 stack (vfadd); the integer
 * Lea/Add selection below does not apply to them. */
1253 if (mode_is_float(mode)) {
1254 if (ia32_cg_config.use_sse2)
1255 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1256 match_commutative | match_am);
1258 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1261 ia32_mark_non_am(node);
1263 op2 = ia32_skip_downconv(op2);
1264 op1 = ia32_skip_downconv(op1);
1268 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1269 * 1. Add with immediate -> Lea
1270 * 2. Add with possible source address mode -> Add
1271 * 3. Otherwise -> Lea
/* Force the whole Add tree into an address-mode description first; the
 * cases below inspect which address components were actually used. */
1273 memset(&addr, 0, sizeof(addr));
1274 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1275 add_immediate_op = NULL;
1277 dbgi = get_irn_dbg_info(node);
1278 block = get_nodes_block(node);
1279 new_block = be_transform_node(block);
/* Case 0: no register parts at all — the whole Add folds to a constant. */
1282 if (addr.base == NULL && addr.index == NULL) {
1283 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1284 addr.symconst_sign, 0, addr.offset);
1285 SET_IA32_ORIG_NODE(new_node, node);
1288 /* add with immediate? */
1289 if (addr.index == NULL) {
1290 add_immediate_op = addr.base;
1291 } else if (addr.base == NULL && addr.scale == 0) {
1292 add_immediate_op = addr.index;
1295 if (add_immediate_op != NULL) {
/* Reg + no immediate at all means the Add was effectively x+0; the
 * middle-end should have folded this — warn in debug builds. */
1296 if (!am_has_immediates(&addr)) {
1297 #ifdef DEBUG_libfirm
1298 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1301 return be_transform_node(add_immediate_op);
/* Case 1: single register + immediate — a Lea encodes this directly. */
1304 new_node = create_lea_from_address(dbgi, new_block, &addr);
1305 SET_IA32_ORIG_NODE(new_node, node);
1309 /* test if we can use source address mode */
1310 match_arguments(&am, block, op1, op2, NULL, match_commutative
1311 | match_mode_neutral | match_am | match_immediate | match_try_am);
1313 /* construct an Add with source address mode */
1314 if (am.op_type == ia32_AddrModeS) {
1315 ia32_address_t *am_addr = &am.addr;
1316 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1317 am_addr->index, am_addr->mem, am.new_op1,
1319 set_am_attributes(new_node, &am);
1320 SET_IA32_ORIG_NODE(new_node, node);
1322 new_node = fix_mem_proj(new_node, &am);
1327 /* otherwise construct a lea */
1328 new_node = create_lea_from_address(dbgi, new_block, &addr);
1329 SET_IA32_ORIG_NODE(new_node, node);
1334 * Creates an ia32 Mul.
1336 * @return the created ia32 Mul node
1338 static ir_node *gen_Mul(ir_node *node)
1340 ir_node *op1 = get_Mul_left(node);
1341 ir_node *op2 = get_Mul_right(node);
1342 ir_mode *mode = get_irn_mode(node);
/* Float multiply: SSE2 xMul when available, otherwise x87 vfmul. */
1344 if (mode_is_float(mode)) {
1345 if (ia32_cg_config.use_sse2)
1346 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1347 match_commutative | match_am);
1349 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* Integer multiply: IMul, allowing source address mode and immediates
 * on both operand positions (match_am_and_immediates). */
1351 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1352 match_commutative | match_am | match_mode_neutral |
1353 match_immediate | match_am_and_immediates);
1357 * Creates an ia32 Mulh.
1358 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1359 * this result while Mul returns the lower 32 bit.
1361 * @return the created ia32 Mulh node
1363 static ir_node *gen_Mulh(ir_node *node)
1365 dbg_info *dbgi = get_irn_dbg_info(node);
1366 ir_node *op1 = get_Mulh_left(node);
1367 ir_node *op2 = get_Mulh_right(node);
1368 ir_mode *mode = get_irn_mode(node);
1370 ir_node *proj_res_high;
/* Only 32x32->64 widening multiplies are available on ia32. */
1372 if (get_mode_size_bits(mode) != 32) {
1373 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* Signed uses one-operand IMUL, unsigned uses MUL; in both cases only the
 * res_high Proj (the upper 32 bits in edx) is handed back. */
1376 if (mode_is_signed(mode)) {
1377 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1378 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1380 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1381 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1383 return proj_res_high;
1387 * Creates an ia32 And.
1389 * @return The created ia32 And node
1391 static ir_node *gen_And(ir_node *node)
1393 ir_node *op1 = get_And_left(node);
1394 ir_node *op2 = get_And_right(node);
1395 assert(! mode_is_float(get_irn_mode(node)));
1397 /* is it a zero extension? */
/* And with 0xFF / 0xFFFF masks the low 8/16 bits — emit a zero-extending
 * conversion (movzx-style) instead of an And. */
1398 if (is_Const(op2)) {
1399 ir_tarval *tv = get_Const_tarval(op2);
1400 long v = get_tarval_long(tv);
1402 if (v == 0xFF || v == 0xFFFF) {
1403 dbg_info *dbgi = get_irn_dbg_info(node);
1404 ir_node *block = get_nodes_block(node);
1411 assert(v == 0xFFFF);
1414 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* General case: plain And with address mode and immediate support. */
1419 return gen_binop(node, op1, op2, new_bd_ia32_And,
1420 match_commutative | match_mode_neutral | match_am | match_immediate);
1424 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
/* Used by the ShlD/ShrD matcher: a rotate/double-shift of a 32-bit value by
 * x pairs with a counter-shift by 32-x. */
1427 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1429 if (is_Const(value1) && is_Const(value2)) {
1430 ir_tarval *tv1 = get_Const_tarval(value1);
1431 ir_tarval *tv2 = get_Const_tarval(value2);
1432 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1433 long v1 = get_tarval_long(tv1);
1434 long v2 = get_tarval_long(tv2);
/* v1 <= v2 also rejects negative/oversized shift amounts implicitly. */
1435 return v1 <= v2 && v2 == 32-v1;
/* Constructor signature shared by the ShlD/ShrD backend node builders. */
1441 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1442 ir_node *high, ir_node *low,
1446 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1447 * op1 - target to be shifted
1448 * op2 - contains bits to be shifted into target
1450 * Only op3 can be an immediate.
1452 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1453 ir_node *high, ir_node *low, ir_node *count,
1454 new_shiftd_func func)
1456 ir_node *new_block = be_transform_node(block);
1457 ir_node *new_high = be_transform_node(high);
1458 ir_node *new_low = be_transform_node(low);
1462 /* the shift amount can be any mode that is bigger than 5 bits, since all
1463 * other bits are ignored anyway */
/* Peel single-user integer Convs off the count: SHLD/SHRD only look at the
 * low 5 bits of the count register anyway. */
1464 while (is_Conv(count) &&
1465 get_irn_n_edges(count) == 1 &&
1466 mode_is_int(get_irn_mode(count))) {
1467 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1468 count = get_Conv_op(count);
1470 new_count = create_immediate_or_transform(count, 0);
1472 new_node = func(dbgi, new_block, new_high, new_low, new_count);
/* Try to recognize an Or of complementary Shl/Shr as a 64-bit double shift
 * (SHLD/SHRD) as produced by the 64-bit lowering; returns NULL-equivalent
 * fallthrough handling is outside this excerpt. */
1476 static ir_node *match_64bit_shift(ir_node *node)
1478 ir_node *op1 = get_Or_left(node);
1479 ir_node *op2 = get_Or_right(node);
1487 /* match ShlD operation */
1488 if (is_Shl(op1) && is_Shr(op2)) {
1489 ir_node *shl_right = get_Shl_right(op1);
1490 ir_node *shl_left = get_Shl_left(op1);
1491 ir_node *shr_right = get_Shr_right(op2);
1492 ir_node *shr_left = get_Shr_left(op2);
1493 /* constant ShlD operation */
1494 if (is_complementary_shifts(shl_right, shr_right)) {
1495 dbg_info *dbgi = get_irn_dbg_info(node);
1496 ir_node *block = get_nodes_block(node);
1497 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1500 /* constant ShrD operation */
1501 if (is_complementary_shifts(shr_right, shl_right)) {
1502 dbg_info *dbgi = get_irn_dbg_info(node);
1503 ir_node *block = get_nodes_block(node);
1504 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1507 /* lower_dw produces the following for ShlD:
1508 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
/* Non-constant count variant: the extra Shr-by-1 plus Not(c) encodes the
 * "shift by 32-c" half without needing c known at compile time. */
1509 if (is_Shr(shr_left) && is_Not(shr_right)
1510 && is_Const_1(get_Shr_right(shr_left))
1511 && get_Not_op(shr_right) == shl_right) {
1512 dbg_info *dbgi = get_irn_dbg_info(node);
1513 ir_node *block = get_nodes_block(node);
1514 ir_node *val_h = get_Shr_left(shr_left);
1515 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1518 /* lower_dw produces the following for ShrD:
1519 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1520 if (is_Shl(shl_left) && is_Not(shl_right)
1521 && is_Const_1(get_Shl_right(shl_left))
1522 && get_Not_op(shl_right) == shr_right) {
1523 dbg_info *dbgi = get_irn_dbg_info(node);
1524 ir_node *block = get_nodes_block(node);
1525 ir_node *val_h = get_Shl_left(shl_left);
1526 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1535 * Creates an ia32 Or.
1537 * @return The created ia32 Or node
1539 static ir_node *gen_Or(ir_node *node)
1541 ir_node *op1 = get_Or_left(node);
1542 ir_node *op2 = get_Or_right(node);
/* First try to fold the Or into a 64-bit double shift (SHLD/SHRD). */
1545 res = match_64bit_shift(node);
1549 assert (! mode_is_float(get_irn_mode(node)));
1550 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1551 | match_mode_neutral | match_am | match_immediate);
1557 * Creates an ia32 Eor.
1559 * @return The created ia32 Eor node
1561 static ir_node *gen_Eor(ir_node *node)
1563 ir_node *op1 = get_Eor_left(node);
1564 ir_node *op2 = get_Eor_right(node);
1566 assert(! mode_is_float(get_irn_mode(node)));
/* Straightforward: integer Xor with AM + immediate support. */
1567 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1568 | match_mode_neutral | match_am | match_immediate);
1573 * Creates an ia32 Sub.
1575 * @return The created ia32 Sub node
1577 static ir_node *gen_Sub(ir_node *node)
1579 ir_node *op1 = get_Sub_left(node);
1580 ir_node *op2 = get_Sub_right(node);
1581 ir_mode *mode = get_irn_mode(node);
1583 if (mode_is_float(mode)) {
1584 if (ia32_cg_config.use_sse2)
1585 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1587 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with a constant right operand should have been normalized to an Add
 * of the negated constant by the middle-end — warn, then proceed anyway. */
1590 if (is_Const(op2)) {
1591 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* Note: no match_commutative here — subtraction is not commutative. */
1595 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1596 | match_am | match_immediate);
/* Compute the memory input for a node that consumed a Load via address mode:
 * combines the node's original memory (src_mem) with the AM memory (am_mem)
 * while avoiding self-referencing memory loops through the folded Load. */
1599 static ir_node *transform_AM_mem(ir_node *const block,
1600 ir_node *const src_val,
1601 ir_node *const src_mem,
1602 ir_node *const am_mem)
1604 if (is_NoMem(am_mem)) {
1605 return be_transform_node(src_mem);
1606 } else if (is_Proj(src_val) &&
1608 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1609 /* avoid memory loop */
1611 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1612 ir_node *const ptr_pred = get_Proj_pred(src_val);
1613 int const arity = get_Sync_n_preds(src_mem);
/* Rebuild the Sync without the predecessor that comes from the folded
 * load itself (that one is already represented by am_mem). */
1618 NEW_ARR_A(ir_node*, ins, arity + 1);
1620 /* NOTE: This sometimes produces dead-code because the old sync in
1621 * src_mem might not be used anymore, we should detect this case
1622 * and kill the sync... */
1623 for (i = arity - 1; i >= 0; --i) {
1624 ir_node *const pred = get_Sync_pred(src_mem, i);
1626 /* avoid memory loop */
1627 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1630 ins[n++] = be_transform_node(pred);
1633 if (n==1 && ins[0] == am_mem) {
1635 /* creating a new Sync and relying on CSE may fail,
1636 * if am_mem is a ProjM, which does not yet verify. */
1640 return new_r_Sync(block, n, ins);
/* Fallback: sync the transformed src_mem with am_mem pairwise. */
1644 ins[0] = be_transform_node(src_mem);
1646 return new_r_Sync(block, 2, ins);
1651 * Create a 32bit to 64bit signed extension.
1653 * @param dbgi debug info
1654 * @param block the block where node nodes should be placed
1655 * @param val the value to extend
1656 * @param orig the original node
1658 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1659 ir_node *val, const ir_node *orig)
/* Two encodings: the short CLTD/CDQ form (needs val in eax, modelled via
 * ProduceVal) or an explicit arithmetic shift right by 31. */
1664 if (ia32_cg_config.use_short_sex_eax) {
1665 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1666 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1668 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
/* sar val, 31 replicates the sign bit into all 32 result bits. */
1669 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1671 SET_IA32_ORIG_NODE(res, orig);
1676 * Generates an ia32 Div with additional infrastructure for the
1677 * register allocator if needed.
/* Shared worker for gen_Div and gen_Mod: both map to the same ia32
 * IDiv/Div node, the caller picks the quotient or remainder Proj. */
1679 static ir_node *create_Div(ir_node *node)
1681 dbg_info *dbgi = get_irn_dbg_info(node);
1682 ir_node *block = get_nodes_block(node);
1683 ir_node *new_block = be_transform_node(block);
1684 int throws_exception = ir_throws_exception(node);
1691 ir_node *sign_extension;
1692 ia32_address_mode_t am;
1693 ia32_address_t *addr = &am.addr;
1695 /* the upper bits have random contents for smaller modes */
1696 switch (get_irn_opcode(node)) {
1698 op1 = get_Div_left(node);
1699 op2 = get_Div_right(node);
1700 mem = get_Div_mem(node);
1701 mode = get_Div_resmode(node);
1704 op1 = get_Mod_left(node);
1705 op2 = get_Mod_right(node);
1706 mem = get_Mod_mem(node);
1707 mode = get_Mod_resmode(node);
1710 panic("invalid divmod node %+F", node);
/* match_upconv_32: operands must be widened to full 32 bit first, see the
 * "random contents" note above. */
1713 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1715 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1716 is the memory of the consumed address. We can have only the second op as address
1717 in Div nodes, so check only op2. */
1718 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* Signed division needs edx:eax — sign-extend the dividend into edx;
 * unsigned division zeroes the high half with a Const 0 instead. */
1720 if (mode_is_signed(mode)) {
1721 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1722 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1723 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1725 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1727 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1728 addr->index, new_mem, am.new_op2,
1729 am.new_op1, sign_extension);
1731 ir_set_throws_exception(new_node, throws_exception);
1733 set_irn_pinned(new_node, get_irn_pinned(node));
1735 set_am_attributes(new_node, &am);
1736 SET_IA32_ORIG_NODE(new_node, node);
1738 new_node = fix_mem_proj(new_node, &am);
1744 * Generates an ia32 Mod.
/* Div and Mod share one machine instruction; see create_Div(). */
1746 static ir_node *gen_Mod(ir_node *node)
1748 return create_Div(node);
1752 * Generates an ia32 Div.
1754 static ir_node *gen_Div(ir_node *node)
1756 ir_mode *mode = get_Div_resmode(node);
/* Float division: SSE2 xDiv or x87 vfdiv; integer goes to create_Div(). */
1757 if (mode_is_float(mode)) {
1758 ir_node *op1 = get_Div_left(node);
1759 ir_node *op2 = get_Div_right(node);
1761 if (ia32_cg_config.use_sse2) {
1762 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1764 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1768 return create_Div(node);
1772 * Creates an ia32 Shl.
1774 * @return The created ia32 Shl node
1776 static ir_node *gen_Shl(ir_node *node)
1778 ir_node *left = get_Shl_left(node);
1779 ir_node *right = get_Shl_right(node);
/* Left shift is mode-neutral: bits only move towards the MSB, so upper
 * garbage in a narrowed operand cannot leak into the result bits used. */
1781 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1782 match_mode_neutral | match_immediate);
1786 * Creates an ia32 Shr.
1788 * @return The created ia32 Shr node
1790 static ir_node *gen_Shr(ir_node *node)
1792 ir_node *left = get_Shr_left(node);
1793 ir_node *right = get_Shr_right(node);
/* No match_mode_neutral: a logical right shift pulls in the upper bits,
 * so the operand must be properly zero-extended first. */
1795 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1801 * Creates an ia32 Sar.
1803 * @return The created ia32 Shrs node
1805 static ir_node *gen_Shrs(ir_node *node)
1807 ir_node *left = get_Shrs_left(node);
1808 ir_node *right = get_Shrs_right(node);
/* Special case: Shrs by a constant (presumably 31, the cut lines would
 * confirm) is a plain sign extension — use create_sex_32_64. */
1810 if (is_Const(right)) {
1811 ir_tarval *tv = get_Const_tarval(right);
1812 long val = get_tarval_long(tv);
1814 /* this is a sign extension */
1815 dbg_info *dbgi = get_irn_dbg_info(node);
1816 ir_node *block = be_transform_node(get_nodes_block(node));
1817 ir_node *new_op = be_transform_node(left);
1819 return create_sex_32_64(dbgi, block, new_op, node);
1823 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, c), c) with c == 16 or 24 sign-extends the low 16/8 bits —
 * emit a sign-extending conversion (movsx-style) instead of two shifts. */
1824 if (is_Const(right) && is_Shl(left)) {
1825 ir_node *shl_left = get_Shl_left(left);
1826 ir_node *shl_right = get_Shl_right(left);
1827 if (is_Const(shl_right)) {
1828 ir_tarval *tv1 = get_Const_tarval(right);
1829 ir_tarval *tv2 = get_Const_tarval(shl_right);
1830 if (tv1 == tv2 && tarval_is_long(tv1)) {
1831 long val = get_tarval_long(tv1);
1832 if (val == 16 || val == 24) {
1833 dbg_info *dbgi = get_irn_dbg_info(node);
1834 ir_node *block = get_nodes_block(node);
1844 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* General case: arithmetic shift right (Sar). */
1853 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1859 * Creates an ia32 Rol.
1861 * @param op1 The first operator
1862 * @param op2 The second operator
1863 * @return The created ia32 RotL node
1865 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1867 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1873 * Creates an ia32 Ror.
1874 * NOTE: There is no RotR with immediate because this would always be a RotL
1875 * "imm-mode_size_bits" which can be pre-calculated.
1877 * @param op1 The first operator
1878 * @param op2 The second operator
1879 * @return The created ia32 RotR node
1881 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1883 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1889 * Creates an ia32 RotR or RotL (depending on the found pattern).
1891 * @return The created ia32 RotL or RotR node
1893 static ir_node *gen_Rotl(ir_node *node)
1895 ir_node *op1 = get_Rotl_left(node);
1896 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -n) is the same as Ror(x, n) since rotation is modular in the
 * operand width — strip the Minus and emit a RotR. */
1898 if (is_Minus(op2)) {
1899 return gen_Ror(node, op1, get_Minus_op(op2));
1902 return gen_Rol(node, op1, op2);
1908 * Transforms a Minus node.
1910 * @return The created ia32 Minus node
1912 static ir_node *gen_Minus(ir_node *node)
1914 ir_node *op = get_Minus_op(node);
1915 ir_node *block = be_transform_node(get_nodes_block(node));
1916 dbg_info *dbgi = get_irn_dbg_info(node);
1917 ir_mode *mode = get_irn_mode(node);
/* Float negation has no direct SSE instruction: xor the sign bit with a
 * constant mask loaded via address mode (SSIGN/DSIGN known constants).
 * x87 has a dedicated fchs. Integer negation uses Neg. */
1922 if (mode_is_float(mode)) {
1923 ir_node *new_op = be_transform_node(op);
1924 if (ia32_cg_config.use_sse2) {
1925 /* TODO: non-optimal... if we have many xXors, then we should
1926 * rather create a load for the const and use that instead of
1927 * several AM nodes... */
1928 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1930 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1931 noreg_GP, nomem, new_op, noreg_xmm);
1933 size = get_mode_size_bits(mode);
1934 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1936 set_ia32_am_sc(new_node, ent);
1937 set_ia32_op_type(new_node, ia32_AddrModeS);
1938 set_ia32_ls_mode(new_node, mode);
1940 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1943 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1946 SET_IA32_ORIG_NODE(new_node, node);
1952 * Transforms a Not node.
1954 * @return The created ia32 Not node
1956 static ir_node *gen_Not(ir_node *node)
1958 ir_node *op = get_Not_op(node);
/* Boolean Nots must have been lowered to compares before code selection. */
1960 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1961 assert (! mode_is_float(get_irn_mode(node)));
1963 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Build |op| (and optionally -|op| for x87) for a float value: SSE masks the
 * sign bit away with an And against the SABS/DABS constant; x87 uses fabs
 * followed by fchs when negate is requested. */
1966 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1967 bool negate, ir_node *node)
1969 ir_node *new_block = be_transform_node(block);
1970 ir_mode *mode = get_irn_mode(op);
1971 ir_node *new_op = be_transform_node(op);
1976 assert(mode_is_float(mode));
1978 if (ia32_cg_config.use_sse2) {
1979 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1980 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1981 noreg_GP, nomem, new_op, noreg_fp);
1983 size = get_mode_size_bits(mode);
1984 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1986 set_ia32_am_sc(new_node, ent);
1988 SET_IA32_ORIG_NODE(new_node, node);
1990 set_ia32_op_type(new_node, ia32_AddrModeS);
1991 set_ia32_ls_mode(new_node, mode);
1993 /* TODO, implement -Abs case */
1996 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1997 SET_IA32_ORIG_NODE(new_node, node);
/* negate path (x87): chain an fchs behind the fabs. */
1999 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
2000 SET_IA32_ORIG_NODE(new_node, node);
2008 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* BT copies bit n of x into the carry flag; the caller selects the condition
 * code (CF set / CF clear) accordingly. */
2010 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2012 dbg_info *dbgi = get_irn_dbg_info(cmp);
2013 ir_node *block = get_nodes_block(cmp);
2014 ir_node *new_block = be_transform_node(block);
2015 ir_node *op1 = be_transform_node(x);
2016 ir_node *op2 = be_transform_node(n);
2018 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm ir_relation to an ia32 condition code, depending on whether the
 * compared mode is float (parity-aware codes), signed (less/greater) or
 * unsigned (below/above). ir_relation_false/true have no encoding and panic. */
2021 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2024 if (mode_is_float(mode)) {
2026 case ir_relation_equal: return ia32_cc_float_equal;
2027 case ir_relation_less: return ia32_cc_float_below;
2028 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2029 case ir_relation_greater: return ia32_cc_float_above;
2030 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2031 case ir_relation_less_greater: return ia32_cc_not_equal;
2032 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2033 case ir_relation_unordered: return ia32_cc_parity;
2034 case ir_relation_unordered_equal: return ia32_cc_equal;
2035 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2036 case ir_relation_unordered_less_equal:
2037 return ia32_cc_float_unordered_below_equal;
2038 case ir_relation_unordered_greater:
2039 return ia32_cc_float_unordered_above;
2040 case ir_relation_unordered_greater_equal:
2041 return ia32_cc_float_unordered_above_equal;
2042 case ir_relation_unordered_less_greater:
2043 return ia32_cc_float_not_equal;
2044 case ir_relation_false:
2045 case ir_relation_true:
2046 /* should we introduce a jump always/jump never? */
2049 panic("Unexpected float pnc");
/* Signed integer compares: the "unordered" variants collapse onto the
 * ordered codes since integers are always ordered. */
2050 } else if (mode_is_signed(mode)) {
2052 case ir_relation_unordered_equal:
2053 case ir_relation_equal: return ia32_cc_equal;
2054 case ir_relation_unordered_less:
2055 case ir_relation_less: return ia32_cc_less;
2056 case ir_relation_unordered_less_equal:
2057 case ir_relation_less_equal: return ia32_cc_less_equal;
2058 case ir_relation_unordered_greater:
2059 case ir_relation_greater: return ia32_cc_greater;
2060 case ir_relation_unordered_greater_equal:
2061 case ir_relation_greater_equal: return ia32_cc_greater_equal;
2062 case ir_relation_unordered_less_greater:
2063 case ir_relation_less_greater: return ia32_cc_not_equal;
2064 case ir_relation_less_equal_greater:
2065 case ir_relation_unordered:
2066 case ir_relation_false:
2067 case ir_relation_true:
2068 /* introduce jump always/jump never? */
2071 panic("Unexpected pnc");
/* Unsigned integer compares use the below/above condition codes. */
2074 case ir_relation_unordered_equal:
2075 case ir_relation_equal: return ia32_cc_equal;
2076 case ir_relation_unordered_less:
2077 case ir_relation_less: return ia32_cc_below;
2078 case ir_relation_unordered_less_equal:
2079 case ir_relation_less_equal: return ia32_cc_below_equal;
2080 case ir_relation_unordered_greater:
2081 case ir_relation_greater: return ia32_cc_above;
2082 case ir_relation_unordered_greater_equal:
2083 case ir_relation_greater_equal: return ia32_cc_above_equal;
2084 case ir_relation_unordered_less_greater:
2085 case ir_relation_less_greater: return ia32_cc_not_equal;
2086 case ir_relation_less_equal_greater:
2087 case ir_relation_unordered:
2088 case ir_relation_false:
2089 case ir_relation_true:
2090 /* introduce jump always/jump never? */
2093 panic("Unexpected pnc");
/* Produce a flags value for a mode_b node by testing it against zero
 * (test reg,reg); the matching condition is "not equal". */
2097 static ir_node *get_flags_mode_b(ir_node *node, ia32_condition_code_t *cc_out)
2099 /* a mode_b value, we have to compare it against 0 */
2100 dbg_info *dbgi = get_irn_dbg_info(node);
2101 ir_node *new_block = be_transform_node(get_nodes_block(node));
2102 ir_node *new_op = be_transform_node(node);
2103 ir_node *flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op, new_op, false);
2104 set_ia32_ls_mode(flags, get_irn_mode(new_op));
2105 *cc_out = ia32_cc_not_equal;
/* Produce a flags value for a Cmp: tries a BT for "x & (1 << n)" patterns
 * first, otherwise transforms the Cmp and derives the condition code. */
2109 static ir_node *get_flags_node_cmp(ir_node *cmp, ia32_condition_code_t *cc_out)
2111 /* must have a Cmp as input */
2112 ir_relation relation = get_Cmp_relation(cmp);
2113 ir_relation possible;
2114 ir_node *l = get_Cmp_left(cmp);
2115 ir_node *r = get_Cmp_right(cmp);
2116 ir_mode *mode = get_irn_mode(l);
2119 /* check for bit-test */
/* Bit test only works for ==/!= style relations (the unsigned ">"
 * check covers the normalized "ptr != 0 becomes ptr > 0" form). */
2120 if (ia32_cg_config.use_bt && (relation == ir_relation_equal
2121 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2122 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2124 ir_node *la = get_And_left(l);
2125 ir_node *ra = get_And_right(l);
2132 ir_node *c = get_Shl_left(la);
2133 if (is_Const_1(c) && is_Const_0(r)) {
2134 /* (1 << n) & ra) */
2135 ir_node *n = get_Shl_right(la);
2136 flags = gen_bt(cmp, ra, n);
2137 /* the bit is copied into the CF flag */
2138 if (relation & ir_relation_equal)
2139 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2141 *cc_out = ia32_cc_below; /* test for CF=1 */
2147 /* the middle-end tries to eliminate impossible relations, so a ptr != 0
2148 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2149 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2150 * a predecessor node). So add the < bit */
2151 possible = ir_get_possible_cmp_relations(l, r);
2152 if (((relation & ir_relation_less) && !(possible & ir_relation_greater))
2153 || ((relation & ir_relation_greater) && !(possible & ir_relation_less)))
2154 relation |= ir_relation_less_greater;
2156 /* just do a normal transformation of the Cmp */
2157 *cc_out = relation_to_condition_code(relation, mode);
2158 flags = be_transform_node(cmp);
2163 * Transform a node returning a "flag" result.
2165 * @param node the node to transform
2166 * @param cc_out the compare mode to use
/* Dispatch: Cmp inputs get full condition-code selection, other mode_b
 * values are tested against zero. */
2168 static ir_node *get_flags_node(ir_node *node, ia32_condition_code_t *cc_out)
2171 return get_flags_node_cmp(node, cc_out);
2172 assert(get_irn_mode(node) == mode_b);
2173 return get_flags_mode_b(node, cc_out);
2177 * Transforms a Load.
2179 * @return the created ia32 Load node
2181 static ir_node *gen_Load(ir_node *node)
2183 ir_node *old_block = get_nodes_block(node);
2184 ir_node *block = be_transform_node(old_block);
2185 ir_node *ptr = get_Load_ptr(node);
2186 ir_node *mem = get_Load_mem(node);
2187 ir_node *new_mem = be_transform_node(mem);
2188 dbg_info *dbgi = get_irn_dbg_info(node);
2189 ir_mode *mode = get_Load_mode(node);
2190 int throws_exception = ir_throws_exception(node);
2194 ia32_address_t addr;
2196 /* construct load address */
2197 memset(&addr, 0, sizeof(addr));
2198 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2205 base = be_transform_node(base);
2211 idx = be_transform_node(idx);
/* Pick the load flavor: SSE xLoad / x87 vfld for floats, a widening
 * Conv_I2I for sub-32-bit integers, plain Load otherwise. */
2214 if (mode_is_float(mode)) {
2215 if (ia32_cg_config.use_sse2) {
2216 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2219 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2223 assert(mode != mode_b);
2225 /* create a conv node with address mode for smaller modes */
2226 if (get_mode_size_bits(mode) < 32) {
2227 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2228 new_mem, noreg_GP, mode);
2230 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2233 ir_set_throws_exception(new_node, throws_exception);
2235 set_irn_pinned(new_node, get_irn_pinned(node));
2236 set_ia32_op_type(new_node, ia32_AddrModeS);
2237 set_ia32_ls_mode(new_node, mode);
2238 set_address(new_node, &addr);
/* Floating (unpinned) loads may be rematerialized by the register
 * allocator; the asserts check all load variants share the res Proj number. */
2240 if (get_irn_pinned(node) == op_pin_state_floats) {
2241 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2242 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2243 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2244 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2247 SET_IA32_ORIG_NODE(new_node, node);
/* Decide whether a Load feeding this node can be folded into destination
 * address mode (read-modify-write on memory). Returns non-zero if safe. */
2252 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2253 ir_node *ptr, ir_node *other)
2260 /* we only use address mode if we're the only user of the load */
2261 if (get_irn_n_edges(node) > 1)
2264 load = get_Proj_pred(node);
2267 if (get_nodes_block(load) != block)
2270 /* store should have the same pointer as the load */
2271 if (get_Load_ptr(load) != ptr)
2274 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2275 if (other != NULL &&
2276 get_nodes_block(other) == block &&
2277 heights_reachable_in_block(ia32_heights, other, load)) {
/* Also reject if something between load and store prevents AM folding
 * (aliasing stores etc. — see ia32_prevents_AM). */
2281 if (ia32_prevents_AM(block, load, mem))
2283 /* Store should be attached to the load via mem */
2284 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Build a destination-address-mode binop (op [mem], reg/imm) for a Store of
 * a binary operation whose one operand is the loaded memory value. Returns
 * the new node, with the old Load's mem Proj redirected to it. */
2289 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2290 ir_node *mem, ir_node *ptr, ir_mode *mode,
2291 construct_binop_dest_func *func,
2292 construct_binop_dest_func *func8bit,
2293 match_flags_t flags)
2295 ir_node *src_block = get_nodes_block(node);
2303 ia32_address_mode_t am;
2304 ia32_address_t *addr = &am.addr;
2305 memset(&am, 0, sizeof(am));
2307 assert(flags & match_immediate); /* there is no destam node without... */
2308 commutative = (flags & match_commutative) != 0;
/* Either op1 is the loaded value (op2 becomes the register/immediate
 * operand), or — for commutative ops — the roles may be swapped. */
2310 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2311 build_address(&am, op1, ia32_create_am_double_use);
2312 new_op = create_immediate_or_transform(op2, 0);
2313 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2314 build_address(&am, op2, ia32_create_am_double_use);
2315 new_op = create_immediate_or_transform(op1, 0);
2320 if (addr->base == NULL)
2321 addr->base = noreg_GP;
2322 if (addr->index == NULL)
2323 addr->index = noreg_GP;
2324 if (addr->mem == NULL)
2327 dbgi = get_irn_dbg_info(node);
2328 block = be_transform_node(src_block);
2329 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8-bit operations need the dedicated 8-bit constructor (different
 * register constraints on ia32). */
2331 if (get_mode_size_bits(mode) == 8) {
2332 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2334 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2336 set_address(new_node, addr);
2337 set_ia32_op_type(new_node, ia32_AddrModeD);
2338 set_ia32_ls_mode(new_node, mode);
2339 SET_IA32_ORIG_NODE(new_node, node);
/* Reroute the consumed Load's memory Proj(s) to the new RMW node. */
2341 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2342 mem_proj = be_transform_node(am.mem_proj);
2343 be_set_transformed_node(am.mem_proj, new_node);
2344 be_set_transformed_node(mem_proj, new_node);
/* Unary counterpart of dest_am_binop: build a destination-address-mode unop
 * (e.g. inc/dec/not on memory) when the Store's value is computed from the
 * loaded memory word itself. */
2349 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2350 ir_node *ptr, ir_mode *mode,
2351 construct_unop_dest_func *func)
2353 ir_node *src_block = get_nodes_block(node);
2359 ia32_address_mode_t am;
2360 ia32_address_t *addr = &am.addr;
2362 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2365 memset(&am, 0, sizeof(am));
2366 build_address(&am, op, ia32_create_am_double_use);
2368 dbgi = get_irn_dbg_info(node);
2369 block = be_transform_node(src_block);
2370 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2371 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2372 set_address(new_node, addr);
2373 set_ia32_op_type(new_node, ia32_AddrModeD);
2374 set_ia32_ls_mode(new_node, mode);
2375 SET_IA32_ORIG_NODE(new_node, node);
/* Reroute the consumed Load's memory Proj(s) to the new RMW node. */
2377 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2378 mem_proj = be_transform_node(am.mem_proj);
2379 be_set_transformed_node(am.mem_proj, new_node);
2380 be_set_transformed_node(mem_proj, new_node);
/* Try to turn a Store of Mux(sel, 1, 0) (or 0/1) into a SetccMem, which
 * writes the condition byte directly to memory. Only 8-bit stores qualify. */
2385 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2387 ir_mode *mode = get_irn_mode(node);
2388 ir_node *mux_true = get_Mux_true(node);
2389 ir_node *mux_false = get_Mux_false(node);
2397 ia32_condition_code_t cc;
2398 ia32_address_t addr;
2400 if (get_mode_size_bits(mode) != 8)
/* Mux(sel, 0, 1) works too, with the condition negated below. */
2403 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2405 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2411 cond = get_Mux_sel(node);
2412 flags = get_flags_node(cond, &cc);
2413 /* we can't handle the float special cases with SetM */
2414 if (cc & ia32_cc_additional_float_cases)
2417 cc = ia32_negate_condition_code(cc);
2419 build_address_ptr(&addr, ptr, mem);
2421 dbgi = get_irn_dbg_info(node);
2422 block = get_nodes_block(node);
2423 new_block = be_transform_node(block);
2424 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2425 addr.index, addr.mem, flags, cc);
2426 set_address(new_node, &addr);
2427 set_ia32_op_type(new_node, ia32_AddrModeD);
2428 set_ia32_ls_mode(new_node, mode);
2429 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to transform Store(op(Load(x), y)) patterns into a single ia32
 * destination-address-mode instruction (AddMem, SubMem, AndMem, ...),
 * avoiding the separate load/store pair.  The stored value must be a
 * GP-mode node, used only by this Store, and live in the same block.
 *
 * @param node  the Store node
 * @return the created destination-AM node or NULL if no pattern matched
 * NOTE(review): some lines (returns, case labels) are elided in this view.
 */
2434 static ir_node *try_create_dest_am(ir_node *node)
2436 	ir_node *val  = get_Store_value(node);
2437 	ir_node *mem  = get_Store_mem(node);
2438 	ir_node *ptr  = get_Store_ptr(node);
2439 	ir_mode *mode = get_irn_mode(val);
2440 	unsigned bits = get_mode_size_bits(mode);
2445 	/* handle only GP modes for now... */
2446 	if (!ia32_mode_needs_gp_reg(mode))
2450 	/* store must be the only user of the val node */
2451 	if (get_irn_n_edges(val) > 1)
2453 	/* skip pointless convs */
2455 		ir_node *conv_op   = get_Conv_op(val);
2456 		ir_mode *pred_mode = get_irn_mode(conv_op);
2457 		if (!ia32_mode_needs_gp_reg(pred_mode))
		/* a widening (or mode_b) Conv before a store is a no-op here */
2459 		if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2467 	/* value must be in the same block */
2468 	if (get_nodes_block(node) != get_nodes_block(val))
	/* dispatch on the stored operation to pick the matching xxxMem node */
2471 	switch (get_irn_opcode(val)) {
2473 		op1 = get_Add_left(val);
2474 		op2 = get_Add_right(val);
		/* prefer inc/dec over add/sub by +/-1 when the target likes them */
2475 		if (ia32_cg_config.use_incdec) {
2476 			if (is_Const_1(op2)) {
2477 				new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2479 			} else if (is_Const_Minus_1(op2)) {
2480 				new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2484 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2485 		                         new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2486 		                         match_commutative | match_immediate);
2489 		op1 = get_Sub_left(val);
2490 		op2 = get_Sub_right(val);
2491 		if (is_Const(op2)) {
2492 			ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2494 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2495 		                         new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2499 		op1 = get_And_left(val);
2500 		op2 = get_And_right(val);
2501 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2502 		                         new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2503 		                         match_commutative | match_immediate);
2506 		op1 = get_Or_left(val);
2507 		op2 = get_Or_right(val);
2508 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2509 		                         new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2510 		                         match_commutative | match_immediate);
2513 		op1 = get_Eor_left(val);
2514 		op2 = get_Eor_right(val);
2515 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2516 		                         new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2517 		                         match_commutative | match_immediate);
2520 		op1 = get_Shl_left(val);
2521 		op2 = get_Shl_right(val);
2522 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2523 		                         new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2527 		op1 = get_Shr_left(val);
2528 		op2 = get_Shr_right(val);
2529 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2530 		                         new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2534 		op1 = get_Shrs_left(val);
2535 		op2 = get_Shrs_right(val);
2536 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2537 		                         new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2541 		op1 = get_Rotl_left(val);
2542 		op2 = get_Rotl_right(val);
2543 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2544 		                         new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2547 		/* TODO: match ROR patterns... */
2549 		new_node = try_create_SetMem(val, ptr, mem);
2553 		op1 = get_Minus_op(val);
2554 		new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2557 		/* should be lowered already */
2558 		assert(mode != mode_b);
2559 		op1 = get_Not_op(val);
2560 		new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
	/* inherit the pinned state: a pinned Store must stay pinned */
2566 	if (new_node != NULL) {
2567 		if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2568 				get_irn_pinned(node) == op_pin_state_pinned) {
2569 			set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Check whether an integer mode can be the target of an x87 fist store:
 * only signed 16- or 32-bit modes qualify (fist has no unsigned forms).
 */
2576 static bool possible_int_mode_for_fp(ir_mode *mode)
2580 	if (!mode_is_signed(mode))
2582 	size = get_mode_size_bits(mode);
2583 	if (size != 16 && size != 32)
/**
 * Test whether @p node is a Conv from a float mode to an integer mode
 * suitable for a fist store (see possible_int_mode_for_fp()).
 */
2588 static int is_float_to_int_conv(const ir_node *node)
2590 	ir_mode *mode = get_irn_mode(node);
2594 	if (!possible_int_mode_for_fp(mode))
2599 	conv_op   = get_Conv_op(node);
2600 	conv_mode = get_irn_mode(conv_op);
2602 	if (!mode_is_float(conv_mode))
2609  * Transform a Store(floatConst) into a sequence of
 * 32-bit integer immediate stores (one per 4 bytes of the constant).
 *
 * @param node  the original Store node
 * @param cns   the float Const being stored
2612  * @return the created ia32 Store node
 * (a Sync of the partial stores' memory Projs when more than one
 * store is needed — see the return paths below)
2614 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2616 	ir_mode        *mode      = get_irn_mode(cns);
2617 	unsigned        size      = get_mode_size_bytes(mode);
2618 	ir_tarval      *tv        = get_Const_tarval(cns);
2619 	ir_node        *block     = get_nodes_block(node);
2620 	ir_node        *new_block = be_transform_node(block);
2621 	ir_node        *ptr       = get_Store_ptr(node);
2622 	ir_node        *mem       = get_Store_mem(node);
2623 	dbg_info       *dbgi      = get_irn_dbg_info(node);
2626 	int             throws_exception = ir_throws_exception(node);
2628 	ia32_address_t  addr;
	/* only float sizes that decompose into whole 32-bit words */
2630 	assert(size % 4 == 0);
2633 	build_address_ptr(&addr, ptr, mem);
	/* assemble one little-endian 32-bit word of the constant */
2637 			get_tarval_sub_bits(tv, ofs)            |
2638 			(get_tarval_sub_bits(tv, ofs + 1) <<  8) |
2639 			(get_tarval_sub_bits(tv, ofs + 2) << 16) |
2640 			(get_tarval_sub_bits(tv, ofs + 3) << 24);
2641 		ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2643 		ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2644 			addr.index, addr.mem, imm);
2645 		ir_node *new_mem  = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2647 		ir_set_throws_exception(new_node, throws_exception);
2648 		set_irn_pinned(new_node, get_irn_pinned(node));
2649 		set_ia32_op_type(new_node, ia32_AddrModeD);
2650 		set_ia32_ls_mode(new_node, mode_Iu);
2651 		set_address(new_node, &addr);
2652 		SET_IA32_ORIG_NODE(new_node, node);
2660 	} while (size != 0);
	/* multiple partial stores: join their memory effects with a Sync */
2663 		return new_rd_Sync(dbgi, new_block, i, ins);
2665 		return get_Proj_pred(ins[0]);
2670  * Generate a vfist or vfisttp instruction (float -> int store).
 *
 * fisttp (SSE3) truncates regardless of the rounding mode; plain fist
 * needs the FPU control word temporarily set to truncation.
2672 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2673 		ir_node *index, ir_node *mem, ir_node *val)
2675 	if (ia32_cg_config.use_fisttp) {
2676 		/* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2677 		   if other users exists */
2678 		ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2679 		ir_node *value   = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
		/* keep the popped value alive so the register allocator sees it */
2680 		be_new_Keep(block, 1, &value);
		/* no fisttp: switch the FPU to truncating rounding around the fist */
2684 		ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2687 		ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2693  * Transforms a general (no special case) Store.
 *
 * Tries destination address mode first; otherwise builds an explicit
 * store: xStore (SSE2 float), vfst (x87 float), fist (float->int conv),
 * or a plain GP Store/Store8Bit.
2695  * @return the created ia32 Store node
2697 static ir_node *gen_general_Store(ir_node *node)
2699 	ir_node  *val       = get_Store_value(node);
2700 	ir_mode  *mode      = get_irn_mode(val);
2701 	ir_node  *block     = get_nodes_block(node);
2702 	ir_node  *new_block = be_transform_node(block);
2703 	ir_node  *ptr       = get_Store_ptr(node);
2704 	ir_node  *mem       = get_Store_mem(node);
2705 	dbg_info *dbgi      = get_irn_dbg_info(node);
2706 	int       throws_exception = ir_throws_exception(node);
2709 	ia32_address_t addr;
2711 	/* check for destination address mode */
2712 	new_node = try_create_dest_am(node);
2713 	if (new_node != NULL)
2716 	/* construct store address */
2717 	memset(&addr, 0, sizeof(addr));
2718 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2720 	if (addr.base == NULL) {
2721 		addr.base = noreg_GP;
2723 		addr.base = be_transform_node(addr.base);
2726 	if (addr.index == NULL) {
2727 		addr.index = noreg_GP;
2729 		addr.index = be_transform_node(addr.index);
2731 	addr.mem = be_transform_node(mem);
2733 	if (mode_is_float(mode)) {
2734 		/* Convs (and strict-Convs) before stores are unnecessary if the mode
		   is the same (the store only writes `mode`-many bits anyway) */
2736 		while (is_Conv(val) && mode == get_irn_mode(val)) {
2737 			ir_node *op = get_Conv_op(val);
2738 			if (!mode_is_float(get_irn_mode(op)))
2742 		new_val = be_transform_node(val);
2743 		if (ia32_cg_config.use_sse2) {
2744 			new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2745 			                              addr.index, addr.mem, new_val);
2747 			new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2748 			                            addr.index, addr.mem, new_val, mode);
	/* x87 only: fuse float->int Conv + Store into a fist */
2750 	} else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2751 		val = get_Conv_op(val);
2753 		/* TODO: is this optimisation still necessary at all (middleend)? */
2754 		/* We can skip ALL float->float up-Convs (and strict-up-Convs) before
		   the fist, as fist stores from the full-precision x87 register */
2756 		while (is_Conv(val)) {
2757 			ir_node *op = get_Conv_op(val);
2758 			if (!mode_is_float(get_irn_mode(op)))
2760 			if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2764 		new_val  = be_transform_node(val);
2765 		new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
		/* plain integer store; value may become an immediate operand */
2767 		new_val = create_immediate_or_transform(val, 0);
2768 		assert(mode != mode_b);
2770 		if (get_mode_size_bits(mode) == 8) {
2771 			new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2772 			                                 addr.index, addr.mem, new_val);
2774 			new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2775 			                             addr.index, addr.mem, new_val);
2778 	ir_set_throws_exception(new_node, throws_exception);
2780 	set_irn_pinned(new_node, get_irn_pinned(node));
2781 	set_ia32_op_type(new_node, ia32_AddrModeD);
2782 	set_ia32_ls_mode(new_node, mode);
2784 	set_address(new_node, &addr);
2785 	SET_IA32_ORIG_NODE(new_node, node);
2791  * Transforms a Store.
 *
 * Dispatches float-constant stores to gen_float_const_Store() (integer
 * immediate stores), everything else to gen_general_Store().
2793  * @return the created ia32 Store node
2795 static ir_node *gen_Store(ir_node *node)
2797 	ir_node *val  = get_Store_value(node);
2798 	ir_mode *mode = get_irn_mode(val);
2800 	if (mode_is_float(mode) && is_Const(val)) {
2801 		/* We can transform every floating const store
2802 		   into a sequence of integer stores.
2803 		   If the constant is already in a register,
2804 		   it would be better to use it, but we don't
2805 		   have this information here. */
2806 		return gen_float_const_Store(node, val);
2808 	return gen_general_Store(node);
2812  * Transforms a Switch (a Cond with a non-mode_b selector) into an ia32
 * SwitchJmp that indexes a private, constant jump table entity.
2814  * @return the created ia32 SwitchJmp node
2816 static ir_node *create_Switch(ir_node *node)
2818 	dbg_info  *dbgi       = get_irn_dbg_info(node);
2819 	ir_node   *block      = be_transform_node(get_nodes_block(node));
2820 	ir_node   *sel        = get_Cond_selector(node);
2821 	ir_node   *new_sel    = be_transform_node(sel);
2822 	long       default_pn = get_Cond_default_proj(node);
	/* selector must already be 32 bit (lowered by earlier phases) */
2826 	assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
	/* jump table: private constant entity, filled in by later phases */
2828 	entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2829 	set_entity_visibility(entity, ir_visibility_private);
2830 	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2832 	/* TODO: we could perform some more matching here to also use the base
2833 	 * register of the address mode */
2835 		= new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, default_pn);
	/* scale 2: table entries are 4 bytes (index << 2) */
2836 	set_ia32_am_scale(new_node, 2);
2837 	set_ia32_am_sc(new_node, entity);
2838 	set_ia32_op_type(new_node, ia32_AddrModeS);
2839 	set_ia32_ls_mode(new_node, mode_Iu);
2840 	SET_IA32_ORIG_NODE(new_node, node);
2841 	// FIXME This seems wrong. GCC uses PIC for switch on OS X.
2842 	get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2848  * Transform a Cond node: boolean conditions become a flags-producing
 * node plus an ia32 Jcc; integer selectors become a SwitchJmp.
2850 static ir_node *gen_Cond(ir_node *node)
2852 	ir_node  *block     = get_nodes_block(node);
2853 	ir_node  *new_block = be_transform_node(block);
2854 	dbg_info *dbgi      = get_irn_dbg_info(node);
2855 	ir_node  *sel       = get_Cond_selector(node);
2856 	ir_mode  *sel_mode  = get_irn_mode(sel);
2857 	ir_node  *flags     = NULL;
2859 	ia32_condition_code_t cc;
2861 	if (sel_mode != mode_b) {
2862 		return create_Switch(node);
2865 	/* we get flags from a Cmp */
2866 	flags = get_flags_node(sel, &cc);
2868 	new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2869 	SET_IA32_ORIG_NODE(new_node, node);
2875  * Transform a be_Copy: duplicate it, normalizing GP modes to mode_Iu
 * (the ia32 backend uses mode_Iu for all GP register values).
2877 static ir_node *gen_be_Copy(ir_node *node)
2879 	ir_node *new_node = be_duplicate_node(node);
2880 	ir_mode *mode     = get_irn_mode(new_node);
2882 	if (ia32_mode_needs_gp_reg(mode)) {
2883 		set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare: fucomi when available (writes eflags
 * directly), otherwise fucom/ftst + fnstsw + sahf to move the FPU
 * status word into the CPU flags.
 */
2890 static ir_node *create_Fucom(ir_node *node)
2891 	dbg_info *dbgi      = get_irn_dbg_info(node);
2892 	ir_node  *block     = get_nodes_block(node);
2893 	ir_node  *new_block = be_transform_node(block);
2894 	ir_node  *left      = get_Cmp_left(node);
2895 	ir_node  *new_left  = be_transform_node(left);
2896 	ir_node  *right     = get_Cmp_right(node);
2900 	if (ia32_cg_config.use_fucomi) {
2901 		new_right = be_transform_node(right);
2902 		new_node  = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2904 		set_ia32_commutative(new_node);
2905 		SET_IA32_ORIG_NODE(new_node, node);
		/* compare against 0 can use the shorter ftst */
2907 		if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2908 			new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2910 			new_right = be_transform_node(right);
2911 			new_node  = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2914 			set_ia32_commutative(new_node);
2916 		SET_IA32_ORIG_NODE(new_node, node);
		/* sahf copies AH (FPU status) into the eflags register */
2918 		new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2919 		SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an SSE2 Ucomi float compare, with source address-mode matching
 * so one operand may be loaded directly from memory.
 */
2925 static ir_node *create_Ucomi(ir_node *node)
2927 	dbg_info *dbgi      = get_irn_dbg_info(node);
2928 	ir_node  *src_block = get_nodes_block(node);
2929 	ir_node  *new_block = be_transform_node(src_block);
2930 	ir_node  *left      = get_Cmp_left(node);
2931 	ir_node  *right     = get_Cmp_right(node);
2933 	ia32_address_mode_t  am;
2934 	ia32_address_t      *addr = &am.addr;
2936 	match_arguments(&am, src_block, left, right, NULL,
2937 	                match_commutative | match_am);
2939 	new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2940 	                             addr->mem, am.new_op1, am.new_op2,
2942 	set_am_attributes(new_node, &am);
2944 	SET_IA32_ORIG_NODE(new_node, node);
	/* reroute memory users if a load was folded into the compare */
2946 	new_node = fix_mem_proj(new_node, &am);
2952  * returns true if it is assured, that the upper bits of a node are "clean"
2953  * which means for a 16 or 8 bit value, that the upper bits in the register
2954  * are 0 for unsigned and a copy of the last significant bit for signed
 * modes.  Used to decide whether a sub-32-bit compare may safely be done
 * with a full 32-bit instruction (smaller encoding).
2957 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2959 	assert(ia32_mode_needs_gp_reg(mode));
	/* a full-width value has no "upper" bits to worry about */
2960 	if (get_mode_size_bits(mode) >= 32)
2963 	if (is_Proj(transformed_node))
2964 		return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2966 	switch (get_ia32_irn_opcode(transformed_node)) {
2967 	case iro_ia32_Conv_I2I:
2968 	case iro_ia32_Conv_I2I8Bit: {
		/* a sign/zero extension from an equal-or-smaller mode with
		   matching signedness leaves the upper bits clean */
2969 		ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2970 		if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2972 		if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2979 		if (mode_is_signed(mode)) {
2980 			return false; /* TODO handle signed modes */
			/* Shr by a constant >= (32 - bits) zeroes the upper bits */
2982 			ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2983 			if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2984 				const ia32_immediate_attr_t *attr
2985 					= get_ia32_immediate_attr_const(right);
2986 				if (attr->symconst == 0 &&
2987 				    (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2991 			return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2995 		/* TODO too conservative if shift amount is constant */
2996 		return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
2999 		if (!mode_is_signed(mode)) {
			/* unsigned And: one clean operand masks the other */
3001 				upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
3002 				upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
3004 		/* TODO if one is known to be zero extended, then || is sufficient */
3009 			upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
3010 			upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
3012 	case iro_ia32_Const:
3013 	case iro_ia32_Immediate: {
3014 		const ia32_immediate_attr_t *attr =
3015 			get_ia32_immediate_attr_const(transformed_node);
3016 		if (mode_is_signed(mode)) {
			/* clean iff all upper bits equal the sign bit */
3017 			long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
3018 			return shifted == 0 || shifted == -1;
3020 			unsigned long shifted = (unsigned long)attr->offset;
3021 			shifted >>= get_mode_size_bits(mode);
3022 			return shifted == 0;
3032  * Generate code for a Cmp: float compares go to Ucomi (SSE2) or Fucom
 * (x87); integer compares become Test (for `(x & y) ==/!= 0`) or Cmp,
 * widened to 32-bit when the upper bits are known clean.
3034 static ir_node *gen_Cmp(ir_node *node)
3036 	dbg_info *dbgi      = get_irn_dbg_info(node);
3037 	ir_node  *block     = get_nodes_block(node);
3038 	ir_node  *new_block = be_transform_node(block);
3039 	ir_node  *left      = get_Cmp_left(node);
3040 	ir_node  *right     = get_Cmp_right(node);
3041 	ir_mode  *cmp_mode  = get_irn_mode(left);
3043 	ia32_address_mode_t  am;
3044 	ia32_address_t      *addr = &am.addr;
3046 	if (mode_is_float(cmp_mode)) {
3047 		if (ia32_cg_config.use_sse2) {
3048 			return create_Ucomi(node);
3050 			return create_Fucom(node);
3054 	assert(ia32_mode_needs_gp_reg(cmp_mode));
3056 	/* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3057 	if (is_Const_0(right)          &&
3059 	    get_irn_n_edges(left) == 1) {
3060 		/* Test(and_left, and_right) */
3061 		ir_node *and_left  = get_And_left(left);
3062 		ir_node *and_right = get_And_right(left);
3064 		/* matze: code here used mode instead of cmd_mode, I think it is always
3065 		 * the same as cmp_mode, but I leave this here to see if this is really
		 * true... */
3068 		assert(get_irn_mode(and_left) == cmp_mode);
3070 		match_arguments(&am, block, and_left, and_right, NULL,
3072 										match_am | match_8bit_am | match_16bit_am |
3073 										match_am_and_immediates | match_immediate);
3075 		/* use 32bit compare mode if possible since the opcode is smaller */
3076 		if (upper_bits_clean(am.new_op1, cmp_mode) &&
3077 		    upper_bits_clean(am.new_op2, cmp_mode)) {
3078 			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3081 		if (get_mode_size_bits(cmp_mode) == 8) {
3082 			new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3083 					addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3085 			new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
3086 					addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3089 		/* Cmp(left, right) */
3090 		match_arguments(&am, block, left, right, NULL,
3091 		                match_commutative | match_am | match_8bit_am |
3092 		                match_16bit_am | match_am_and_immediates |
3094 		/* use 32bit compare mode if possible since the opcode is smaller */
3095 		if (upper_bits_clean(am.new_op1, cmp_mode) &&
3096 		    upper_bits_clean(am.new_op2, cmp_mode)) {
3097 			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3100 		if (get_mode_size_bits(cmp_mode) == 8) {
3101 			new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3102 			                               addr->index, addr->mem, am.new_op1,
3103 			                               am.new_op2, am.ins_permuted);
3105 			new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3106 			                           addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3109 	set_am_attributes(new_node, &am);
3110 	set_ia32_ls_mode(new_node, cmp_mode);
3112 	SET_IA32_ORIG_NODE(new_node, node);
3114 	new_node = fix_mem_proj(new_node, &am);
/**
 * Create an ia32 CMovcc for a Mux with GP operands.
 *
 * @param node       the Mux node
 * @param flags      the untransformed flags-producing node
 * @param new_flags  the transformed flags node
 * @param cc         the condition code (negated here if operand
 *                   matching permuted the inputs)
 */
3119 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3120 		ia32_condition_code_t cc)
3122 	dbg_info            *dbgi      = get_irn_dbg_info(node);
3123 	ir_node             *block     = get_nodes_block(node);
3124 	ir_node             *new_block = be_transform_node(block);
3125 	ir_node             *val_true  = get_Mux_true(node);
3126 	ir_node             *val_false = get_Mux_false(node);
3128 	ia32_address_mode_t  am;
3129 	ia32_address_t      *addr;
3131 	assert(ia32_cg_config.use_cmov);
3132 	assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3136 	match_arguments(&am, block, val_false, val_true, flags,
3137 			match_commutative | match_am | match_16bit_am | match_mode_neutral);
3139 	if (am.ins_permuted)
3140 		cc = ia32_negate_condition_code(cc);
3142 	new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3143 	                              addr->mem, am.new_op1, am.new_op2, new_flags,
3145 	set_am_attributes(new_node, &am);
3147 	SET_IA32_ORIG_NODE(new_node, node);
3149 	new_node = fix_mem_proj(new_node, &am);
3155  * Creates a ia32 Setcc instruction producing a 0/1 value from the flags,
 * zero-extended to the requested mode when it is wider than 8 bits.
3157 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3158                                  ir_node *flags, ia32_condition_code_t cc,
3161 	ir_mode *mode  = get_irn_mode(orig_node);
3164 	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3165 	SET_IA32_ORIG_NODE(new_node, orig_node);
3167 	/* we might need to conv the result up */
3168 	if (get_mode_size_bits(mode) > 8) {
		/* zero-extend the Setcc byte (mode_Bu) to the full register */
3169 		new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3170 		                                    nomem, new_node, mode_Bu);
3171 		SET_IA32_ORIG_NODE(new_node, orig_node);
3178  * Create instruction for an unsigned Difference or Zero:
 * doz(a, b) = a > b ? a - b : 0, built branch-free as
 * (a - b) & ~sbb(0) — the sbb/not turns the borrow flag into an
 * all-ones/all-zeros mask.
3180 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3182 	ir_mode *mode  = get_irn_mode(psi);
3192 	new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3193 			match_mode_neutral | match_am | match_immediate | match_two_users);
3195 	block = get_nodes_block(new_node);
	/* we need both the result and the flags of the Sub */
3197 	if (is_Proj(new_node)) {
3198 		sub = get_Proj_pred(new_node);
3201 		set_irn_mode(sub, mode_T);
3202 		new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3204 	assert(is_ia32_Sub(sub));
3205 	eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3207 	dbgi   = get_irn_dbg_info(psi);
3208 	sbb    = new_bd_ia32_Sbb0(dbgi, block, eflags);
3209 	notn   = new_bd_ia32_Not(dbgi, block, sbb);
3211 	new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3212 	set_ia32_commutative(new_node);
3217  * Create an const array of two float consts as a private, constant
 * global entity (used e.g. as a 2-entry lookup table for float Mux).
3219  * @param c0        the first constant
3220  * @param c1        the second constant
3221  * @param new_mode  IN/OUT for the mode of the constants, if NULL
3222  *                  smallest possible mode will be used
 * @return the created entity
3224 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3227 	ir_mode          *mode = *new_mode;
3229 	ir_initializer_t *initializer;
3230 	ir_tarval        *tv0 = get_Const_tarval(c0);
3231 	ir_tarval        *tv1 = get_Const_tarval(c1);
3234 		/* detect the best mode for the constants */
3235 		mode = get_tarval_mode(tv0);
		/* try to shrink to float, then double, when lossless for both */
3237 		if (mode != mode_F) {
3238 			if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3239 			    tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3241 				tv0 = tarval_convert_to(tv0, mode);
3242 				tv1 = tarval_convert_to(tv1, mode);
3243 			} else if (mode != mode_D) {
3244 				if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3245 				    tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3247 					tv0 = tarval_convert_to(tv0, mode);
3248 					tv1 = tarval_convert_to(tv1, mode);
3255 	tp = ia32_create_float_type(mode, 4);
3256 	tp = ia32_create_float_array(tp);
3258 	ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3260 	set_entity_ld_ident(ent, get_entity_ident(ent));
3261 	set_entity_visibility(ent, ir_visibility_private);
3262 	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3264 	initializer = create_initializer_compound(2);
3266 	set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3267 	set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3269 	set_entity_initializer(ent, initializer);
3276  * Possible transformations for creating a Setcc.
 * NOTE(review): the enum constants themselves are elided in this view.
3278 enum setcc_transform_insn {
/**
 * A sequence of instruction steps that turns a Setcc 0/1 result into an
 * arbitrary pair of constant values (see find_const_transform()).
 */
3291 typedef struct setcc_transform {
	/* condition code to use for the initial Setcc/Sbb */
3293 	ia32_condition_code_t cc;
3295 	enum setcc_transform_insn transform;
3299 } setcc_transform_t;
3302  * Setcc can only handle 0 and 1 result.
3303  * Find a transformation that creates 0 and 1 from
 * the given tarval pair (t = "true" value, f = "false" value) by
 * normalizing to f == 0, then building t via Set/Neg/Shl/Lea/And/Add
 * steps recorded (in reverse application order) in @p res.
3306 static void find_const_transform(ia32_condition_code_t cc,
3307                                  ir_tarval *t, ir_tarval *f,
3308                                  setcc_transform_t *res)
	/* normalize: make t the non-zero (or larger) value, adjusting cc */
3314 	if (tarval_is_null(t)) {
3318 		cc = ia32_negate_condition_code(cc);
3319 	} else if (tarval_cmp(t, f) == ir_relation_less) {
3320 		// now, t is the bigger one
3324 		cc = ia32_negate_condition_code(cc);
	/* normalize f to 0 by emitting a final ADD of f (t becomes t - f) */
3328 	if (! tarval_is_null(f)) {
3329 		ir_tarval *t_sub = tarval_sub(t, f, NULL);
3332 		res->steps[step].transform = SETCC_TR_ADD;
3334 		if (t == tarval_bad)
3335 			panic("constant subtract failed");
3336 		if (! tarval_is_long(f))
3337 			panic("tarval is not long");
3339 		res->steps[step].val = get_tarval_long(f);
3341 		f = tarval_sub(f, f, NULL);
3342 		assert(tarval_is_null(f));
	/* t == 1: a plain Setcc suffices */
3345 	if (tarval_is_one(t)) {
3346 		res->steps[step].transform = SETCC_TR_SET;
3347 		res->num_steps = ++step;
	/* t == -1: negate the Setcc result (0 -> 0, 1 -> -1) */
3351 	if (tarval_is_minus_one(t)) {
3352 		res->steps[step].transform = SETCC_TR_NEG;
3354 		res->steps[step].transform = SETCC_TR_SET;
3355 		res->num_steps = ++step;
3358 	if (tarval_is_long(t)) {
3359 		long v = get_tarval_long(t);
3361 		res->steps[step].val = 0;
		/* small multipliers map to a single Lea/Shl:
		 * 9/8 -> scale 3, 5/4 -> scale 2, 3/2 -> scale 1 */
3364 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3366 			res->steps[step].transform = SETCC_TR_LEAxx;
3367 			res->steps[step].scale     = 3; /* (a << 3) + a */
3370 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3372 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3373 			res->steps[step].scale     = 3; /* (a << 3) */
3376 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3378 			res->steps[step].transform = SETCC_TR_LEAxx;
3379 			res->steps[step].scale     = 2; /* (a << 2) + a */
3382 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3384 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3385 			res->steps[step].scale     = 2; /* (a << 2) */
3388 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3390 			res->steps[step].transform = SETCC_TR_LEAxx;
3391 			res->steps[step].scale     = 1; /* (a << 1) + a */
3394 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3396 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3397 			res->steps[step].scale     = 1; /* (a << 1) */
3400 			res->num_steps = step;
		/* general case: mask (And) for multi-bit t, or Neg + Shl for
		 * a single-bit t */
3403 		if (! tarval_is_single_bit(t)) {
3404 			res->steps[step].transform = SETCC_TR_AND;
3405 			res->steps[step].val       = v;
3407 			res->steps[step].transform = SETCC_TR_NEG;
3409 			int val = get_tarval_lowest_bit(t);
3412 				res->steps[step].transform = SETCC_TR_SHL;
3413 				res->steps[step].scale     = val;
3417 		res->steps[step].transform = SETCC_TR_SET;
3418 		res->num_steps = ++step;
3421 	panic("tarval is not long");
3425  * Transforms a Mux node into some code sequence: float abs, SSE min/max,
 * a constant-table load (x87/SSE float with const operands), a doz
 * (unsigned difference-or-zero), a Setcc-based constant sequence, or a
 * CMov as the general GP case.
3427  * @return The transformed node.
3429 static ir_node *gen_Mux(ir_node *node)
3431 	dbg_info *dbgi      = get_irn_dbg_info(node);
3432 	ir_node  *block     = get_nodes_block(node);
3433 	ir_node  *new_block = be_transform_node(block);
3434 	ir_node  *mux_true  = get_Mux_true(node);
3435 	ir_node  *mux_false = get_Mux_false(node);
3436 	ir_node  *sel       = get_Mux_sel(node);
3437 	ir_mode  *mode      = get_irn_mode(node);
3441 	ia32_condition_code_t cc;
3443 	assert(get_irn_mode(sel) == mode_b);
	/* abs(x) pattern: only handled for float modes */
3445 	is_abs = ir_mux_is_abs(sel, mux_true, mux_false);
3447 		if (ia32_mode_needs_gp_reg(mode)) {
3448 			ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3451 			ir_node *op = ir_get_abs_op(sel, mux_true, mux_false);
3452 			return create_float_abs(dbgi, block, op, is_abs < 0, node);
3456 	/* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3457 	if (mode_is_float(mode)) {
3458 		ir_node    *cmp_left  = get_Cmp_left(sel);
3459 		ir_node    *cmp_right = get_Cmp_right(sel);
3460 		ir_relation relation  = get_Cmp_relation(sel);
3462 		if (ia32_cg_config.use_sse2) {
3463 			if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3464 				if (cmp_left == mux_true && cmp_right == mux_false) {
3465 					/* Mux(a <= b, a, b) => MIN */
3466 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3467 			                 match_commutative | match_am | match_two_users);
3468 				} else if (cmp_left == mux_false && cmp_right == mux_true) {
3469 					/* Mux(a <= b, b, a) => MAX */
3470 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3471 			                 match_commutative | match_am | match_two_users);
3473 			} else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3474 				if (cmp_left == mux_true && cmp_right == mux_false) {
3475 					/* Mux(a >= b, a, b) => MAX */
3476 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3477 			                 match_commutative | match_am | match_two_users);
3478 				} else if (cmp_left == mux_false && cmp_right == mux_true) {
3479 					/* Mux(a >= b, b, a) => MIN */
3480 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3481 			                 match_commutative | match_am | match_two_users);
		/* both values const: Setcc-index into a 2-entry constant table */
3486 		if (is_Const(mux_true) && is_Const(mux_false)) {
3487 			ia32_address_mode_t am;
3492 			flags    = get_flags_node(sel, &cc);
3493 			new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3495 			if (ia32_cg_config.use_sse2) {
3496 				/* cannot load from different mode on SSE */
3499 				/* x87 can load any mode */
			/* table entity holds [false_val, true_val]; Setcc result
			 * (0/1) scaled by the entry size selects the element */
3503 			am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3505 			switch (get_mode_size_bytes(new_mode)) {
3515 				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3516 				set_ia32_am_scale(new_node, 2);
3521 				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3522 				set_ia32_am_scale(new_node, 1);
3525 				/* arg, shift 16 NOT supported */
3527 				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3530 				panic("Unsupported constant size");
3533 			am.ls_mode            = new_mode;
3534 			am.addr.base          = get_symconst_base();
3535 			am.addr.index         = new_node;
3536 			am.addr.mem           = nomem;
3538 			am.addr.scale         = scale;
3539 			am.addr.use_frame     = 0;
3540 			am.addr.tls_segment   = false;
3541 			am.addr.frame_entity  = NULL;
3542 			am.addr.symconst_sign = 0;
3543 			am.mem_proj           = am.addr.mem;
3544 			am.op_type            = ia32_AddrModeS;
3547 			am.pinned             = op_pin_state_floats;
3549 			am.ins_permuted       = false;
3551 			if (ia32_cg_config.use_sse2)
3552 				load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3554 				load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3555 			set_am_attributes(load, &am);
3557 			return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3559 		panic("cannot transform floating point Mux");
3562 		assert(ia32_mode_needs_gp_reg(mode));
3565 			ir_node    *cmp_left  = get_Cmp_left(sel);
3566 			ir_node    *cmp_right = get_Cmp_right(sel);
3567 			ir_relation relation  = get_Cmp_relation(sel);
3568 			ir_node    *val_true  = mux_true;
3569 			ir_node    *val_false = mux_false;
			/* normalize so the zero constant is the false value */
3571 			if (is_Const(val_true) && is_Const_null(val_true)) {
3572 				ir_node *tmp = val_false;
3573 				val_false = val_true;
3575 				relation  = get_negated_relation(relation);
			/* Mux(a >/< b, a - b, 0) => unsigned doz */
3577 			if (is_Const_0(val_false) && is_Sub(val_true)) {
3578 				if ((relation & ir_relation_greater)
3579 					&& get_Sub_left(val_true) == cmp_left
3580 					&& get_Sub_right(val_true) == cmp_right) {
3581 					return create_doz(node, cmp_left, cmp_right);
3583 				if ((relation & ir_relation_less)
3584 					&& get_Sub_left(val_true) == cmp_right
3585 					&& get_Sub_right(val_true) == cmp_left) {
3586 					return create_doz(node, cmp_right, cmp_left);
3591 		flags = get_flags_node(sel, &cc);
3593 		if (is_Const(mux_true) && is_Const(mux_false)) {
3594 			/* both are const, good */
3595 			ir_tarval *tv_true  = get_Const_tarval(mux_true);
3596 			ir_tarval *tv_false = get_Const_tarval(mux_false);
3597 			setcc_transform_t res;
3600 			find_const_transform(cc, tv_true, tv_false, &res);
			/* apply the recorded steps in reverse order */
3602 			for (step = (int)res.num_steps - 1; step >= 0; --step) {
3605 				switch (res.steps[step].transform) {
3607 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3608 					add_ia32_am_offs_int(new_node, res.steps[step].val);
3610 				case SETCC_TR_ADDxx:
3611 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3614 					new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3615 					set_ia32_am_scale(new_node, res.steps[step].scale);
3616 					set_ia32_am_offs_int(new_node, res.steps[step].val);
3618 				case SETCC_TR_LEAxx:
3619 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3620 					set_ia32_am_scale(new_node, res.steps[step].scale);
3621 					set_ia32_am_offs_int(new_node, res.steps[step].val);
3624 					imm = ia32_immediate_from_long(res.steps[step].scale);
3625 					new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3628 					new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3631 					new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3634 					imm = ia32_immediate_from_long(res.steps[step].val);
3635 					new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3638 					new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3641 					new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3644 					panic("unknown setcc transform");
			/* general case: conditional move */
3648 			new_node = create_CMov(node, sel, flags, cc);
3656  * Create a conversion from x87 state register to general purpose:
 * fist-store the value to the stack frame, then Load it back as an
 * integer.  32-bit unsigned targets are stored as 64-bit signed and
 * only the low word is reloaded (fist has no unsigned form).
3658 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3660 	ir_node         *block  = be_transform_node(get_nodes_block(node));
3661 	ir_node         *op     = get_Conv_op(node);
3662 	ir_node         *new_op = be_transform_node(op);
3663 	ir_graph        *irg    = current_ir_graph;
3664 	dbg_info        *dbgi   = get_irn_dbg_info(node);
3665 	ir_mode         *mode   = get_irn_mode(node);
3666 	ir_node         *frame  = get_irg_frame(irg);
3667 	ir_node         *fist, *load, *mem;
3669 	fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3670 	set_irn_pinned(fist, op_pin_state_floats);
3671 	set_ia32_use_frame(fist);
3672 	set_ia32_op_type(fist, ia32_AddrModeD);
	/* both fist variants expose their M Proj under the same number */
3674 	assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3675 	mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3677 	assert(get_mode_size_bits(mode) <= 32);
3678 	/* exception we can only store signed 32 bit integers, so for unsigned
3679 	   we store a 64bit (signed) integer and load the lower bits */
3680 	if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3681 		set_ia32_ls_mode(fist, mode_Ls);
3683 		set_ia32_ls_mode(fist, mode_Is);
3685 	SET_IA32_ORIG_NODE(fist, node);
	/* reload the integer from the same frame slot */
3688 	load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3690 	set_irn_pinned(load, op_pin_state_floats);
3691 	set_ia32_use_frame(load);
3692 	set_ia32_op_type(load, ia32_AddrModeS);
3693 	set_ia32_ls_mode(load, mode_Is);
3694 	if (get_ia32_ls_mode(fist) == mode_Ls) {
3695 		ia32_attr_t *attr = get_ia32_attr(load);
3696 		attr->data.need_64bit_stackent = 1;
3698 		ia32_attr_t *attr = get_ia32_attr(load);
3699 		attr->data.need_32bit_stackent = 1;
3701 	SET_IA32_ORIG_NODE(load, node);
3703 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3707 * Creates a x87 strict Conv by placing a Store and a Load
/*
 * Forces rounding to tgt_mode precision on the x87 path: store the value
 * to the frame in tgt_mode, immediately reload it, and return the vfld's
 * result Proj. The store/load round trip performs the precision truncation
 * that the 80bit x87 registers would otherwise skip.
 */
3709 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3711 ir_node *block = get_nodes_block(node);
3712 ir_graph *irg = get_Block_irg(block);
3713 dbg_info *dbgi = get_irn_dbg_info(node);
3714 ir_node *frame = get_irg_frame(irg);
3716 ir_node *store, *load;
/* spill with the target precision so the value gets rounded */
3719 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3720 set_ia32_use_frame(store);
3721 set_ia32_op_type(store, ia32_AddrModeD);
3722 SET_IA32_ORIG_NODE(store, node);
3724 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
/* reload the rounded value; depends on the store via store_mem */
3726 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3727 set_ia32_use_frame(load);
3728 set_ia32_op_type(load, ia32_AddrModeS);
3729 SET_IA32_ORIG_NODE(load, node);
3731 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/*
 * Helper constructing an ia32 integer-to-integer Conv node, choosing the
 * 8bit constructor variant when the (smaller) mode is 8 bits wide.
 */
3735 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3736 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
/* both constructors share this signature, so pick via function pointer */
3738 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3740 func = get_mode_size_bits(mode) == 8 ?
3741 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3742 return func(dbgi, block, base, index, mem, val, mode);
3746 * Create a conversion from general purpose to x87 register
/*
 * Lowers an int->float Conv for the x87 path. Preferred: feed the integer
 * directly to fild via source address mode. Fallback: store the value to
 * the frame and fild it from there; unsigned 32bit values get a 64bit
 * spill (upper word zeroed) so fild reads them as non-negative.
 * NOTE(review): brace/else lines of this function were stripped by the dump.
 */
3748 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3750 ir_node *src_block = get_nodes_block(node);
3751 ir_node *block = be_transform_node(src_block);
3752 ir_graph *irg = get_Block_irg(block);
3753 dbg_info *dbgi = get_irn_dbg_info(node);
3754 ir_node *op = get_Conv_op(node);
3755 ir_node *new_op = NULL;
3757 ir_mode *store_mode;
3763 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3764 if (possible_int_mode_for_fp(src_mode)) {
3765 ia32_address_mode_t am;
3767 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3768 if (am.op_type == ia32_AddrModeS) {
3769 ia32_address_t *addr = &am.addr;
/* fild directly from the matched memory operand — no spill needed */
3771 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3772 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3774 set_am_attributes(fild, &am);
3775 SET_IA32_ORIG_NODE(fild, node);
3777 fix_mem_proj(fild, &am);
/* AM matching did not apply — transform the operand normally */
3782 if (new_op == NULL) {
3783 new_op = be_transform_node(op);
3786 mode = get_irn_mode(op);
3788 /* first convert to 32 bit signed if necessary */
3789 if (get_mode_size_bits(src_mode) < 32) {
3790 if (!upper_bits_clean(new_op, src_mode)) {
3791 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3792 SET_IA32_ORIG_NODE(new_op, node);
3797 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can read it from memory */
3800 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3802 set_ia32_use_frame(store);
3803 set_ia32_op_type(store, ia32_AddrModeD);
3804 set_ia32_ls_mode(store, mode_Iu);
3806 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3808 /* exception for 32bit unsigned, do a 64bit spill+load */
3809 if (!mode_is_signed(mode)) {
3812 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* zero the upper 32 bits at frame offset 4 so the 64bit fild sees a
 * non-negative value */
3814 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3815 noreg_GP, nomem, zero_const);
3816 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3818 set_ia32_use_frame(zero_store);
3819 set_ia32_op_type(zero_store, ia32_AddrModeD);
3820 add_ia32_am_offs_int(zero_store, 4);
3821 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must be ordered after both stores */
3823 in[0] = zero_store_mem;
3826 store_mem = new_rd_Sync(dbgi, block, 2, in);
3827 store_mode = mode_Ls;
/* (else branch — signed values only need the 32bit spill) */
3829 store_mode = mode_Is;
/* load the spilled integer into an x87 register */
3833 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3835 set_ia32_use_frame(fild);
3836 set_ia32_op_type(fild, ia32_AddrModeS);
3837 set_ia32_ls_mode(fild, store_mode);
3839 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3845 * Create a conversion from one integer mode into another one
/*
 * Builds an ia32 Conv_I2I for an int->int Conv, matching the operand with
 * (8/16bit) source address mode. If the operand's upper bits are already
 * clean for the smaller mode, the conversion is unnecessary and the
 * operand is used directly (asserts verify no AM was matched in that case).
 */
3847 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3848 dbg_info *dbgi, ir_node *block, ir_node *op,
3851 ir_node *new_block = be_transform_node(block);
3853 ir_mode *smaller_mode;
3854 ia32_address_mode_t am;
3855 ia32_address_t *addr = &am.addr;
/* the conversion only has to preserve the smaller of the two modes */
3858 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3859 smaller_mode = src_mode;
3861 smaller_mode = tgt_mode;
3864 #ifdef DEBUG_libfirm
/* converting a constant should have been folded earlier — warn only */
3866 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3871 match_arguments(&am, block, NULL, op, NULL,
3872 match_am | match_8bit_am | match_16bit_am);
3874 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3875 /* unnecessary conv. in theory it shouldn't have been AM */
3876 assert(is_ia32_NoReg_GP(addr->base));
3877 assert(is_ia32_NoReg_GP(addr->index));
3878 assert(is_NoMem(addr->mem));
3879 assert(am.addr.offset == 0);
3880 assert(am.addr.symconst_ent == NULL);
3884 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3885 addr->mem, am.new_op2, smaller_mode);
3886 set_am_attributes(new_node, &am);
3887 /* match_arguments assume that out-mode = in-mode, this isn't true here
3889 set_ia32_ls_mode(new_node, smaller_mode);
3890 SET_IA32_ORIG_NODE(new_node, node);
3891 new_node = fix_mem_proj(new_node, &am);
3896 * Transforms a Conv node.
3898 * @return The created ia32 Conv node
/*
 * Central Conv lowering. Dispatches on (src float?, tgt float?) and on
 * whether SSE2 or x87 is in use:
 *   float->float: SSE Conv_FP2FP, or x87 strict store/load round trip
 *   float->int:   SSE Conv_FP2I, or gen_x87_fp_to_gp
 *   int->float:   SSE Conv_I2FP, or gen_x87_gp_to_fp (+ strict conv when
 *                 the int has more mantissa bits than the float)
 *   int->int:     create_I2I_Conv, no-op when bit widths match
 * NOTE(review): brace/else/blank lines were stripped from this dump, so
 * several else branches are implicit here.
 */
3900 static ir_node *gen_Conv(ir_node *node)
3902 ir_node *block = get_nodes_block(node);
3903 ir_node *new_block = be_transform_node(block);
3904 ir_node *op = get_Conv_op(node);
3905 ir_node *new_op = NULL;
3906 dbg_info *dbgi = get_irn_dbg_info(node);
3907 ir_mode *src_mode = get_irn_mode(op);
3908 ir_mode *tgt_mode = get_irn_mode(node);
3909 int src_bits = get_mode_size_bits(src_mode);
3910 int tgt_bits = get_mode_size_bits(tgt_mode);
3911 ir_node *res = NULL;
/* 64bit operations must have been lowered to 32bit before this pass */
3913 assert(!mode_is_int(src_mode) || src_bits <= 32);
3914 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3916 /* modeB -> X should already be lowered by the lower_mode_b pass */
3917 if (src_mode == mode_b) {
3918 panic("ConvB not lowered %+F", node);
/* same-mode Conv: only strict x87 converts have an effect */
3921 if (src_mode == tgt_mode) {
3922 if (get_Conv_strict(node)) {
3923 if (ia32_cg_config.use_sse2) {
3924 /* when we are in SSE mode, we can kill all strict no-op conversion */
3925 return be_transform_node(op);
3928 /* this should be optimized already, but who knows... */
3929 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3930 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3931 return be_transform_node(op);
3935 if (mode_is_float(src_mode)) {
3936 new_op = be_transform_node(op);
3937 /* we convert from float ... */
3938 if (mode_is_float(tgt_mode)) {
3940 if (ia32_cg_config.use_sse2) {
3941 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3942 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3944 set_ia32_ls_mode(res, tgt_mode);
/* (x87 path) */
3946 if (get_Conv_strict(node)) {
3947 /* if fp_no_float_fold is not set then we assume that we
3948 * don't have any float operations in a non
3949 * mode_float_arithmetic mode and can skip strict upconvs */
3950 if (src_bits < tgt_bits) {
3951 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* strict downconv: force rounding via store/load */
3954 res = gen_x87_strict_conv(tgt_mode, new_op);
3955 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
/* non-strict x87 float->float is a no-op (80bit registers) */
3959 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3964 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3965 if (ia32_cg_config.use_sse2) {
3966 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3968 set_ia32_ls_mode(res, src_mode);
3970 return gen_x87_fp_to_gp(node);
3974 /* we convert from int ... */
3975 if (mode_is_float(tgt_mode)) {
3977 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3978 if (ia32_cg_config.use_sse2) {
3979 new_op = be_transform_node(op);
3980 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3982 set_ia32_ls_mode(res, tgt_mode);
3984 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3985 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3986 res = gen_x87_gp_to_fp(node, src_mode);
3988 /* we need a strict-Conv, if the int mode has more bits than the
3990 if (float_mantissa < int_mantissa) {
3991 res = gen_x87_strict_conv(tgt_mode, res);
3992 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3996 } else if (tgt_mode == mode_b) {
3997 /* mode_b lowering already took care that we only have 0/1 values */
3998 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3999 src_mode, tgt_mode));
4000 return be_transform_node(op);
/* int->int: equal widths are a no-op regardless of signedness */
4003 if (src_bits == tgt_bits) {
4004 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
4005 src_mode, tgt_mode));
4006 return be_transform_node(op);
4009 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/*
 * Tries to encode the node as an ia32 Immediate (respecting the given
 * constraint type); falls back to the normal transformation otherwise.
 */
4017 static ir_node *create_immediate_or_transform(ir_node *node,
4018 char immediate_constraint_type)
4020 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
/* NULL means the node is not representable as an immediate */
4021 if (new_node == NULL) {
4022 new_node = be_transform_node(node);
4028 * Transforms a FrameAddr into an ia32 Add.
/*
 * Lowered as a Lea based on the frame pointer; the actual frame offset is
 * filled in later from the attached frame entity.
 */
4030 static ir_node *gen_be_FrameAddr(ir_node *node)
4032 ir_node *block = be_transform_node(get_nodes_block(node));
4033 ir_node *op = be_get_FrameAddr_frame(node);
4034 ir_node *new_op = be_transform_node(op);
4035 dbg_info *dbgi = get_irn_dbg_info(node);
4038 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
4039 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
4040 set_ia32_use_frame(new_node);
4042 SET_IA32_ORIG_NODE(new_node, node);
4048 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/*
 * Transforms a be_Return. Only the SSE2 + single-primitive-float-result
 * case needs work (calling convention returns floats on the x87 stack):
 * store the XMM value to the frame, vfld it back, and rebuild the Return
 * with the fld result and a Sync'd memory input. All other cases are
 * plain duplication.
 */
4050 static ir_node *gen_be_Return(ir_node *node)
4052 ir_graph *irg = current_ir_graph;
4053 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
4054 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
4055 ir_node *new_ret_val = be_transform_node(ret_val);
4056 ir_node *new_ret_mem = be_transform_node(ret_mem);
4057 ir_entity *ent = get_irg_entity(irg);
4058 ir_type *tp = get_entity_type(ent);
4059 dbg_info *dbgi = get_irn_dbg_info(node);
4060 ir_node *block = be_transform_node(get_nodes_block(node));
/* fast path: nothing to fix up unless SSE2 returns a float */
4074 assert(ret_val != NULL);
4075 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4076 return be_duplicate_node(node);
4079 res_type = get_method_res_type(tp, 0);
4081 if (! is_Primitive_type(res_type)) {
4082 return be_duplicate_node(node);
4085 mode = get_type_mode(res_type);
4086 if (! mode_is_float(mode)) {
4087 return be_duplicate_node(node);
4090 assert(get_method_n_ress(tp) == 1);
4092 frame = get_irg_frame(irg);
4094 /* store xmm0 onto stack */
4095 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4096 new_ret_mem, new_ret_val);
4097 set_ia32_ls_mode(sse_store, mode);
4098 set_ia32_op_type(sse_store, ia32_AddrModeD);
4099 set_ia32_use_frame(sse_store);
4100 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4102 /* load into x87 register */
4103 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4104 set_ia32_op_type(fld, ia32_AddrModeS);
4105 set_ia32_use_frame(fld);
4107 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4108 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4110 /* create a new return */
4111 arity = get_irn_arity(node);
4112 in = ALLOCAN(ir_node*, arity);
4113 pop = be_Return_get_pop(node);
/* copy all inputs, substituting the rerouted value/memory operands */
4114 for (i = 0; i < arity; ++i) {
4115 ir_node *op = get_irn_n(node, i);
4116 if (op == ret_val) {
4118 } else if (op == ret_mem) {
4121 in[i] = be_transform_node(op);
4124 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4125 copy_node_attr(irg, node, new_node);
4131 * Transform a be_AddSP into an ia32_SubSP.
/*
 * The stack grows downwards on ia32, so enlarging the frame (AddSP) maps
 * to a SubSP instruction. The stack output is pinned to ESP.
 */
4133 static ir_node *gen_be_AddSP(ir_node *node)
4135 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4136 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4138 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4139 match_am | match_immediate);
4140 assert(is_ia32_SubSP(new_node));
4141 arch_irn_set_register(new_node, pn_ia32_SubSP_stack,
4142 &ia32_registers[REG_ESP]);
4147 * Transform a be_SubSP into an ia32_AddSP
/*
 * Mirror of gen_be_AddSP: shrinking the frame maps to an AddSP
 * instruction; the stack output is pinned to ESP.
 */
4149 static ir_node *gen_be_SubSP(ir_node *node)
4151 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4152 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4154 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4155 match_am | match_immediate);
4156 assert(is_ia32_AddSP(new_node));
4157 arch_irn_set_register(new_node, pn_ia32_AddSP_stack,
4158 &ia32_registers[REG_ESP]);
4163 * Change some phi modes
/*
 * Transforms a Phi: picks the register-class requirement for its mode
 * (gp for integers/pointers, xmm or vfp for floats depending on SSE2,
 * none otherwise) and rebuilds the Phi with its OLD inputs — loops make
 * the new inputs unavailable yet; they are fixed up after the walk.
 */
4165 static ir_node *gen_Phi(ir_node *node)
4167 const arch_register_req_t *req;
4168 ir_node *block = be_transform_node(get_nodes_block(node));
4169 ir_graph *irg = current_ir_graph;
4170 dbg_info *dbgi = get_irn_dbg_info(node);
4171 ir_mode *mode = get_irn_mode(node);
4174 if (ia32_mode_needs_gp_reg(mode)) {
4175 /* we shouldn't have any 64bit stuff around anymore */
4176 assert(get_mode_size_bits(mode) <= 32);
4177 /* all integer operations are on 32bit registers now */
4179 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4180 } else if (mode_is_float(mode)) {
4181 if (ia32_cg_config.use_sse2) {
4183 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4186 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4189 req = arch_no_register_req;
4192 /* phi nodes allow loops, so we use the old arguments for now
4193 * and fix this later */
4194 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4195 get_irn_in(node) + 1);
4196 copy_node_attr(irg, node, phi);
4197 be_duplicate_deps(node, phi);
4199 arch_set_out_register_req(phi, 0, req);
/* make sure the predecessors get transformed even though we kept the
 * old inputs for now */
4201 be_enqueue_preds(node);
/*
 * Transforms an unconditional Jmp into an ia32 Jmp in the transformed
 * block.
 */
4206 static ir_node *gen_Jmp(ir_node *node)
4208 ir_node *block = get_nodes_block(node);
4209 ir_node *new_block = be_transform_node(block);
4210 dbg_info *dbgi = get_irn_dbg_info(node);
4213 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4214 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Transforms an indirect jump. The target (a mode_P value) is matched
 * with address mode / immediate, so the IJmp can jump through memory or
 * to a constant address directly.
 */
4222 static ir_node *gen_IJmp(ir_node *node)
4224 ir_node *block = get_nodes_block(node);
4225 ir_node *new_block = be_transform_node(block);
4226 dbg_info *dbgi = get_irn_dbg_info(node);
4227 ir_node *op = get_IJmp_target(node);
4229 ia32_address_mode_t am;
4230 ia32_address_t *addr = &am.addr;
4232 assert(get_irn_mode(op) == mode_P);
4234 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4236 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4237 addr->mem, am.new_op2);
4238 set_am_attributes(new_node, &am);
4239 SET_IA32_ORIG_NODE(new_node, node);
4241 new_node = fix_mem_proj(new_node, &am);
/*
 * Transforms a lowered l_Add (carry-producing add from 64bit lowering)
 * into a real ia32 Add in mode_T so both result and flags Projs work.
 */
4246 static ir_node *gen_ia32_l_Add(ir_node *node)
4248 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4249 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4250 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4251 match_commutative | match_am | match_immediate |
4252 match_mode_neutral);
/* gen_binop may have wrapped the Add in a result Proj — unwrap it so
 * we can switch the Add itself to mode_T */
4254 if (is_Proj(lowered)) {
4255 lowered = get_Proj_pred(lowered);
4257 assert(is_ia32_Add(lowered));
4258 set_irn_mode(lowered, mode_T);
/*
 * Transforms a lowered l_Adc (add with carry) into an ia32 Adc,
 * threading the flags input via gen_binop_flags.
 */
4264 static ir_node *gen_ia32_l_Adc(ir_node *node)
4266 return gen_binop_flags(node, new_bd_ia32_Adc,
4267 match_commutative | match_am | match_immediate |
4268 match_mode_neutral);
4272 * Transforms a l_MulS into a "real" MulS node.
4274 * @return the created ia32 Mul node
/* Unsigned widening multiply from the 64bit lowering -> ia32 Mul. */
4276 static ir_node *gen_ia32_l_Mul(ir_node *node)
4278 ir_node *left = get_binop_left(node);
4279 ir_node *right = get_binop_right(node);
4281 return gen_binop(node, left, right, new_bd_ia32_Mul,
4282 match_commutative | match_am | match_mode_neutral);
4286 * Transforms a l_IMulS into a "real" IMul1OPS node.
4288 * @return the created ia32 IMul1OP node
/* Signed widening multiply from the 64bit lowering -> ia32 IMul1OP. */
4290 static ir_node *gen_ia32_l_IMul(ir_node *node)
4292 ir_node *left = get_binop_left(node);
4293 ir_node *right = get_binop_right(node);
4295 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4296 match_commutative | match_am | match_mode_neutral);
/*
 * Transforms a lowered l_Sub (borrow-producing subtract) into a real
 * ia32 Sub in mode_T so both result and flags Projs work. Note: not
 * match_commutative, subtraction isn't.
 */
4299 static ir_node *gen_ia32_l_Sub(ir_node *node)
4301 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4302 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4303 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4304 match_am | match_immediate | match_mode_neutral);
/* unwrap a possible result Proj, then switch the Sub to mode_T */
4306 if (is_Proj(lowered)) {
4307 lowered = get_Proj_pred(lowered);
4309 assert(is_ia32_Sub(lowered));
4310 set_irn_mode(lowered, mode_T);
/*
 * Transforms a lowered l_Sbb (subtract with borrow) into an ia32 Sbb,
 * threading the flags input via gen_binop_flags.
 */
4316 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4318 return gen_binop_flags(node, new_bd_ia32_Sbb,
4319 match_am | match_immediate | match_mode_neutral);
/*
 * Transforms a lowered 64bit-int-to-float conversion (x87 only): store
 * the low/high 32bit halves into a 64bit frame slot, fild the slot as
 * mode_Ls. For an UNSIGNED 64bit source, fild misreads values with the
 * top bit set as negative, so 2^64 (the ULLBIAS constant) is added back
 * conditionally: the high word shifted right by 31 is used as the AM
 * index (scale selects 0 or the bias entry).
 * NOTE(review): brace/blank lines were stripped from this dump.
 */
4322 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4324 ir_node *src_block = get_nodes_block(node);
4325 ir_node *block = be_transform_node(src_block);
4326 ir_graph *irg = current_ir_graph;
4327 dbg_info *dbgi = get_irn_dbg_info(node);
4328 ir_node *frame = get_irg_frame(irg);
4329 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4330 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4331 ir_node *new_val_low = be_transform_node(val_low);
4332 ir_node *new_val_high = be_transform_node(val_high);
4334 ir_node *sync, *fild, *res;
4336 ir_node *store_high;
4340 if (ia32_cg_config.use_sse2) {
4341 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* do the 64bit spill as two 32bit stores (high half at offset 4) */
4345 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4347 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4349 SET_IA32_ORIG_NODE(store_low, node);
4350 SET_IA32_ORIG_NODE(store_high, node);
4352 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4353 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4355 set_ia32_use_frame(store_low);
4356 set_ia32_use_frame(store_high);
4357 set_ia32_op_type(store_low, ia32_AddrModeD);
4358 set_ia32_op_type(store_high, ia32_AddrModeD);
4359 set_ia32_ls_mode(store_low, mode_Iu);
4360 set_ia32_ls_mode(store_high, mode_Is);
4361 add_ia32_am_offs_int(store_high, 4);
/* order the fild after both halves are written */
4365 sync = new_rd_Sync(dbgi, block, 2, in);
4368 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4370 set_ia32_use_frame(fild);
4371 set_ia32_op_type(fild, ia32_AddrModeS);
4372 set_ia32_ls_mode(fild, mode_Ls);
4374 SET_IA32_ORIG_NODE(fild, node);
4376 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned source: conditionally add 2^64 to undo fild's signed read */
4378 if (! mode_is_signed(get_irn_mode(val_high))) {
4379 ia32_address_mode_t am;
/* sign bit of the high word selects bias entry 0 or 1 via AM index */
4381 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4384 am.addr.base = get_symconst_base();
4385 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4386 am.addr.mem = nomem;
4389 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4390 am.addr.tls_segment = false;
4391 am.addr.use_frame = 0;
4392 am.addr.frame_entity = NULL;
4393 am.addr.symconst_sign = 0;
4394 am.ls_mode = mode_F;
4395 am.mem_proj = nomem;
4396 am.op_type = ia32_AddrModeS;
4398 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4399 am.pinned = op_pin_state_floats;
4401 am.ins_permuted = false;
4403 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4404 am.new_op1, am.new_op2, get_fpcw());
4405 set_am_attributes(fadd, &am);
4407 set_irn_mode(fadd, mode_T);
4408 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/*
 * Transforms a lowered float-to-64bit-int conversion: fist the value as
 * a 64bit integer into a frame slot and return the memory Proj. The two
 * 32bit result halves are read back later by gen_Proj_l_FloattoLL.
 */
4413 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4415 ir_node *src_block = get_nodes_block(node);
4416 ir_node *block = be_transform_node(src_block);
4417 ir_graph *irg = get_Block_irg(block);
4418 dbg_info *dbgi = get_irn_dbg_info(node);
4419 ir_node *frame = get_irg_frame(irg);
4420 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4421 ir_node *new_val = be_transform_node(val);
4424 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4425 SET_IA32_ORIG_NODE(fist, node);
4426 set_ia32_use_frame(fist);
4427 set_ia32_op_type(fist, ia32_AddrModeD);
4428 set_ia32_ls_mode(fist, mode_Ls);
/* vfist and vfisttp must agree on the memory Proj number */
4430 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4431 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/*
 * Transforms a result Proj of an l_FloattoLL: load one 32bit half of the
 * 64bit value that gen_ia32_l_FloattoLL stored into the frame slot
 * (offset 4 for the high half).
 */
4434 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4436 ir_node *block = be_transform_node(get_nodes_block(node));
4437 ir_graph *irg = get_Block_irg(block);
4438 ir_node *pred = get_Proj_pred(node);
4439 ir_node *new_pred = be_transform_node(pred);
4440 ir_node *frame = get_irg_frame(irg);
4441 dbg_info *dbgi = get_irn_dbg_info(node);
4442 long pn = get_Proj_proj(node);
4447 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4448 SET_IA32_ORIG_NODE(load, node);
4449 set_ia32_use_frame(load);
4450 set_ia32_op_type(load, ia32_AddrModeS);
4451 set_ia32_ls_mode(load, mode_Iu);
4452 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4453 * 32 bit from it with this particular load */
4454 attr = get_ia32_attr(load);
4455 attr->data.need_64bit_stackent = 1;
4457 if (pn == pn_ia32_l_FloattoLL_res_high) {
4458 add_ia32_am_offs_int(load, 4);
4460 assert(pn == pn_ia32_l_FloattoLL_res_low);
4463 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4469 * Transform the Projs of an AddSP.
/*
 * Renumbers be_AddSP Projs to the corresponding ia32 SubSP Projs
 * (AddSP was lowered to SubSP because the stack grows downwards); the
 * stack Proj is additionally pinned to ESP.
 */
4471 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4473 ir_node *pred = get_Proj_pred(node);
4474 ir_node *new_pred = be_transform_node(pred);
4475 dbg_info *dbgi = get_irn_dbg_info(node);
4476 long proj = get_Proj_proj(node);
4478 if (proj == pn_be_AddSP_sp) {
4479 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4480 pn_ia32_SubSP_stack);
4481 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4483 } else if (proj == pn_be_AddSP_res) {
4484 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4485 pn_ia32_SubSP_addr);
4486 } else if (proj == pn_be_AddSP_M) {
4487 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4490 panic("No idea how to transform proj->AddSP");
4494 * Transform the Projs of a SubSP.
/*
 * Renumbers be_SubSP Projs to the corresponding ia32 AddSP Projs (SubSP
 * was lowered to AddSP); the stack Proj is pinned to ESP.
 */
4496 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4498 ir_node *pred = get_Proj_pred(node);
4499 ir_node *new_pred = be_transform_node(pred);
4500 dbg_info *dbgi = get_irn_dbg_info(node);
4501 long proj = get_Proj_proj(node);
4503 if (proj == pn_be_SubSP_sp) {
4504 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4505 pn_ia32_AddSP_stack);
4506 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4508 } else if (proj == pn_be_SubSP_M) {
4509 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4512 panic("No idea how to transform proj->SubSP");
4516 * Transform and renumber the Projs from a Load.
/*
 * Renumbers Load Projs depending on what the Load was transformed into
 * (ia32 Load, Conv_I2I with source AM, xLoad, vfld, or folded into
 * another node via address mode). Memory Projs of multi-user Loads are
 * deliberately NOT transformed yet, since the Load may still be folded
 * into a user as a source address mode operand.
 * NOTE(review): case labels / brace lines were stripped from this dump,
 * so each switch is missing its pn_Load_res / pn_Load_M labels here.
 */
4518 static ir_node *gen_Proj_Load(ir_node *node)
4521 ir_node *pred = get_Proj_pred(node);
4522 dbg_info *dbgi = get_irn_dbg_info(node);
4523 long proj = get_Proj_proj(node);
4525 /* loads might be part of source address mode matches, so we don't
4526 * transform the ProjMs yet (with the exception of loads whose result is
4529 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4532 /* this is needed, because sometimes we have loops that are only
4533 reachable through the ProjM */
4534 be_enqueue_preds(node);
4535 /* do it in 2 steps, to silence firm verifier */
4536 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4537 set_Proj_proj(res, pn_ia32_mem);
4541 /* renumber the proj */
4542 new_pred = be_transform_node(pred);
4543 if (is_ia32_Load(new_pred)) {
4544 switch ((pn_Load)proj) {
4546 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4548 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4549 case pn_Load_X_except:
4550 /* This Load might raise an exception. Mark it. */
4551 set_ia32_exc_label(new_pred, 1);
4552 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4553 case pn_Load_X_regular:
4554 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* the Load got fused into an integer conversion via source AM */
4556 } else if (is_ia32_Conv_I2I(new_pred) ||
4557 is_ia32_Conv_I2I8Bit(new_pred)) {
4558 set_irn_mode(new_pred, mode_T);
4559 switch ((pn_Load)proj) {
4561 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4563 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4564 case pn_Load_X_except:
4565 /* This Load might raise an exception. Mark it. */
4566 set_ia32_exc_label(new_pred, 1);
4567 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4568 case pn_Load_X_regular:
4569 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
/* SSE load */
4571 } else if (is_ia32_xLoad(new_pred)) {
4572 switch ((pn_Load)proj) {
4574 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4576 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4577 case pn_Load_X_except:
4578 /* This Load might raise an exception. Mark it. */
4579 set_ia32_exc_label(new_pred, 1);
4580 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4581 case pn_Load_X_regular:
4582 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
/* x87 load */
4584 } else if (is_ia32_vfld(new_pred)) {
4585 switch ((pn_Load)proj) {
4587 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4589 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4590 case pn_Load_X_except:
4591 /* This Load might raise an exception. Mark it. */
4592 set_ia32_exc_label(new_pred, 1);
4593 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4594 case pn_Load_X_regular:
4595 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4598 /* can happen for ProJMs when source address mode happened for the
4601 /* however it should not be the result proj, as that would mean the
4602 load had multiple users and should not have been used for
4604 if (proj != pn_Load_M) {
4605 panic("internal error: transformed node not a Load");
4607 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4610 panic("No idea how to transform Proj(Load) %+F", node);
/*
 * Renumbers Store Projs depending on what the Store was transformed into
 * (ia32 Store, vfist(tp), vfst, xStore, a Sync built by
 * gen_float_const_Store, or a node using destination address mode).
 * NOTE(review): several pn_Store_M case labels / return lines were
 * stripped from this dump.
 */
4613 static ir_node *gen_Proj_Store(ir_node *node)
4615 ir_node *pred = get_Proj_pred(node);
4616 ir_node *new_pred = be_transform_node(pred);
4617 dbg_info *dbgi = get_irn_dbg_info(node);
4618 long pn = get_Proj_proj(node);
4620 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4621 switch ((pn_Store)pn) {
4623 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4624 case pn_Store_X_except:
4625 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4626 case pn_Store_X_regular:
4627 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4629 } else if (is_ia32_vfist(new_pred)) {
4630 switch ((pn_Store)pn) {
4632 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4633 case pn_Store_X_except:
4634 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4635 case pn_Store_X_regular:
4636 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4638 } else if (is_ia32_vfisttp(new_pred)) {
4639 switch ((pn_Store)pn) {
4641 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4642 case pn_Store_X_except:
4643 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4644 case pn_Store_X_regular:
4645 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4647 } else if (is_ia32_vfst(new_pred)) {
4648 switch ((pn_Store)pn) {
4650 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4651 case pn_Store_X_except:
4652 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4653 case pn_Store_X_regular:
4654 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4656 } else if (is_ia32_xStore(new_pred)) {
4657 switch ((pn_Store)pn) {
4659 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4660 case pn_Store_X_except:
4661 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4662 case pn_Store_X_regular:
4663 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4665 } else if (is_Sync(new_pred)) {
4666 /* hack for the case that gen_float_const_Store produced a Sync */
4667 if (pn == pn_Store_M) {
4670 panic("exception control flow for gen_float_const_Store not implemented yet");
4671 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4672 /* destination address mode */
4673 if (pn == pn_Store_M) {
4676 panic("exception control flow for destination AM not implemented yet");
4679 panic("No idea how to transform Proj(Store) %+F", node);
4683 * Transform and renumber the Projs from a Div or Mod instruction.
/*
 * Renumbers Div Projs for the three possible lowerings: integer Div/IDiv
 * (which share Proj numbers, asserted below), SSE xDiv, and x87 vfdiv.
 * NOTE(review): the pn_Div_M / pn_Div_res case labels were stripped from
 * this dump.
 */
4685 static ir_node *gen_Proj_Div(ir_node *node)
4687 ir_node *pred = get_Proj_pred(node);
4688 ir_node *new_pred = be_transform_node(pred);
4689 dbg_info *dbgi = get_irn_dbg_info(node);
4690 long proj = get_Proj_proj(node);
/* Div and IDiv must share Proj numbers so one renumbering works for both */
4692 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4693 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4695 switch ((pn_Div)proj) {
4697 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4698 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4699 } else if (is_ia32_xDiv(new_pred)) {
4700 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4701 } else if (is_ia32_vfdiv(new_pred)) {
4702 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4704 panic("Div transformed to unexpected thing %+F", new_pred);
4707 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4708 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4709 } else if (is_ia32_xDiv(new_pred)) {
4710 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4711 } else if (is_ia32_vfdiv(new_pred)) {
4712 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4714 panic("Div transformed to unexpected thing %+F", new_pred);
4716 case pn_Div_X_except:
4717 set_ia32_exc_label(new_pred, 1);
4718 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4719 case pn_Div_X_regular:
4720 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4723 panic("No idea how to transform proj->Div");
4727 * Transform and renumber the Projs from a Div or Mod instruction.
/*
 * Renumbers Mod Projs. Mod is always lowered to integer Div/IDiv (the
 * remainder is their mod_res output); floats have no Mod here.
 * NOTE(review): the pn_Mod_M / pn_Mod_res case labels were stripped from
 * this dump.
 */
4729 static ir_node *gen_Proj_Mod(ir_node *node)
4731 ir_node *pred = get_Proj_pred(node);
4732 ir_node *new_pred = be_transform_node(pred);
4733 dbg_info *dbgi = get_irn_dbg_info(node);
4734 long proj = get_Proj_proj(node);
4736 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4737 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4738 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4740 switch ((pn_Mod)proj) {
4742 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4744 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4745 case pn_Mod_X_except:
4746 set_ia32_exc_label(new_pred, 1);
4747 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4748 case pn_Mod_X_regular:
4749 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4751 panic("No idea how to transform proj->Mod");
4755 * Transform and renumber the Projs from a CopyB.
/*
 * Renumbers CopyB Projs for the two lowerings: CopyB_i (fixed size,
 * unrolled moves) and CopyB (rep movs).
 * NOTE(review): the pn_CopyB_M case label and fall-through break lines
 * were stripped from this dump.
 */
4757 static ir_node *gen_Proj_CopyB(ir_node *node)
4759 ir_node *pred = get_Proj_pred(node);
4760 ir_node *new_pred = be_transform_node(pred);
4761 dbg_info *dbgi = get_irn_dbg_info(node);
4762 long proj = get_Proj_proj(node);
4764 switch ((pn_CopyB)proj) {
4766 if (is_ia32_CopyB_i(new_pred)) {
4767 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4768 } else if (is_ia32_CopyB(new_pred)) {
4769 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4772 case pn_CopyB_X_regular:
4773 if (is_ia32_CopyB_i(new_pred)) {
4774 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4775 } else if (is_ia32_CopyB(new_pred)) {
4776 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4779 case pn_CopyB_X_except:
4780 if (is_ia32_CopyB_i(new_pred)) {
4781 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4782 } else if (is_ia32_CopyB(new_pred)) {
4783 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4788 panic("No idea how to transform proj->CopyB");
/* Transform a be_Call into an ia32_Call.
 *
 * The callee address may be matched into an address mode or immediate
 * (match_arguments). Register parameters arrive as extra be_Call inputs with
 * "limited" register requirements and are routed into the eax/ecx/edx inputs
 * of the ia32_Call; the last input is the fpcw (x87 control word) value.
 * SSE2 calls are recorded in call_list/call_types so that
 * postprocess_fp_call_results() can later move x87 float results into XMM
 * registers. */
4791 static ir_node *gen_be_Call(ir_node *node)
4793 dbg_info *const dbgi = get_irn_dbg_info(node);
4794 ir_node *const src_block = get_nodes_block(node);
4795 ir_node *const block = be_transform_node(src_block);
4796 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4797 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4798 ir_node *const sp = be_transform_node(src_sp);
4799 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4800 ia32_address_mode_t am;
4801 ia32_address_t *const addr = &am.addr;
/* register parameters default to "no register" until found below */
4806 ir_node * eax = noreg_GP;
4807 ir_node * ecx = noreg_GP;
4808 ir_node * edx = noreg_GP;
4809 unsigned const pop = be_Call_get_pop(node);
4810 ir_type *const call_tp = be_Call_get_type(node);
4811 int old_no_pic_adjust;
4812 int throws_exception = ir_throws_exception(node);
4814 /* Run the x87 simulator if the call returns a float value */
4815 if (get_method_n_ress(call_tp) > 0) {
4816 ir_type *const res_type = get_method_res_type(call_tp, 0);
4817 ir_mode *const res_mode = get_type_mode(res_type);
4819 if (res_mode != NULL && mode_is_float(res_mode)) {
4820 ir_graph *irg = current_ir_graph;
4821 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4822 irg_data->do_x87_sim = 1;
4826 /* We do not want be_Call direct calls */
4827 assert(be_Call_get_entity(node) == NULL);
4829 /* special case for PIC trampoline calls */
4830 old_no_pic_adjust = ia32_no_pic_adjust;
4831 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
/* try to fold the callee address into an address mode / immediate */
4833 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4834 match_am | match_immediate);
4836 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw value; walk the remaining register
 * parameters backwards down to the first argument input */
4838 i = get_irn_arity(node) - 1;
4839 fpcw = be_transform_node(get_irn_n(node, i--));
4840 for (; i >= n_be_Call_first_arg; --i) {
4841 arch_register_req_t const *const req = arch_get_register_req(node, i);
4842 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4844 assert(req->type == arch_register_req_type_limited);
4845 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* route the parameter to the one register its requirement allows;
 * each register may be claimed at most once */
4847 switch (*req->limited) {
4848 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4849 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4850 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4851 default: panic("Invalid GP register for register parameter");
4855 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4856 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4857 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4858 ir_set_throws_exception(call, throws_exception);
4859 set_am_attributes(call, &am);
4860 call = fix_mem_proj(call, &am);
4862 if (get_irn_pinned(node) == op_pin_state_pinned)
4863 set_irn_pinned(call, op_pin_state_pinned);
4865 SET_IA32_ORIG_NODE(call, node);
4867 if (ia32_cg_config.use_sse2) {
4868 /* remember this call for post-processing */
4869 ARR_APP1(ir_node *, call_list, call);
4870 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4877 * Transform Builtin trap
/* Transform the trap builtin into an ia32 UD2 (invalid opcode) instruction. */
4879 static ir_node *gen_trap(ir_node *node)
4881 dbg_info *dbgi = get_irn_dbg_info(node);
4882 ir_node *block = be_transform_node(get_nodes_block(node));
4883 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4885 return new_bd_ia32_UD2(dbgi, block, mem);
4889 * Transform Builtin debugbreak
/* Transform the debugbreak builtin into an ia32 Breakpoint (int3) node. */
4891 static ir_node *gen_debugbreak(ir_node *node)
4893 dbg_info *dbgi = get_irn_dbg_info(node);
4894 ir_node *block = be_transform_node(get_nodes_block(node));
4895 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4897 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4901 * Transform Builtin return_address
/* Transform the return_address builtin: walk `value` frames up the call chain
 * (via ClimbFrame when value != 0) and load the return address stored in
 * that frame. Parameter 0 must be a Const giving the frame count, parameter 1
 * is the current frame pointer. */
4903 static ir_node *gen_return_address(ir_node *node)
4905 ir_node *param = get_Builtin_param(node, 0);
4906 ir_node *frame = get_Builtin_param(node, 1);
4907 dbg_info *dbgi = get_irn_dbg_info(node);
4908 ir_tarval *tv = get_Const_tarval(param);
4909 ir_graph *irg = get_irn_irg(node);
4910 unsigned long value = get_tarval_long(tv);
4912 ir_node *block = be_transform_node(get_nodes_block(node));
4913 ir_node *ptr = be_transform_node(frame);
/* ProduceVal creates uninitialized pseudo-values used as scratch
 * operands by the ClimbFrame pseudo-instruction */
4917 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4918 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4919 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4922 /* load the return address from this frame */
4923 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4925 set_irn_pinned(load, get_irn_pinned(node));
4926 set_ia32_op_type(load, ia32_AddrModeS);
4927 set_ia32_ls_mode(load, mode_Iu);
/* address the return-address slot via a frame entity; offset is fixed up
 * by the frame layout phase */
4929 set_ia32_am_offs_int(load, 0);
4930 set_ia32_use_frame(load);
4931 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4933 if (get_irn_pinned(node) == op_pin_state_floats) {
4934 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4935 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4936 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4937 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4940 SET_IA32_ORIG_NODE(load, node);
4941 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4945 * Transform Builtin frame_address
/* Transform the frame_address builtin: walk `value` frames up the call chain
 * (via ClimbFrame when value != 0) and load the saved frame address from that
 * frame. Mirrors gen_return_address but uses the frame-address entity. */
4947 static ir_node *gen_frame_address(ir_node *node)
4949 ir_node *param = get_Builtin_param(node, 0);
4950 ir_node *frame = get_Builtin_param(node, 1);
4951 dbg_info *dbgi = get_irn_dbg_info(node);
4952 ir_tarval *tv = get_Const_tarval(param);
4953 ir_graph *irg = get_irn_irg(node);
4954 unsigned long value = get_tarval_long(tv);
4956 ir_node *block = be_transform_node(get_nodes_block(node));
4957 ir_node *ptr = be_transform_node(frame);
/* scratch pseudo-values for the ClimbFrame pseudo-instruction */
4962 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4963 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4964 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4967 /* load the frame address from this frame */
4968 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4970 set_irn_pinned(load, get_irn_pinned(node));
4971 set_ia32_op_type(load, ia32_AddrModeS);
4972 set_ia32_ls_mode(load, mode_Iu);
/* the entity may be missing (NULL); the load is emitted anyway — see the
 * "will fail anyway" remark below */
4974 ent = ia32_get_frame_address_entity(irg);
4976 set_ia32_am_offs_int(load, 0);
4977 set_ia32_use_frame(load);
4978 set_ia32_frame_ent(load, ent);
4980 /* will fail anyway, but gcc does this: */
4981 set_ia32_am_offs_int(load, 0);
4984 if (get_irn_pinned(node) == op_pin_state_floats) {
4985 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4986 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4987 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4988 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4991 SET_IA32_ORIG_NODE(load, node);
4992 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4996 * Transform Builtin prefetch.
/* Transform the prefetch builtin.
 *
 * Parameter 0 is the address, parameter 1 the read/write flag, parameter 2
 * the SSE locality hint. If the target supports neither SSE nor 3DNow!
 * prefetches, the builtin degenerates to its memory input. A write prefetch
 * prefers 3DNow! PREFETCHW; otherwise an SSE PREFETCHh variant is selected
 * by locality, falling back to the plain 3DNow! prefetch. */
4998 static ir_node *gen_prefetch(ir_node *node)
5001 ir_node *ptr, *block, *mem, *base, *idx;
5002 ir_node *param, *new_node;
5005 ia32_address_t addr;
5007 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
5008 /* no prefetch at all, route memory */
5009 return be_transform_node(get_Builtin_mem(node));
/* read/write flag must be a constant */
5012 param = get_Builtin_param(node, 1);
5013 tv = get_Const_tarval(param);
5014 rw = get_tarval_long(tv);
5016 /* construct load address */
5017 memset(&addr, 0, sizeof(addr));
5018 ptr = get_Builtin_param(node, 0);
5019 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5026 base = be_transform_node(base);
5032 idx = be_transform_node(idx);
5035 dbgi = get_irn_dbg_info(node);
5036 block = be_transform_node(get_nodes_block(node));
5037 mem = be_transform_node(get_Builtin_mem(node));
5039 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
5040 /* we have 3DNow!, this was already checked above */
5041 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
5042 } else if (ia32_cg_config.use_sse_prefetch) {
5043 /* note: rw == 1 is IGNORED in that case */
5044 param = get_Builtin_param(node, 2);
5045 tv = get_Const_tarval(param);
5046 locality = get_tarval_long(tv);
5048 /* SSE style prefetch */
/* locality 0 → NTA, 1 → T2, 2 → T1, >=3 → T0 */
5051 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
5054 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
5057 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
5060 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
5064 assert(ia32_cg_config.use_3dnow_prefetch);
5065 /* 3DNow! style prefetch */
5066 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
5069 set_irn_pinned(new_node, get_irn_pinned(node));
5070 set_ia32_op_type(new_node, ia32_AddrModeS);
5071 set_ia32_ls_mode(new_node, mode_Bu);
5072 set_address(new_node, &addr);
5074 SET_IA32_ORIG_NODE(new_node, node);
5076 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5080 * Transform a bsf-like node (unary operation with address-mode support).
/* Build a unary ia32 node (e.g. Bsf/Bsr) from builtin parameter 0, allowing
 * the operand to be folded into an address mode via match_arguments.
 * `func` is the node constructor; the result may be a Proj after
 * fix_mem_proj when a memory operand was folded in. */
5082 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5084 ir_node *param = get_Builtin_param(node, 0);
5085 dbg_info *dbgi = get_irn_dbg_info(node);
5087 ir_node *block = get_nodes_block(node);
5088 ir_node *new_block = be_transform_node(block);
5090 ia32_address_mode_t am;
5091 ia32_address_t *addr = &am.addr;
5094 match_arguments(&am, block, NULL, param, NULL, match_am);
5096 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5097 set_am_attributes(cnt, &am);
5098 set_ia32_ls_mode(cnt, get_irn_mode(param));
5100 SET_IA32_ORIG_NODE(cnt, node);
5101 return fix_mem_proj(cnt, &am);
5105 * Transform builtin ffs.
/* Transform the ffs builtin (ffs(x) = index of lowest set bit + 1, 0 if
 * x == 0). Implemented as: bsf; setcc on the zero flag; negate the 0/1 byte
 * to get an all-ones mask when the input was zero; OR the mask into the bsf
 * result (making it -1); finally add 1 via an Lea, yielding 0 for x == 0
 * and bsf(x)+1 otherwise. */
5107 static ir_node *gen_ffs(ir_node *node)
5109 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5110 ir_node *real = skip_Proj(bsf);
5111 dbg_info *dbgi = get_irn_dbg_info(real);
5112 ir_node *block = get_nodes_block(real);
5113 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* ensure the Bsf is in mode_T so both result and flags can be projected */
5116 if (get_irn_mode(real) != mode_T) {
5117 set_irn_mode(real, mode_T);
5118 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5121 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = 1 iff the input was zero (ZF set by bsf) */
5124 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5125 SET_IA32_ORIG_NODE(set, node);
/* widen the setcc byte to a full register */
5128 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5129 SET_IA32_ORIG_NODE(conv, node);
/* neg: 0 stays 0, 1 becomes 0xFFFFFFFF (all-ones mask) */
5132 neg = new_bd_ia32_Neg(dbgi, block, conv);
5135 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5136 set_ia32_commutative(orn);
/* +1 without clobbering flags, via Lea */
5139 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5140 add_ia32_am_offs_int(add, 1);
5145 * Transform builtin clz.
/* Transform the clz builtin. Bsr yields the index of the highest set bit;
 * XOR with 31 computes 31 - bsr (identical for values in [0,31]), i.e. the
 * number of leading zeros of a 32-bit value. */
5147 static ir_node *gen_clz(ir_node *node)
5149 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5150 ir_node *real = skip_Proj(bsr);
5151 dbg_info *dbgi = get_irn_dbg_info(real);
5152 ir_node *block = get_nodes_block(real);
5153 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5155 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5159 * Transform builtin ctz.
/* Transform the ctz builtin: Bsf directly yields the index of the lowest
 * set bit, which equals the trailing-zero count. */
5161 static ir_node *gen_ctz(ir_node *node)
5163 return gen_unop_AM(node, new_bd_ia32_Bsf);
5167 * Transform builtin parity.
/* Transform the parity builtin. The x86 parity flag only reflects the lowest
 * byte of a result, so the 32-bit value is first folded down: xor the upper
 * halfword onto the lower one, then xor the high byte onto the low byte
 * (XorHighLow), and read the parity flag of that final byte via setcc. */
5169 static ir_node *gen_parity(ir_node *node)
5171 dbg_info *dbgi = get_irn_dbg_info(node);
5172 ir_node *block = get_nodes_block(node);
5173 ir_node *new_block = be_transform_node(block);
5174 ir_node *param = get_Builtin_param(node, 0);
5175 ir_node *new_param = be_transform_node(param);
5178 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5179 * so we have to do complicated xoring first.
5180 * (we should also better lower this before the backend so we still have a
5181 * chance for CSE, constant folding and other goodies for some of these
5184 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5185 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5186 ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5188 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
/* project out the flags of the XorHighLow so setcc can consume them */
5191 set_irn_mode(xor2, mode_T);
5192 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
/* "not parity" because x86 PF is 1 for an EVEN number of set bits */
5195 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5196 SET_IA32_ORIG_NODE(new_node, node);
/* widen the setcc byte result to a full register */
5199 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5200 nomem, new_node, mode_Bu);
5201 SET_IA32_ORIG_NODE(new_node, node);
5206 * Transform builtin popcount
/* Transform the popcount builtin.
 *
 * If the CPU provides POPCNT (SSE4.2/SSE4a) a single instruction is emitted
 * (with possible address-mode folding). Otherwise the classic bit-twiddling
 * reduction is built: pairwise sums of 1-bit fields, then 2-, 4-, 8- and
 * 16-bit fields, using And masks, Shr, and Lea (addition). */
5208 static ir_node *gen_popcount(ir_node *node)
5210 ir_node *param = get_Builtin_param(node, 0);
5211 dbg_info *dbgi = get_irn_dbg_info(node);
5213 ir_node *block = get_nodes_block(node);
5214 ir_node *new_block = be_transform_node(block);
5217 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5219 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5220 if (ia32_cg_config.use_popcnt) {
5221 ia32_address_mode_t am;
5222 ia32_address_t *addr = &am.addr;
5225 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5227 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5228 set_am_attributes(cnt, &am);
5229 set_ia32_ls_mode(cnt, get_irn_mode(param));
5231 SET_IA32_ORIG_NODE(cnt, node);
5232 return fix_mem_proj(cnt, &am);
5235 new_param = be_transform_node(param);
5237 /* do the standard popcount algo */
5238 /* TODO: This is stupid, we should transform this before the backend,
5239 * to get CSE, localopts, etc. for the operations
5240 * TODO: This is also not the optimal algorithm (it is just the starting
5241 * example in hackers delight, they optimize it more on the following page)
5242 * But I'm too lazy to fix this now, as the code should get lowered before
5243 * the backend anyway.
5246 /* m1 = x & 0x55555555 */
5247 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5248 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5251 simm = ia32_create_Immediate(NULL, 0, 1);
5252 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5254 /* m2 = s1 & 0x55555555 */
5255 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (Lea used as flag-free add) */
5258 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5260 /* m4 = m3 & 0x33333333 */
5261 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5262 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5265 simm = ia32_create_Immediate(NULL, 0, 2);
5266 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5268 /* m5 = s2 & 0x33333333 */
5269 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 */
5272 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5274 /* m7 = m6 & 0x0F0F0F0F */
5275 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5276 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5279 simm = ia32_create_Immediate(NULL, 0, 4);
5280 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5282 /* m8 = s3 & 0x0F0F0F0F */
5283 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 */
5286 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5288 /* m10 = m9 & 0x00FF00FF */
5289 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5290 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5293 simm = ia32_create_Immediate(NULL, 0, 8);
5294 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5296 /* m11 = s4 & 0x00FF00FF */
5297 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5299 /* m12 = m10 + m11 */
5300 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5302 /* m13 = m12 & 0x0000FFFF */
5303 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5304 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5306 /* s5 = m12 >> 16 */
5307 simm = ia32_create_Immediate(NULL, 0, 16);
5308 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5310 /* res = m13 + s5 */
5311 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5315 * Transform builtin byte swap.
/* Transform the byte-swap builtin.
 *
 * 32-bit: use the BSWAP instruction when available (i486+); otherwise build
 * the swap manually from shifts, masks and Lea additions.
 * 16-bit: Bswap16 (rol/xchg style) is always available.
 * Any other size panics. */
5317 static ir_node *gen_bswap(ir_node *node)
5319 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5320 dbg_info *dbgi = get_irn_dbg_info(node);
5322 ir_node *block = get_nodes_block(node);
5323 ir_node *new_block = be_transform_node(block);
5324 ir_mode *mode = get_irn_mode(param);
5325 unsigned size = get_mode_size_bits(mode);
5326 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5330 if (ia32_cg_config.use_i486) {
5331 /* swap available */
5332 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* manual swap: byte0<<24 | (byte1<<8 & 0xFF00) | (x>>8 & 0xFF0000) | x>>24 */
5334 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5335 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5337 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5338 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5340 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5342 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5343 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5345 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5346 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5349 /* swap16 always available */
5350 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5353 panic("Invalid bswap size (%d)", size);
5358 * Transform builtin outport.
/* Transform the outport builtin into an ia32 Outport (OUT instruction).
 * Parameter 0 is the port (may become an immediate), parameter 1 the value;
 * its mode determines the operand size of the port write. */
5360 static ir_node *gen_outport(ir_node *node)
5362 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5363 ir_node *oldv = get_Builtin_param(node, 1);
5364 ir_mode *mode = get_irn_mode(oldv);
5365 ir_node *value = be_transform_node(oldv);
5366 ir_node *block = be_transform_node(get_nodes_block(node));
5367 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5368 dbg_info *dbgi = get_irn_dbg_info(node);
5370 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5371 set_ia32_ls_mode(res, mode);
5376 * Transform builtin inport.
/* Transform the inport builtin into an ia32 Inport (IN instruction).
 * The result mode is taken from the builtin's method type; parameter 0 is
 * the port (may become an immediate). */
5378 static ir_node *gen_inport(ir_node *node)
5380 ir_type *tp = get_Builtin_type(node);
5381 ir_type *rstp = get_method_res_type(tp, 0);
5382 ir_mode *mode = get_type_mode(rstp);
5383 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5384 ir_node *block = be_transform_node(get_nodes_block(node));
5385 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5386 dbg_info *dbgi = get_irn_dbg_info(node);
5388 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5389 set_ia32_ls_mode(res, mode);
5391 /* check for missing Result Proj */
5396 * Transform a builtin inner trampoline
/* Transform the inner_trampoline builtin: write a small trampoline into the
 * memory at `ptr` that loads the static chain and jumps to the callee:
 *
 *   B9 <env>   mov ecx, <env>
 *   E9 <rel>   jmp rel32 <callee>
 *
 * The opcode bytes and operands are emitted as a chain of byte/dword stores;
 * the jump displacement is callee - (trampoline + 10) since the relative
 * jump is encoded at the end of the 10-byte trampoline. Returns a Tuple of
 * (memory, trampoline address). */
5398 static ir_node *gen_inner_trampoline(ir_node *node)
5400 ir_node *ptr = get_Builtin_param(node, 0);
5401 ir_node *callee = get_Builtin_param(node, 1);
5402 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5403 ir_node *mem = get_Builtin_mem(node);
5404 ir_node *block = get_nodes_block(node);
5405 ir_node *new_block = be_transform_node(block);
5409 ir_node *trampoline;
5411 dbg_info *dbgi = get_irn_dbg_info(node);
5412 ia32_address_t addr;
5414 /* construct store address */
5415 memset(&addr, 0, sizeof(addr));
5416 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5418 if (addr.base == NULL) {
5419 addr.base = noreg_GP;
5421 addr.base = be_transform_node(addr.base);
5424 if (addr.index == NULL) {
5425 addr.index = noreg_GP;
5427 addr.index = be_transform_node(addr.index);
5429 addr.mem = be_transform_node(mem);
5431 /* mov ecx, <env> */
/* 0xB9 = opcode of "mov ecx, imm32" */
5432 val = ia32_create_Immediate(NULL, 0, 0xB9);
5433 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5434 addr.index, addr.mem, val);
5435 set_irn_pinned(store, get_irn_pinned(node));
5436 set_ia32_op_type(store, ia32_AddrModeD);
5437 set_ia32_ls_mode(store, mode_Bu);
5438 set_address(store, &addr);
/* store the 32-bit environment value (the mov's immediate operand) */
5442 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5443 addr.index, addr.mem, env);
5444 set_irn_pinned(store, get_irn_pinned(node));
5445 set_ia32_op_type(store, ia32_AddrModeD);
5446 set_ia32_ls_mode(store, mode_Iu);
5447 set_address(store, &addr);
5451 /* jmp rel <callee> */
/* 0xE9 = opcode of "jmp rel32" */
5452 val = ia32_create_Immediate(NULL, 0, 0xE9);
5453 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5454 addr.index, addr.mem, val);
5455 set_irn_pinned(store, get_irn_pinned(node));
5456 set_ia32_op_type(store, ia32_AddrModeD);
5457 set_ia32_ls_mode(store, mode_Bu);
5458 set_address(store, &addr);
5462 trampoline = be_transform_node(ptr);
5464 /* the callee is typically an immediate */
5465 if (is_SymConst(callee)) {
/* callee - 10 as an immediate (symbol with offset -10) */
5466 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5468 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5469 add_ia32_am_offs_int(rel, -10);
/* rel = (callee - 10) - trampoline = jmp displacement */
5471 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5473 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5474 addr.index, addr.mem, rel);
5475 set_irn_pinned(store, get_irn_pinned(node));
5476 set_ia32_op_type(store, ia32_AddrModeD);
5477 set_ia32_ls_mode(store, mode_Iu);
5478 set_address(store, &addr);
5483 return new_r_Tuple(new_block, 2, in);
5487 * Transform Builtin node.
/* Dispatch a Builtin node to its specific ia32 transformation routine;
 * panics for builtin kinds this backend does not implement. */
5489 static ir_node *gen_Builtin(ir_node *node)
5491 ir_builtin_kind kind = get_Builtin_kind(node);
5495 return gen_trap(node);
5496 case ir_bk_debugbreak:
5497 return gen_debugbreak(node);
5498 case ir_bk_return_address:
5499 return gen_return_address(node);
5500 case ir_bk_frame_address:
5501 return gen_frame_address(node);
5502 case ir_bk_prefetch:
5503 return gen_prefetch(node);
5505 return gen_ffs(node);
5507 return gen_clz(node);
5509 return gen_ctz(node);
5511 return gen_parity(node);
5512 case ir_bk_popcount:
5513 return gen_popcount(node);
5515 return gen_bswap(node);
5517 return gen_outport(node);
5519 return gen_inport(node);
5520 case ir_bk_inner_trampoline:
5521 return gen_inner_trampoline(node);
5523 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5527 * Transform Proj(Builtin) node.
/* Transform a Proj of a Builtin: depending on the builtin kind the
 * transformed node is either used directly (single-result builtins), wrapped
 * in a new Proj (Inport), or unpacked from the Tuple built by
 * gen_inner_trampoline. */
5529 static ir_node *gen_Proj_Builtin(ir_node *proj)
5531 ir_node *node = get_Proj_pred(proj);
5532 ir_node *new_node = be_transform_node(node);
5533 ir_builtin_kind kind = get_Builtin_kind(node);
5536 case ir_bk_return_address:
5537 case ir_bk_frame_address:
5542 case ir_bk_popcount:
/* these builtins were transformed into a plain value node */
5544 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5547 case ir_bk_debugbreak:
5548 case ir_bk_prefetch:
/* these produce only a memory result */
5550 assert(get_Proj_proj(proj) == pn_Builtin_M);
5553 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5554 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5556 assert(get_Proj_proj(proj) == pn_Builtin_M);
5557 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5559 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returns Tuple(mem, trampoline address) */
5560 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5561 return get_Tuple_pred(new_node, 1);
5563 assert(get_Proj_proj(proj) == pn_Builtin_M);
5564 return get_Tuple_pred(new_node, 0);
5567 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Transform be_IncSP by duplication; the stack adjustment is implemented
 * with an add/sub, so mark the node as clobbering the flags. */
5570 static ir_node *gen_be_IncSP(ir_node *node)
5572 ir_node *res = be_duplicate_node(node);
5573 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5579 * Transform the Projs from a be_Call.
/* Transform a Proj of a be_Call: map the be_Call proj numbers (memory, stack
 * pointer, exception/regular control flow, register results) onto the proj
 * numbers of the transformed ia32_Call, matching result projs by comparing
 * the "limited" register requirements of old and new outputs. */
5581 static ir_node *gen_Proj_be_Call(ir_node *node)
5583 ir_node *call = get_Proj_pred(node);
5584 ir_node *new_call = be_transform_node(call);
5585 dbg_info *dbgi = get_irn_dbg_info(node);
5586 long proj = get_Proj_proj(node);
5587 ir_mode *mode = get_irn_mode(node);
5590 if (proj == pn_be_Call_M) {
/* NOTE(review): n_ia32_Call_mem is an *input* index constant, but a Proj
 * needs an output (pn_) number — verify this is pn_ia32_Call_M. */
5591 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5593 /* transform call modes */
5594 if (mode_is_data(mode)) {
5595 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5599 /* Map from be_Call to ia32_Call proj number */
5600 if (proj == pn_be_Call_sp) {
5601 proj = pn_ia32_Call_stack;
5602 } else if (proj == pn_be_Call_M) {
5603 proj = pn_ia32_Call_M;
5604 } else if (proj == pn_be_Call_X_except) {
5605 proj = pn_ia32_Call_X_except;
5606 } else if (proj == pn_be_Call_X_regular) {
5607 proj = pn_ia32_Call_X_regular;
/* a register result: find the ia32_Call output with the same limited
 * register requirement */
5609 arch_register_req_t const *const req = arch_get_register_req_out(node);
5610 int const n_outs = arch_irn_get_n_outs(new_call);
5613 assert(proj >= pn_be_Call_first_res);
5614 assert(req->type & arch_register_req_type_limited);
5616 for (i = 0; i < n_outs; ++i) {
5617 arch_register_req_t const *const new_req
5618 = arch_get_out_register_req(new_call, i);
/* skip outputs whose requirement does not match */
5620 if (!(new_req->type & arch_register_req_type_limited) ||
5621 new_req->cls != req->cls ||
5622 *new_req->limited != *req->limited)
5631 res = new_rd_Proj(dbgi, new_call, mode, proj);
5633 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5635 case pn_ia32_Call_stack:
5636 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5639 case pn_ia32_Call_fpcw:
5640 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5648 * Transform the Projs from a Cmp.
/* A Proj of a Cmp must never reach the backend: all mode_b values are
 * expected to be lowered beforehand, so this always panics. */
5650 static ir_node *gen_Proj_Cmp(ir_node *node)
5652 /* this probably means not all mode_b nodes were lowered... */
5653 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/* Transform a Proj of an ASM node. The memory output is by convention the
 * last output of the transformed ASM node; value outputs keep their proj
 * position. Only memory, int/reference and float modes are accepted. */
5657 static ir_node *gen_Proj_ASM(ir_node *node)
5659 ir_mode *mode = get_irn_mode(node);
5660 ir_node *pred = get_Proj_pred(node);
5661 ir_node *new_pred = be_transform_node(pred);
5662 long pos = get_Proj_proj(node);
5664 if (mode == mode_M) {
/* the memory proj is the last output of the new ASM node */
5665 pos = arch_irn_get_n_outs(new_pred)-1;
5666 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5668 } else if (mode_is_float(mode)) {
5671 panic("unexpected proj mode at ASM");
5674 return new_r_Proj(new_pred, mode, pos);
5678 * Transform and potentially renumber Proj nodes.
/* Generic Proj transformation: dispatch on the opcode of the predecessor to
 * the specific Proj handlers; handle Start projs (initial exec becomes a
 * Jmp) and fall back to duplication for everything else. */
5680 static ir_node *gen_Proj(ir_node *node)
5682 ir_node *pred = get_Proj_pred(node);
5685 switch (get_irn_opcode(pred)) {
5687 return gen_Proj_Load(node);
5689 return gen_Proj_Store(node);
5691 return gen_Proj_ASM(node);
5693 return gen_Proj_Builtin(node);
5695 return gen_Proj_Div(node);
5697 return gen_Proj_Mod(node);
5699 return gen_Proj_CopyB(node);
5701 return gen_Proj_be_SubSP(node);
5703 return gen_Proj_be_AddSP(node);
5705 return gen_Proj_be_Call(node);
5707 return gen_Proj_Cmp(node);
5709 proj = get_Proj_proj(node);
5711 case pn_Start_X_initial_exec: {
5712 ir_node *block = get_nodes_block(pred);
5713 ir_node *new_block = be_transform_node(block);
5714 dbg_info *dbgi = get_irn_dbg_info(node);
5715 /* we exchange the ProjX with a jump */
5716 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5724 if (is_ia32_l_FloattoLL(pred)) {
5725 return gen_Proj_l_FloattoLL(node);
5727 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5731 ir_mode *mode = get_irn_mode(node);
5732 if (ia32_mode_needs_gp_reg(mode)) {
/* re-wrap the proj around the transformed predecessor, keeping the
 * node number for debugging */
5733 ir_node *new_pred = be_transform_node(pred);
5734 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5735 get_Proj_proj(node));
5736 new_proj->node_nr = node->node_nr;
5741 return be_duplicate_node(node);
5745 * Enters all transform functions into the generic pointer
/* Enters all transform functions into the generic function pointers of the
 * firm ops, after clearing any previously registered transformers.
 * Ops without a specific handler are duplicated unchanged. */
5747 static void register_transformers(void)
5749 /* first clear the generic function pointer for all ops */
5750 be_start_transform_setup();
5752 be_set_transform_function(op_Add, gen_Add);
5753 be_set_transform_function(op_And, gen_And);
5754 be_set_transform_function(op_ASM, ia32_gen_ASM);
5755 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5756 be_set_transform_function(op_be_Call, gen_be_Call);
5757 be_set_transform_function(op_be_Copy, gen_be_Copy);
5758 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5759 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5760 be_set_transform_function(op_be_Return, gen_be_Return);
5761 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5762 be_set_transform_function(op_Builtin, gen_Builtin);
5763 be_set_transform_function(op_Cmp, gen_Cmp);
5764 be_set_transform_function(op_Cond, gen_Cond);
5765 be_set_transform_function(op_Const, gen_Const);
5766 be_set_transform_function(op_Conv, gen_Conv);
5767 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5768 be_set_transform_function(op_Div, gen_Div);
5769 be_set_transform_function(op_Eor, gen_Eor);
5770 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5771 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5772 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5773 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5774 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5775 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5776 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5777 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5778 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5779 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5780 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5781 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5782 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5783 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5784 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5785 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5786 be_set_transform_function(op_IJmp, gen_IJmp);
5787 be_set_transform_function(op_Jmp, gen_Jmp);
5788 be_set_transform_function(op_Load, gen_Load);
5789 be_set_transform_function(op_Minus, gen_Minus);
5790 be_set_transform_function(op_Mod, gen_Mod);
5791 be_set_transform_function(op_Mul, gen_Mul);
5792 be_set_transform_function(op_Mulh, gen_Mulh);
5793 be_set_transform_function(op_Mux, gen_Mux);
5794 be_set_transform_function(op_Not, gen_Not);
5795 be_set_transform_function(op_Or, gen_Or);
5796 be_set_transform_function(op_Phi, gen_Phi);
5797 be_set_transform_function(op_Proj, gen_Proj);
5798 be_set_transform_function(op_Rotl, gen_Rotl);
5799 be_set_transform_function(op_Shl, gen_Shl);
5800 be_set_transform_function(op_Shr, gen_Shr);
5801 be_set_transform_function(op_Shrs, gen_Shrs);
5802 be_set_transform_function(op_Store, gen_Store);
5803 be_set_transform_function(op_Sub, gen_Sub);
5804 be_set_transform_function(op_SymConst, gen_SymConst);
5805 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5809 * Pre-transform all unknown and noreg nodes.
/* Pre-transform all unknown and noreg nodes of the current graph, and cache
 * the NoMem and NoReg_GP nodes used throughout the transformation. */
5811 static void ia32_pretransform_node(void)
5813 ir_graph *irg = current_ir_graph;
5814 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5816 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5817 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5818 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5819 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5820 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
/* cache frequently used helper nodes in file-scope variables */
5822 nomem = get_irg_no_mem(irg);
5823 noreg_GP = ia32_new_NoReg_gp(irg);
5827 * Post-process all calls if we are in SSE mode.
5828 * The ABI requires that the results are in st0, copy them
5829 * to a xmm register.
/* Post-process all calls recorded in call_list when compiling in SSE mode.
 * The x86 calling convention returns floats in st(0), so each float result
 * must be moved into an XMM register: either by patching an xStore user into
 * a direct x87 vfst, or by storing st(0) to a spill slot on the frame and
 * reloading it with an SSE xLoad. */
5831 static void postprocess_fp_call_results(void)
5835 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5836 ir_node *call = call_list[i];
5837 ir_type *mtp = call_types[i];
5840 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5841 ir_type *res_tp = get_method_res_type(mtp, j);
5842 ir_node *res, *new_res;
5843 const ir_edge_t *edge, *next;
5846 if (! is_atomic_type(res_tp)) {
5847 /* no floating point return */
5850 res_mode = get_type_mode(res_tp);
5851 if (! mode_is_float(res_mode)) {
5852 /* no floating point return */
/* the x87 result proj of the call (vf0 + result index) */
5856 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5859 /* now patch the users */
5860 foreach_out_edge_safe(res, edge, next) {
5861 ir_node *succ = get_edge_src_irn(edge);
/* Keeps are left alone; they only keep the value alive */
5864 if (be_is_Keep(succ))
5867 if (is_ia32_xStore(succ)) {
5868 /* an xStore can be patched into an vfst */
5869 dbg_info *db = get_irn_dbg_info(succ);
5870 ir_node *block = get_nodes_block(succ);
5871 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5872 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5873 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5874 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5875 ir_mode *mode = get_ia32_ls_mode(succ);
5877 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5878 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
/* copy all addressing attributes of the old store */
5879 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5880 if (is_ia32_use_frame(succ))
5881 set_ia32_use_frame(st);
5882 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5883 set_irn_pinned(st, get_irn_pinned(succ));
5884 set_ia32_op_type(st, ia32_AddrModeD);
/* proj numbers of xStore and vfst must line up for the exchange */
5886 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5887 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5888 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
5891 } else if (new_res == NULL) {
/* build the st(0) -> frame -> XMM shuffle once, lazily */
5892 dbg_info *db = get_irn_dbg_info(call);
5893 ir_node *block = get_nodes_block(call);
5894 ir_node *frame = get_irg_frame(current_ir_graph);
5895 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5896 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5897 ir_node *vfst, *xld, *new_mem;
5900 /* store st(0) on stack */
5901 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5903 set_ia32_op_type(vfst, ia32_AddrModeD);
5904 set_ia32_use_frame(vfst);
5906 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5908 /* load into SSE register */
5909 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5911 set_ia32_op_type(xld, ia32_AddrModeS);
5912 set_ia32_use_frame(xld);
5914 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5915 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* route all former memory users behind the new load */
5917 if (old_mem != NULL) {
5918 edges_reroute(old_mem, new_mem);
/* let the user consume the SSE value instead of the x87 one */
5922 set_irn_n(succ, get_edge_src_pos(edge), new_res);
/**
 * Do the transformation: run the generic backend transform driver over the
 * whole graph with the ia32 transformer table, then fix up SSE call results.
 *
 * @param irg  the graph to transform (modified in place)
 */
void ia32_transform_graph(ir_graph *irg)
	register_transformers();
	initial_fpcw = NULL;
	ia32_no_pic_adjust = 0;

	/* remember the initial fpcw value; transformers may need the pre-transform
	 * node (see old_initial_fpcw at file scope) */
	old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);

	/* dominance heights are used by the address-mode matcher */
	be_timer_push(T_HEIGHTS);
	ia32_heights = heights_new(irg);
	be_timer_pop(T_HEIGHTS);
	ia32_calculate_non_address_mode_nodes(irg);

	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();

	/* collect calls and their method types for SSE result post-processing */
	call_list = NEW_ARR_F(ir_node *, 0);
	call_types = NEW_ARR_F(ir_type *, 0);
	be_transform_graph(irg, ia32_pretransform_node);

	/* in SSE mode, x87 call results must be copied into xmm registers */
	if (ia32_cg_config.use_sse2)
		postprocess_fp_call_results();
	DEL_ARR_F(call_types);
	DEL_ARR_F(call_list);

	/* restore the CSE setting saved above */
	set_opt_cse(cse_last);

	/* free transform-phase helper state */
	ia32_free_non_address_mode_nodes();
	heights_free(ia32_heights);
	ia32_heights = NULL;
5965 void ia32_init_transform(void)
5967 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");