2 * Copyright (C) 1995-2011 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
71 /* define this to construct SSE constants instead of load them */
72 #undef CONSTRUCT_SSE_CONST
/* bit patterns (as strings) for the float sign/abs masks and the
 * unsigned-long-long conversion constants */
75 #define SFP_SIGN "0x80000000"
76 #define DFP_SIGN "0x8000000000000000"
77 #define SFP_ABS "0x7FFFFFFF"
78 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
79 #define DFP_INTMAX "9223372036854775807"
80 #define ULL_BIAS "18446744073709551616"
/* names of the global entities created for the constants above */
82 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
83 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
84 #define ENT_SFP_ABS "C_ia32_sfp_abs"
85 #define ENT_DFP_ABS "C_ia32_dfp_abs"
86 #define ENT_ULL_BIAS "C_ia32_ull_bias"
/* shortcuts for the modes of the vfp (x87) and xmm (SSE) register classes */
88 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
89 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
91 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* cached x87 control word nodes; initial_fpcw is the transformed variant,
 * produced lazily by get_fpcw() */
93 static ir_node *old_initial_fpcw = NULL;
94 static ir_node *initial_fpcw = NULL;
95 int ia32_no_pic_adjust;
/* constructor-function types used by the generic binop/unop/shift helpers
 * below; NOTE(review): several trailing parameter lines of these typedefs
 * appear to be missing from this extraction */
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* forward declarations for helpers defined later in the file */
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* it's enough to have those once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero is a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero is a node represents the -1 constant. */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
155 static bool is_simple_x87_Const(ir_node *node)
157 ir_tarval *tv = get_Const_tarval(node);
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
/* Return true if the constant can be produced with a short SSE sequence
 * instead of a memory load (zero always; more cases with
 * CONSTRUCT_SSE_CONST defined).
 * NOTE(review): several interior lines (the closing of the first condition
 * and the returns) appear to be missing from this extraction. */
166 * returns true if constant can be created with a simple float command
168 static bool is_simple_sse_Const(ir_node *node)
170 ir_tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
/* assemble the low 32 bits of the double from its byte representation */
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
201 static ir_node *get_symconst_base(void)
203 ir_graph *irg = current_ir_graph;
205 if (be_get_irg_options(irg)->pic) {
206 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
207 return arch_env->impl->get_pic_base(irg);
/* Transform a firm Const into the matching ia32 constant materialization:
 * SSE zero/load/movd, x87 fldz/fld1/fld-from-entity, or an integer
 * ia32_Const.
 * NOTE(review): numerous interior lines (braces, declarations, returns)
 * appear to be missing from this extraction; comments below only describe
 * what the visible lines establish. */
214 * Transforms a Const.
216 static ir_node *gen_Const(ir_node *node)
218 ir_node *old_block = get_nodes_block(node);
219 ir_node *block = be_transform_node(old_block);
220 dbg_info *dbgi = get_irn_dbg_info(node);
221 ir_mode *mode = get_irn_mode(node);
223 assert(is_Const(node));
225 if (mode_is_float(mode)) {
230 if (ia32_cg_config.use_sse2) {
231 ir_tarval *tv = get_Const_tarval(node);
232 if (tarval_is_null(tv)) {
/* +0.0: xorps/xorpd the register with itself */
233 load = new_bd_ia32_xZero(dbgi, block);
234 set_ia32_ls_mode(load, mode);
236 #ifdef CONSTRUCT_SSE_CONST
237 } else if (tarval_is_one(tv)) {
/* 1.0: shift an all-ones pattern to produce the exponent bits
 * (26/55 = mantissa width - 3 for float/double, presumably) */
238 int cnst = mode == mode_F ? 26 : 55;
239 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
240 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
241 ir_node *pslld, *psrld;
243 load = new_bd_ia32_xAllOnes(dbgi, block);
244 set_ia32_ls_mode(load, mode);
245 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
246 set_ia32_ls_mode(pslld, mode);
247 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
248 set_ia32_ls_mode(psrld, mode);
250 #endif /* CONSTRUCT_SSE_CONST */
251 } else if (mode == mode_F) {
252 /* we can place any 32bit constant by using a movd gp, sse */
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
257 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
258 load = new_bd_ia32_xMovd(dbgi, block, cnst);
259 set_ia32_ls_mode(load, mode);
263 #ifdef CONSTRUCT_SSE_CONST
264 if (mode == mode_D) {
265 unsigned val = get_tarval_sub_bits(tv, 0) |
266 (get_tarval_sub_bits(tv, 1) << 8) |
267 (get_tarval_sub_bits(tv, 2) << 16) |
268 (get_tarval_sub_bits(tv, 3) << 24);
270 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
271 ir_node *cnst, *psllq;
273 /* fine, lower 32bit are zero, produce 32bit value */
274 val = get_tarval_sub_bits(tv, 4) |
275 (get_tarval_sub_bits(tv, 5) << 8) |
276 (get_tarval_sub_bits(tv, 6) << 16) |
277 (get_tarval_sub_bits(tv, 7) << 24);
278 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
279 load = new_bd_ia32_xMovd(dbgi, block, cnst);
280 set_ia32_ls_mode(load, mode);
/* shift the upper half into place: movd put it in the low 32 bits */
281 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
282 set_ia32_ls_mode(psllq, mode);
287 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the constant from a global entity */
288 floatent = ia32_create_float_const_entity(node);
290 base = get_symconst_base();
291 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
293 set_ia32_op_type(load, ia32_AddrModeS);
294 set_ia32_am_sc(load, floatent);
/* a constant load can always be repeated, so allow remat */
295 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
296 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0 and 1.0, otherwise load from entity */
299 if (is_Const_null(node)) {
300 load = new_bd_ia32_vfldz(dbgi, block);
302 set_ia32_ls_mode(load, mode);
303 } else if (is_Const_one(node)) {
304 load = new_bd_ia32_vfld1(dbgi, block);
306 set_ia32_ls_mode(load, mode);
311 floatent = ia32_create_float_const_entity(node);
312 /* create_float_const_ent is smart and sometimes creates
314 ls_mode = get_type_mode(get_entity_type(floatent));
315 base = get_symconst_base();
316 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
318 set_ia32_op_type(load, ia32_AddrModeS);
319 set_ia32_am_sc(load, floatent);
320 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
321 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
324 #ifdef CONSTRUCT_SSE_CONST
326 #endif /* CONSTRUCT_SSE_CONST */
327 SET_IA32_ORIG_NODE(load, node);
329 } else { /* non-float mode */
331 ir_tarval *tv = get_Const_tarval(node);
/* normalize everything to 32bit unsigned before extracting the value */
334 tv = tarval_convert_to(tv, mode_Iu);
336 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
338 panic("couldn't convert constant tarval (%+F)", node);
340 val = get_tarval_long(tv);
342 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
343 SET_IA32_ORIG_NODE(cnst, node);
/* Transform a firm SymConst (address of an entity) into an ia32 node:
 * float modes become a load, TLS entities become LdTls+Lea, everything
 * else becomes an ia32_Const carrying the entity.
 * NOTE(review): interior lines (braces, returns) appear to be missing
 * from this extraction. */
350 * Transforms a SymConst.
352 static ir_node *gen_SymConst(ir_node *node)
354 ir_node *old_block = get_nodes_block(node);
355 ir_node *block = be_transform_node(old_block);
356 dbg_info *dbgi = get_irn_dbg_info(node);
357 ir_mode *mode = get_irn_mode(node);
360 if (mode_is_float(mode)) {
361 if (ia32_cg_config.use_sse2)
362 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 set_ia32_am_sc(cnst, get_SymConst_entity(node));
366 set_ia32_use_frame(cnst);
/* only entity addresses are supported by this backend */
370 if (get_SymConst_kind(node) != symconst_addr_ent) {
371 panic("backend only support symconst_addr_ent (at %+F)", node);
373 entity = get_SymConst_entity(node);
/* thread-local entities are addressed relative to the TLS base register */
374 if (get_entity_owner(entity) == get_tls_type()) {
375 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
376 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
377 set_ia32_am_sc(lea, entity);
380 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
384 SET_IA32_ORIG_NODE(cnst, node);
390 * Create a float type for the given mode and cache it.
392 * @param mode the mode for the float type (might be integer mode for SSE2 types)
393 * @param align alignment
/* NOTE(review): caches are indexed by alignment; align is assumed < 16
 * (array bound) — no bounds check is visible here. */
395 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
401 if (mode == mode_Iu) {
402 static ir_type *int_Iu[16] = {NULL, };
404 if (int_Iu[align] == NULL) {
405 int_Iu[align] = tp = new_type_primitive(mode);
406 /* set the specified alignment */
407 set_type_alignment_bytes(tp, align);
409 return int_Iu[align];
410 } else if (mode == mode_Lu) {
411 static ir_type *int_Lu[16] = {NULL, };
413 if (int_Lu[align] == NULL) {
414 int_Lu[align] = tp = new_type_primitive(mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return int_Lu[align];
419 } else if (mode == mode_F) {
420 static ir_type *float_F[16] = {NULL, };
422 if (float_F[align] == NULL) {
423 float_F[align] = tp = new_type_primitive(mode);
424 /* set the specified alignment */
425 set_type_alignment_bytes(tp, align);
427 return float_F[align];
428 } else if (mode == mode_D) {
429 static ir_type *float_D[16] = {NULL, };
431 if (float_D[align] == NULL) {
432 float_D[align] = tp = new_type_primitive(mode);
433 /* set the specified alignment */
434 set_type_alignment_bytes(tp, align);
436 return float_D[align];
/* fallback: any other mode is cached in the float_E (extended) table */
438 static ir_type *float_E[16] = {NULL, };
440 if (float_E[align] == NULL) {
441 float_E[align] = tp = new_type_primitive(mode);
442 /* set the specified alignment */
443 set_type_alignment_bytes(tp, align);
445 return float_E[align];
450 * Create a float[2] array type for the given atomic type.
452 * @param tp the atomic type
/* NOTE(review): like ia32_create_float_type, results are cached per
 * alignment; align is assumed < 16 (array bound). */
454 static ir_type *ia32_create_float_array(ir_type *tp)
456 ir_mode *mode = get_type_mode(tp);
457 unsigned align = get_type_alignment_bytes(tp);
462 if (mode == mode_F) {
463 static ir_type *float_F[16] = {NULL, };
465 if (float_F[align] != NULL)
466 return float_F[align];
467 arr = float_F[align] = new_type_array(1, tp);
468 } else if (mode == mode_D) {
469 static ir_type *float_D[16] = {NULL, };
471 if (float_D[align] != NULL)
472 return float_D[align];
473 arr = float_D[align] = new_type_array(1, tp);
/* fallback cache for all remaining (extended precision) modes */
475 static ir_type *float_E[16] = {NULL, };
477 if (float_E[align] != NULL)
478 return float_E[align];
479 arr = float_E[align] = new_type_array(1, tp);
/* a two-element array: size = 2 * element size, fixed layout */
481 set_type_alignment_bytes(arr, align);
482 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
483 set_type_state(arr, layout_fixed);
487 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Lazily creates (and caches) the global constant entity for the given
 * well-known constant kind (sign mask, abs mask, ULL bias). The ULL bias
 * is emitted as a two-element array {0, bias}. */
488 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
490 static const struct {
491 const char *ent_name;
492 const char *cnst_str;
495 } names [ia32_known_const_max] = {
496 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
497 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
498 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
499 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
500 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
502 static ir_entity *ent_cache[ia32_known_const_max];
504 const char *ent_name, *cnst_str;
510 ent_name = names[kct].ent_name;
511 if (! ent_cache[kct]) {
512 cnst_str = names[kct].cnst_str;
/* the table's mode tag selects the tarval mode of the constant */
514 switch (names[kct].mode) {
515 case 0: mode = mode_Iu; break;
516 case 1: mode = mode_Lu; break;
517 default: mode = mode_F; break;
519 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
520 tp = ia32_create_float_type(mode, names[kct].align);
522 if (kct == ia32_ULLBIAS)
523 tp = ia32_create_float_array(tp);
524 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
526 set_entity_ld_ident(ent, get_entity_ident(ent));
527 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
528 set_entity_visibility(ent, ir_visibility_private);
530 if (kct == ia32_ULLBIAS) {
/* {0, bias}: element 0 is zero, element 1 the bias value */
531 ir_initializer_t *initializer = create_initializer_compound(2);
533 set_initializer_compound_value(initializer, 0,
534 create_initializer_tarval(get_mode_null(mode)));
535 set_initializer_compound_value(initializer, 1,
536 create_initializer_tarval(tv));
538 set_entity_initializer(ent, initializer);
540 set_entity_initializer(ent, create_initializer_tarval(tv));
543 /* cache the entry */
544 ent_cache[kct] = ent;
547 return ent_cache[kct];
551 * return true if the node is a Proj(Load) and could be used in source address
552 * mode for another node. Will return only true if the @p other node is not
553 * dependent on the memory of the Load (for binary operations use the other
554 * input here, for unary operations use NULL).
/* NOTE(review): the early returns inside the condition bodies appear to
 * be missing from this extraction. */
556 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
557 ir_node *other, ir_node *other2, match_flags_t flags)
562 /* float constants are always available */
563 if (is_Const(node)) {
564 ir_mode *mode = get_irn_mode(node);
565 if (mode_is_float(mode)) {
566 if (ia32_cg_config.use_sse2) {
567 if (is_simple_sse_Const(node))
570 if (is_simple_x87_Const(node))
/* a multi-user constant is cheaper to keep in a register */
573 if (get_irn_n_edges(node) > 1)
581 load = get_Proj_pred(node);
582 pn = get_Proj_proj(node);
583 if (!is_Load(load) || pn != pn_Load_res)
585 if (get_nodes_block(load) != block)
587 /* we only use address mode if we're the only user of the load */
588 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
590 /* in some edge cases with address mode we might reach the load normally
591 * and through some AM sequence, if it is already materialized then we
592 * can't create an AM node from it */
593 if (be_is_transformed(node))
596 /* don't do AM if other node inputs depend on the load (via mem-proj) */
597 if (other != NULL && ia32_prevents_AM(block, load, other))
600 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Collected result of operand matching: the address parts plus the
 * matched operands and operation properties.
 * NOTE(review): several member declarations appear to be missing from
 * this extraction. */
606 typedef struct ia32_address_mode_t ia32_address_mode_t;
607 struct ia32_address_mode_t {
612 ia32_op_type_t op_type;
616 unsigned commutative : 1;
617 unsigned ins_permuted : 1;
620 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
622 /* construct load address */
623 memset(addr, 0, sizeof(addr[0]));
624 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
626 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
627 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
628 addr->mem = be_transform_node(mem);
/* Fill the address-mode struct @p am for operand @p node: float constants
 * become entity addresses, Proj(Load) nodes contribute their load address
 * and memory.
 * NOTE(review): interior lines appear to be missing from this extraction. */
631 static void build_address(ia32_address_mode_t *am, ir_node *node,
632 ia32_create_am_flags_t flags)
634 ia32_address_t *addr = &am->addr;
640 /* floating point immediates */
641 if (is_Const(node)) {
/* address the constant through a global entity */
642 ir_entity *entity = ia32_create_float_const_entity(node);
643 addr->base = get_symconst_base();
644 addr->index = noreg_GP;
646 addr->symconst_ent = entity;
647 addr->tls_segment = false;
649 am->ls_mode = get_type_mode(get_entity_type(entity));
650 am->pinned = op_pin_state_floats;
/* otherwise: node is a Proj(Load); fold the load into the address mode */
654 load = get_Proj_pred(node);
655 ptr = get_Load_ptr(load);
656 mem = get_Load_mem(load);
657 new_mem = be_transform_node(mem);
658 am->pinned = get_irn_pinned(load);
659 am->ls_mode = get_Load_mode(load);
660 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
663 /* construct load address */
664 ia32_create_address_mode(addr, ptr, flags);
666 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
667 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy the matched address parts (scale, symconst, offset, tls, frame)
 * onto the ia32 node's attributes.
 * NOTE(review): the condition guarding the use_frame/frame_ent lines
 * appears to be missing from this extraction. */
671 static void set_address(ir_node *node, const ia32_address_t *addr)
673 set_ia32_am_scale(node, addr->scale);
674 set_ia32_am_sc(node, addr->symconst_ent);
675 set_ia32_am_offs_int(node, addr->offset);
676 set_ia32_am_tls_segment(node, addr->tls_segment);
677 if (addr->symconst_sign)
678 set_ia32_am_sc_sign(node);
680 set_ia32_use_frame(node);
681 set_ia32_frame_ent(node, addr->frame_entity);
685 * Apply attributes of a given address mode to a node.
/* NOTE(review): the guard before the final set_ia32_commutative call
 * appears to be missing from this extraction. */
687 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
689 set_address(node, &am->addr);
691 set_ia32_op_type(node, am->op_type);
692 set_ia32_ls_mode(node, am->ls_mode);
693 if (am->pinned == op_pin_state_pinned) {
694 /* beware: some nodes are already pinned and did not allow to change the state */
695 if (get_irn_pinned(node) != op_pin_state_pinned)
696 set_irn_pinned(node, op_pin_state_pinned);
699 set_ia32_commutative(node);
703 * Check, if a given node is a Down-Conv, ie. a integer Conv
704 * from a mode with a mode with more bits to a mode with lesser bits.
705 * Moreover, we return only true if the node has not more than 1 user.
707 * @param node the node
708 * @return non-zero if node is a Down-Conv
/* NOTE(review): the initial is_Conv check and the returns appear to be
 * missing from this extraction. */
710 static int is_downconv(const ir_node *node)
718 /* we only want to skip the conv when we're the only user
719 * (because this test is used in the context of address-mode selection
720 * and we don't want to use address mode for multiple users) */
721 if (get_irn_n_edges(node) > 1)
724 src_mode = get_irn_mode(get_Conv_op(node));
725 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers, and the conv must not widen */
727 ia32_mode_needs_gp_reg(src_mode) &&
728 ia32_mode_needs_gp_reg(dest_mode) &&
729 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
732 /** Skip all Down-Conv's on a given node and return the resulting node. */
733 ir_node *ia32_skip_downconv(ir_node *node)
735 while (is_downconv(node))
736 node = get_Conv_op(node);
/* True for a single-user integer Conv that changes signedness but not
 * the bit width (a "same-size" conv).
 * NOTE(review): the initial is_Conv check and the returns appear to be
 * missing from this extraction. */
741 static bool is_sameconv(ir_node *node)
749 /* we only want to skip the conv when we're the only user
750 * (because this test is used in the context of address-mode selection
751 * and we don't want to use address mode for multiple users) */
752 if (get_irn_n_edges(node) > 1)
755 src_mode = get_irn_mode(get_Conv_op(node));
756 dest_mode = get_irn_mode(node);
/* both GP modes and identical sizes — only the signedness may differ */
758 ia32_mode_needs_gp_reg(src_mode) &&
759 ia32_mode_needs_gp_reg(dest_mode) &&
760 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
763 /** Skip all signedness convs */
764 static ir_node *ia32_skip_sameconv(ir_node *node)
766 while (is_sameconv(node))
767 node = get_Conv_op(node);
/* Widen @p node to a 32bit mode (signed or unsigned depending on its
 * current mode) via an I2I conversion.
 * NOTE(review): the lines choosing tgt_mode appear to be missing from
 * this extraction. */
772 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
774 ir_mode *mode = get_irn_mode(node);
779 if (mode_is_signed(mode)) {
784 block = get_nodes_block(node);
785 dbgi = get_irn_dbg_info(node);
787 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
791 * matches operands of a node into ia32 addressing/operand modes. This covers
792 * usage of source address mode, immediates, operations with non 32-bit modes,
794 * The resulting data is filled into the @p am struct. block is the block
795 * of the node whose arguments are matched. op1, op2 are the first and second
796 * input that are matched (op1 may be NULL). other_op is another unrelated
797 * input that is not matched! but which is needed sometimes to check if AM
798 * for op1/op2 is legal.
799 * @p flags describes the supported modes of the operation in detail.
/* NOTE(review): many interior lines (braces, assignments, early-return
 * paths) appear to be missing from this extraction. */
801 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
802 ir_node *op1, ir_node *op2, ir_node *other_op,
805 ia32_address_t *addr = &am->addr;
806 ir_mode *mode = get_irn_mode(op2);
807 int mode_bits = get_mode_size_bits(mode);
808 ir_node *new_op1, *new_op2;
810 unsigned commutative;
811 int use_am_and_immediates;
814 memset(am, 0, sizeof(am[0]));
/* decode the capabilities of the operation from the match flags */
816 commutative = (flags & match_commutative) != 0;
817 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
818 use_am = (flags & match_am) != 0;
819 use_immediate = (flags & match_immediate) != 0;
820 assert(!use_am_and_immediates || use_immediate);
823 assert(!commutative || op1 != NULL);
824 assert(use_am || !(flags & match_8bit_am));
825 assert(use_am || !(flags & match_16bit_am));
/* small modes are only foldable when the op supports 8/16bit AM */
827 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
828 (mode_bits == 16 && !(flags & match_16bit_am))) {
832 /* we can simply skip downconvs for mode neutral nodes: the upper bits
833 * can be random for these operations */
834 if (flags & match_mode_neutral) {
835 op2 = ia32_skip_downconv(op2);
837 op1 = ia32_skip_downconv(op1);
840 op2 = ia32_skip_sameconv(op2);
842 op1 = ia32_skip_sameconv(op1);
846 /* match immediates. firm nodes are normalized: constants are always on the
849 if (!(flags & match_try_am) && use_immediate) {
850 new_op2 = ia32_try_create_Immediate(op2, 0);
/* try to fold op2 as a memory operand (source address mode) */
853 if (new_op2 == NULL &&
854 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
855 build_address(am, op2, ia32_create_am_normal);
856 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
857 if (mode_is_float(mode)) {
858 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
862 am->op_type = ia32_AddrModeS;
/* commutative ops may fold op1 instead, swapping the inputs */
863 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
865 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
867 build_address(am, op1, ia32_create_am_normal);
869 if (mode_is_float(mode)) {
870 noreg = ia32_new_NoReg_vfp(current_ir_graph);
875 if (new_op2 != NULL) {
878 new_op1 = be_transform_node(op2);
880 am->ins_permuted = true;
882 am->op_type = ia32_AddrModeS;
884 am->op_type = ia32_Normal;
886 if (flags & match_try_am) {
/* no AM matched: transform the operands normally, widening if needed */
892 mode = get_irn_mode(op2);
893 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
894 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
896 new_op2 = create_upconv(op2, NULL);
897 am->ls_mode = mode_Iu;
899 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
901 new_op2 = be_transform_node(op2);
902 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill unmatched address parts with defaults */
905 if (addr->base == NULL)
906 addr->base = noreg_GP;
907 if (addr->index == NULL)
908 addr->index = noreg_GP;
909 if (addr->mem == NULL)
912 am->new_op1 = new_op1;
913 am->new_op2 = new_op2;
914 am->commutative = commutative;
918 * "Fixes" a node that uses address mode by turning it into mode_T
919 * and returning a pn_ia32_res Proj.
921 * @param node the node
922 * @param am its address mode
924 * @return a Proj(pn_ia32_res) if a memory address mode is used,
/* NOTE(review): the fallthrough return for the no-mem-proj / mode_T
 * cases appears to be missing from this extraction. */
927 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
932 if (am->mem_proj == NULL)
935 /* we have to create a mode_T so the old MemProj can attach to us */
936 mode = get_irn_mode(node);
937 load = get_Proj_pred(am->mem_proj);
/* redirect future lookups of the folded load to the new node */
939 be_set_transformed_node(load, node);
941 if (mode != mode_T) {
942 set_irn_mode(node, mode_T);
943 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
950 * Construct a standard binary operation, set AM and immediate if required.
952 * @param node The original node for which the binop is created
953 * @param op1 The first operand
954 * @param op2 The second operand
955 * @param func The node constructor function
956 * @return The constructed ia32 node.
958 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
959 construct_binop_func *func, match_flags_t flags)
962 ir_node *block, *new_block, *new_node;
963 ia32_address_mode_t am;
964 ia32_address_t *addr = &am.addr;
/* match operands into immediates / source address mode first */
966 block = get_nodes_block(node);
967 match_arguments(&am, block, op1, op2, NULL, flags);
969 dbgi = get_irn_dbg_info(node);
970 new_block = be_transform_node(block);
971 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
972 am.new_op1, am.new_op2);
973 set_am_attributes(new_node, &am);
974 /* we can't use source address mode anymore when using immediates */
975 if (!(flags & match_am_and_immediates) &&
976 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
977 set_ia32_am_support(new_node, ia32_am_none);
978 SET_IA32_ORIG_NODE(new_node, node);
/* attach the old memory Proj (if any) and project the result */
980 new_node = fix_mem_proj(new_node, &am);
986 * Generic names for the inputs of an ia32 binary op.
/* The static asserts below verify that the generic indices line up with
 * the generated Adc/Sbb input indices, so gen_binop_flags can use them
 * for both node kinds. */
989 n_ia32_l_binop_left, /**< ia32 left input */
990 n_ia32_l_binop_right, /**< ia32 right input */
991 n_ia32_l_binop_eflags /**< ia32 eflags input */
993 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
994 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
995 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
996 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
997 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
998 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
1001 * Construct a binary operation which also consumes the eflags.
1003 * @param node The node to transform
1004 * @param func The node constructor function
1005 * @param flags The match flags
1006 * @return The constructor ia32 node
1008 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1009 match_flags_t flags)
1011 ir_node *src_block = get_nodes_block(node);
/* fetch operands via the generic l_binop indices (see asserts above) */
1012 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1013 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1014 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1016 ir_node *block, *new_node, *new_eflags;
1017 ia32_address_mode_t am;
1018 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded load */
1020 match_arguments(&am, src_block, op1, op2, eflags, flags);
1022 dbgi = get_irn_dbg_info(node);
1023 block = be_transform_node(src_block);
1024 new_eflags = be_transform_node(eflags);
1025 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1026 am.new_op1, am.new_op2, new_eflags);
1027 set_am_attributes(new_node, &am);
1028 /* we can't use source address mode anymore when using immediates */
1029 if (!(flags & match_am_and_immediates) &&
1030 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1031 set_ia32_am_support(new_node, ia32_am_none);
1032 SET_IA32_ORIG_NODE(new_node, node);
1034 new_node = fix_mem_proj(new_node, &am);
1039 static ir_node *get_fpcw(void)
1041 if (initial_fpcw != NULL)
1042 return initial_fpcw;
1044 initial_fpcw = be_transform_node(old_initial_fpcw);
1045 return initial_fpcw;
1049 * Construct a standard binary operation, set AM and immediate if required.
1051 * @param op1 The first operand
1052 * @param op2 The second operand
1053 * @param func The node constructor function
1054 * @return The constructed ia32 node.
/* x87 variant: the constructor additionally receives the fp control word,
 * and ins_permuted is recorded so a reverse instruction can be chosen. */
1056 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1057 construct_binop_float_func *func)
1059 ir_mode *mode = get_irn_mode(node);
1061 ir_node *block, *new_block, *new_node;
1062 ia32_address_mode_t am;
1063 ia32_address_t *addr = &am.addr;
1064 ia32_x87_attr_t *attr;
1065 /* All operations are considered commutative, because there are reverse
1067 match_flags_t flags = match_commutative;
1069 /* happens for div nodes... */
1070 if (mode == mode_T) {
1072 mode = get_Div_resmode(node);
1074 panic("can't determine mode");
1077 /* cannot use address mode with long double on x87 */
1078 if (get_mode_size_bits(mode) <= 64)
1081 block = get_nodes_block(node);
1082 match_arguments(&am, block, op1, op2, NULL, flags);
1084 dbgi = get_irn_dbg_info(node);
1085 new_block = be_transform_node(block);
1086 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1087 am.new_op1, am.new_op2, get_fpcw());
1088 set_am_attributes(new_node, &am);
/* remember whether the inputs were swapped during matching */
1090 attr = get_ia32_x87_attr(new_node);
1091 attr->attr.data.ins_permuted = am.ins_permuted;
1093 SET_IA32_ORIG_NODE(new_node, node);
1095 new_node = fix_mem_proj(new_node, &am);
1101 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1103 * @param op1 The first operand
1104 * @param op2 The second operand
1105 * @param func The node constructor function
1106 * @return The constructed ia32 node.
1108 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1109 construct_shift_func *func,
1110 match_flags_t flags)
1113 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1114 ir_mode *mode = get_irn_mode(node);
1116 assert(! mode_is_float(mode));
1117 assert(flags & match_immediate);
1118 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* ia32 shift instructions implicitly mask the count mod 32 */
1120 if (get_mode_modulo_shift(mode) != 32)
1121 panic("modulo shift!=32 not supported by ia32 backend");
1123 if (flags & match_mode_neutral) {
1124 op1 = ia32_skip_downconv(op1);
1125 new_op1 = be_transform_node(op1);
1126 } else if (get_mode_size_bits(mode) != 32) {
1127 new_op1 = create_upconv(op1, node);
1129 new_op1 = be_transform_node(op1);
1132 /* the shift amount can be any mode that is bigger than 5 bits, since all
1133 * other bits are ignored anyway */
1134 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1135 ir_node *const op = get_Conv_op(op2);
1136 if (mode_is_float(get_irn_mode(op)))
1139 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1141 new_op2 = create_immediate_or_transform(op2, 0);
1143 dbgi = get_irn_dbg_info(node);
1144 block = get_nodes_block(node);
1145 new_block = be_transform_node(block);
1146 new_node = func(dbgi, new_block, new_op1, new_op2);
1147 SET_IA32_ORIG_NODE(new_node, node);
1149 /* lowered shift instruction may have a dependency operand, handle it here */
1150 if (get_irn_arity(node) == 3) {
1151 /* we have a dependency */
1152 ir_node* dep = get_irn_n(node, 2);
1153 if (get_irn_n_edges(dep) > 1) {
1154 /* ... which has at least one user other than 'node' */
1155 ir_node *new_dep = be_transform_node(dep);
1156 add_irn_dep(new_node, new_dep);
1165 * Construct a standard unary operation, set AM and immediate if required.
1167 * @param op The operand
1168 * @param func The node constructor function
1169 * @return The constructed ia32 node.
1171 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1172 match_flags_t flags)
1175 ir_node *block, *new_block, *new_op, *new_node;
1177 assert(flags == 0 || flags == match_mode_neutral)
/* the upper bits are irrelevant for mode-neutral ops, so narrowing
 * convs on the operand can be skipped */;
1178 if (flags & match_mode_neutral) {
1179 op = ia32_skip_downconv(op);
1182 new_op = be_transform_node(op);
1183 dbgi = get_irn_dbg_info(node);
1184 block = get_nodes_block(node);
1185 new_block = be_transform_node(block);
1186 new_node = func(dbgi, new_block, new_op);
1188 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea from a matched address. Because lea ignores segment
 * overrides, a TLS-relative address is rewritten by adding the TLS base
 * into the base register first.
 * NOTE(review): several interior lines appear to be missing from this
 * extraction. */
1193 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1194 ia32_address_t *addr)
1204 base = be_transform_node(base);
1211 idx = be_transform_node(idx);
1214 /* segment overrides are ineffective for Leas :-( so we have to patch
1216 if (addr->tls_segment) {
1217 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1218 assert(addr->symconst_ent != NULL);
1219 if (base == noreg_GP)
1222 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1223 addr->tls_segment = false;
1226 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1227 set_address(res, addr);
1233 * Returns non-zero if a given address mode has a symbolic or
1234 * numerical offset != 0.
1236 static int am_has_immediates(const ia32_address_t *addr)
1238 return addr->offset != 0 || addr->symconst_ent != NULL
1239 || addr->frame_entity || addr->use_frame;
/**
 * Creates an ia32 Add.
 *
 * @return the created ia32 Add node
 */
static ir_node *gen_Add(ir_node *node)
	ir_mode *mode = get_irn_mode(node);
	ir_node *op1 = get_Add_left(node);
	ir_node *op2 = get_Add_right(node);
	ir_node *block, *new_block, *new_node, *add_immediate_op;
	ia32_address_t addr;
	ia32_address_mode_t am;

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
			                 match_commutative | match_am);
		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);

	ia32_mark_non_am(node);

	op2 = ia32_skip_downconv(op2);
	op1 = ia32_skip_downconv(op1);

	/* Transformation strategy:
	 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
	 * 1. Add with immediate -> Lea
	 * 2. Add with possible source address mode -> Add
	 * 3. Otherwise -> Lea */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, node, ia32_create_am_force);
	add_immediate_op = NULL;

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);

	/* Case 0: everything folded into immediates -> emit a Const */
	if (addr.base == NULL && addr.index == NULL) {
		new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
		                             addr.symconst_sign, 0, addr.offset);
		SET_IA32_ORIG_NODE(new_node, node);

	/* add with immediate? */
	if (addr.index == NULL) {
		add_immediate_op = addr.base;
	} else if (addr.base == NULL && addr.scale == 0) {
		add_immediate_op = addr.index;

	if (add_immediate_op != NULL) {
		if (!am_has_immediates(&addr)) {
#ifdef DEBUG_libfirm
			ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
			/* Add x,0 degenerates to x itself */
			return be_transform_node(add_immediate_op);

		/* Case 1: register + immediate -> Lea */
		new_node = create_lea_from_address(dbgi, new_block, &addr);
		SET_IA32_ORIG_NODE(new_node, node);

	/* test if we can use source address mode */
	match_arguments(&am, block, op1, op2, NULL, match_commutative
	                | match_mode_neutral | match_am | match_immediate | match_try_am);

	/* construct an Add with source address mode */
	if (am.op_type == ia32_AddrModeS) {
		ia32_address_t *am_addr = &am.addr;
		new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
		                           am_addr->index, am_addr->mem, am.new_op1,
		set_am_attributes(new_node, &am);
		SET_IA32_ORIG_NODE(new_node, node);

		new_node = fix_mem_proj(new_node, &am);

	/* otherwise construct a lea */
	new_node = create_lea_from_address(dbgi, new_block, &addr);
	SET_IA32_ORIG_NODE(new_node, node);
1338 * Creates an ia32 Mul.
1340 * @return the created ia32 Mul node
1342 static ir_node *gen_Mul(ir_node *node)
1344 ir_node *op1 = get_Mul_left(node);
1345 ir_node *op2 = get_Mul_right(node);
1346 ir_mode *mode = get_irn_mode(node);
1348 if (mode_is_float(mode)) {
1349 if (ia32_cg_config.use_sse2)
1350 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1351 match_commutative | match_am);
1353 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1355 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1356 match_commutative | match_am | match_mode_neutral |
1357 match_immediate | match_am_and_immediates);
1361 * Creates an ia32 Mulh.
1362 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1363 * this result while Mul returns the lower 32 bit.
1365 * @return the created ia32 Mulh node
1367 static ir_node *gen_Mulh(ir_node *node)
1369 dbg_info *dbgi = get_irn_dbg_info(node);
1370 ir_node *op1 = get_Mulh_left(node);
1371 ir_node *op2 = get_Mulh_right(node);
1372 ir_mode *mode = get_irn_mode(node);
1374 ir_node *proj_res_high;
1376 if (get_mode_size_bits(mode) != 32) {
1377 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1380 if (mode_is_signed(mode)) {
1381 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1382 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1384 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1385 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1387 return proj_res_high;
/**
 * Creates an ia32 And.
 *
 * @return The created ia32 And node
 */
static ir_node *gen_And(ir_node *node)
	ir_node *op1 = get_And_left(node);
	ir_node *op2 = get_And_right(node);
	assert(! mode_is_float(get_irn_mode(node)));

	/* is it a zero extension? */
	if (is_Const(op2)) {
		ir_tarval *tv = get_Const_tarval(op2);
		long v = get_tarval_long(tv);

		if (v == 0xFF || v == 0xFFFF) {
			/* And with 0xFF/0xFFFF masks to 8/16 bit: emit a
			 * zero-extending Conv instead of an And */
			dbg_info *dbgi = get_irn_dbg_info(node);
			ir_node *block = get_nodes_block(node);
			assert(v == 0xFFFF);
			res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);

	return gen_binop(node, op1, op2, new_bd_ia32_And,
	                 match_commutative | match_mode_neutral | match_am | match_immediate);
1428 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
1431 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1433 if (is_Const(value1) && is_Const(value2)) {
1434 ir_tarval *tv1 = get_Const_tarval(value1);
1435 ir_tarval *tv2 = get_Const_tarval(value2);
1436 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1437 long v1 = get_tarval_long(tv1);
1438 long v2 = get_tarval_long(tv2);
1439 return v1 <= v2 && v2 == 32-v1;
1445 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1446 ir_node *high, ir_node *low,
1450 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1451 * op1 - target to be shifted
1452 * op2 - contains bits to be shifted into target
1454 * Only op3 can be an immediate.
1456 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1457 ir_node *high, ir_node *low, ir_node *count,
1458 new_shiftd_func func)
1460 ir_node *new_block = be_transform_node(block);
1461 ir_node *new_high = be_transform_node(high);
1462 ir_node *new_low = be_transform_node(low);
1466 /* the shift amount can be any mode that is bigger than 5 bits, since all
1467 * other bits are ignored anyway */
1468 while (is_Conv(count) &&
1469 get_irn_n_edges(count) == 1 &&
1470 mode_is_int(get_irn_mode(count))) {
1471 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1472 count = get_Conv_op(count);
1474 new_count = create_immediate_or_transform(count, 0);
1476 new_node = func(dbgi, new_block, new_high, new_low, new_count);
/**
 * Try to recognize an Or of two shifts as a 64-bit double shift
 * (ShlD/ShrD), including the patterns produced by lower_dw.
 * Returns the transformed node on a match (otherwise falls through —
 * tail not visible here, presumably returning NULL).
 */
static ir_node *match_64bit_shift(ir_node *node)
	ir_node *op1 = get_Or_left(node);
	ir_node *op2 = get_Or_right(node);

	/* match ShlD operation */
	if (is_Shl(op1) && is_Shr(op2)) {
		ir_node *shl_right = get_Shl_right(op1);
		ir_node *shl_left  = get_Shl_left(op1);
		ir_node *shr_right = get_Shr_right(op2);
		ir_node *shr_left  = get_Shr_left(op2);
		/* constant ShlD operation */
		if (is_complementary_shifts(shl_right, shr_right)) {
			dbg_info *dbgi  = get_irn_dbg_info(node);
			ir_node  *block = get_nodes_block(node);
			return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
		/* constant ShrD operation */
		if (is_complementary_shifts(shr_right, shl_right)) {
			dbg_info *dbgi  = get_irn_dbg_info(node);
			ir_node  *block = get_nodes_block(node);
			return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
		/* lower_dw produces the following for ShlD:
		 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
		if (is_Shr(shr_left) && is_Not(shr_right)
		    && is_Const_1(get_Shr_right(shr_left))
		    && get_Not_op(shr_right) == shl_right) {
			dbg_info *dbgi  = get_irn_dbg_info(node);
			ir_node  *block = get_nodes_block(node);
			ir_node  *val_h = get_Shr_left(shr_left);
			return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
		/* lower_dw produces the following for ShrD:
		 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
		if (is_Shl(shl_left) && is_Not(shl_right)
		    && is_Const_1(get_Shl_right(shl_left))
		    && get_Not_op(shl_right) == shr_right) {
			dbg_info *dbgi  = get_irn_dbg_info(node);
			ir_node  *block = get_nodes_block(node);
			ir_node  *val_h = get_Shl_left(shl_left);
			return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
/**
 * Creates an ia32 Or.
 *
 * @return The created ia32 Or node
 */
static ir_node *gen_Or(ir_node *node)
	ir_node *op1 = get_Or_left(node);
	ir_node *op2 = get_Or_right(node);

	/* first try to recognize a 64-bit double shift (ShlD/ShrD) pattern */
	res = match_64bit_shift(node);

	assert (! mode_is_float(get_irn_mode(node)));
	return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
	                 | match_mode_neutral | match_am | match_immediate);
1561 * Creates an ia32 Eor.
1563 * @return The created ia32 Eor node
1565 static ir_node *gen_Eor(ir_node *node)
1567 ir_node *op1 = get_Eor_left(node);
1568 ir_node *op2 = get_Eor_right(node);
1570 assert(! mode_is_float(get_irn_mode(node)));
1571 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1572 | match_mode_neutral | match_am | match_immediate);
/**
 * Creates an ia32 Sub.
 *
 * @return The created ia32 Sub node
 */
static ir_node *gen_Sub(ir_node *node)
	ir_node *op1 = get_Sub_left(node);
	ir_node *op2 = get_Sub_right(node);
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);

	/* Sub with a Const should have been normalized to Add by the middle-end */
	if (is_Const(op2)) {
		ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",

	/* note: no match_commutative here — x86 Sub is not commutative */
	return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
	                 | match_am | match_immediate);
/**
 * Compute the memory input for an operation that consumed a Load as an
 * address-mode operand: combine the operation's old memory (src_mem)
 * with the folded load's memory (am_mem), avoiding memory loops and
 * rebuilding Syncs where necessary.
 */
static ir_node *transform_AM_mem(ir_node *const block,
                                 ir_node *const src_val,
                                 ir_node *const src_mem,
                                 ir_node *const am_mem)
	if (is_NoMem(am_mem)) {
		/* no address-mode memory -> just use the transformed old memory */
		return be_transform_node(src_mem);
	} else if (is_Proj(src_val) &&
	           get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
		/* avoid memory loop */
	} else if (is_Proj(src_val) && is_Sync(src_mem)) {
		ir_node *const ptr_pred = get_Proj_pred(src_val);
		int const arity = get_Sync_n_preds(src_mem);
		/* collect all Sync inputs except those reaching back to the load */
		NEW_ARR_A(ir_node*, ins, arity + 1);

		/* NOTE: This sometimes produces dead-code because the old sync in
		 * src_mem might not be used anymore, we should detect this case
		 * and kill the sync... */
		for (i = arity - 1; i >= 0; --i) {
			ir_node *const pred = get_Sync_pred(src_mem, i);

			/* avoid memory loop */
			if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
			ins[n++] = be_transform_node(pred);

		if (n==1 && ins[0] == am_mem) {
			/* creating a new Sync and relying on CSE may fail,
			 * if am_mem is a ProjM, which does not yet verify. */
		return new_r_Sync(block, n, ins);

		/* fallback: Sync of the old memory and the AM memory */
		ins[0] = be_transform_node(src_mem);
		return new_r_Sync(block, 2, ins);
1655 * Create a 32bit to 64bit signed extension.
1657 * @param dbgi debug info
1658 * @param block the block where node nodes should be placed
1659 * @param val the value to extend
1660 * @param orig the original node
1662 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1663 ir_node *val, const ir_node *orig)
1668 if (ia32_cg_config.use_short_sex_eax) {
1669 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1670 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1672 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1673 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1675 SET_IA32_ORIG_NODE(res, orig);
/**
 * Generates an ia32 Div/IDiv with additional infrastructure for the
 * register allocator if needed.  Shared by gen_Div and gen_Mod; the
 * operands are picked depending on the node's opcode.
 */
static ir_node *create_Div(ir_node *node)
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	int throws_exception = ir_throws_exception(node);
	ir_node *sign_extension;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* the upper bits have random contents for smaller modes */
	switch (get_irn_opcode(node)) {
		op1 = get_Div_left(node);
		op2 = get_Div_right(node);
		mem = get_Div_mem(node);
		mode = get_Div_resmode(node);
		op1 = get_Mod_left(node);
		op2 = get_Mod_right(node);
		mem = get_Mod_mem(node);
		mode = get_Mod_resmode(node);
		panic("invalid divmod node %+F", node);

	match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);

	/* Beware: We don't need a Sync, if the memory predecessor of the Div node
	   is the memory of the consumed address. We can have only the second op as address
	   in Div nodes, so check only op2. */
	new_mem = transform_AM_mem(block, op2, mem, addr->mem);

	if (mode_is_signed(mode)) {
		/* signed division needs the dividend's sign extension as high part */
		sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
		new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
		                            addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
		/* unsigned division uses a zero constant as high part */
		sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);

		new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
		                           addr->index, new_mem, am.new_op2,
		                           am.new_op1, sign_extension);

	ir_set_throws_exception(new_node, throws_exception);

	set_irn_pinned(new_node, get_irn_pinned(node));

	set_am_attributes(new_node, &am);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
1748 * Generates an ia32 Mod.
1750 static ir_node *gen_Mod(ir_node *node)
1752 return create_Div(node);
1756 * Generates an ia32 Div.
1758 static ir_node *gen_Div(ir_node *node)
1760 ir_mode *mode = get_Div_resmode(node);
1761 if (mode_is_float(mode)) {
1762 ir_node *op1 = get_Div_left(node);
1763 ir_node *op2 = get_Div_right(node);
1765 if (ia32_cg_config.use_sse2) {
1766 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1768 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1772 return create_Div(node);
1776 * Creates an ia32 Shl.
1778 * @return The created ia32 Shl node
1780 static ir_node *gen_Shl(ir_node *node)
1782 ir_node *left = get_Shl_left(node);
1783 ir_node *right = get_Shl_right(node);
1785 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1786 match_mode_neutral | match_immediate);
1790 * Creates an ia32 Shr.
1792 * @return The created ia32 Shr node
1794 static ir_node *gen_Shr(ir_node *node)
1796 ir_node *left = get_Shr_left(node);
1797 ir_node *right = get_Shr_right(node);
1799 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
/**
 * Creates an ia32 Sar (arithmetic shift right).
 *
 * @return The created ia32 Shrs node
 */
static ir_node *gen_Shrs(ir_node *node)
	ir_node *left  = get_Shrs_left(node);
	ir_node *right = get_Shrs_right(node);

	if (is_Const(right)) {
		ir_tarval *tv = get_Const_tarval(right);
		long val = get_tarval_long(tv);
		/* this is a sign extension */
		dbg_info *dbgi = get_irn_dbg_info(node);
		ir_node *block = be_transform_node(get_nodes_block(node));
		ir_node *new_op = be_transform_node(left);

		return create_sex_32_64(dbgi, block, new_op, node);

	/* 8 or 16 bit sign extension? */
	if (is_Const(right) && is_Shl(left)) {
		ir_node *shl_left  = get_Shl_left(left);
		ir_node *shl_right = get_Shl_right(left);
		if (is_Const(shl_right)) {
			ir_tarval *tv1 = get_Const_tarval(right);
			ir_tarval *tv2 = get_Const_tarval(shl_right);
			if (tv1 == tv2 && tarval_is_long(tv1)) {
				long val = get_tarval_long(tv1);
				if (val == 16 || val == 24) {
					/* Shl by 16/24 followed by Shrs by the same amount is a
					 * 16/8 bit sign extension -> emit a Conv instead */
					dbg_info *dbgi = get_irn_dbg_info(node);
					ir_node *block = get_nodes_block(node);
					res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,

	/* fallback: a plain Sar shift */
	return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1863 * Creates an ia32 Rol.
1865 * @param op1 The first operator
1866 * @param op2 The second operator
1867 * @return The created ia32 RotL node
1869 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1871 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1877 * Creates an ia32 Ror.
1878 * NOTE: There is no RotR with immediate because this would always be a RotL
1879 * "imm-mode_size_bits" which can be pre-calculated.
1881 * @param op1 The first operator
1882 * @param op2 The second operator
1883 * @return The created ia32 RotR node
1885 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1887 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1893 * Creates an ia32 RotR or RotL (depending on the found pattern).
1895 * @return The created ia32 RotL or RotR node
1897 static ir_node *gen_Rotl(ir_node *node)
1899 ir_node *op1 = get_Rotl_left(node);
1900 ir_node *op2 = get_Rotl_right(node);
1902 if (is_Minus(op2)) {
1903 return gen_Ror(node, op1, get_Minus_op(op2));
1906 return gen_Rol(node, op1, op2);
/**
 * Transforms a Minus node.
 *
 * @return The created ia32 Minus node
 */
static ir_node *gen_Minus(ir_node *node)
	ir_node *op    = get_Minus_op(node);
	ir_node *block = be_transform_node(get_nodes_block(node));
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode  = get_irn_mode(node);

	if (mode_is_float(mode)) {
		ir_node *new_op = be_transform_node(op);
		if (ia32_cg_config.use_sse2) {
			/* TODO: non-optimal... if we have many xXors, then we should
			 * rather create a load for the const and use that instead of
			 * several AM nodes... */
			ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);

			/* SSE2: flip the sign bit by xor-ing with a known sign constant */
			new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
			                            noreg_GP, nomem, new_op, noreg_xmm);

			size = get_mode_size_bits(mode);
			ent  = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);

			set_ia32_am_sc(new_node, ent);
			set_ia32_op_type(new_node, ia32_AddrModeS);
			set_ia32_ls_mode(new_node, mode);
			/* x87: dedicated change-sign node */
			new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
		/* integer negation */
		new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);

	SET_IA32_ORIG_NODE(new_node, node);
1956 * Transforms a Not node.
1958 * @return The created ia32 Not node
1960 static ir_node *gen_Not(ir_node *node)
1962 ir_node *op = get_Not_op(node);
1964 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1965 assert (! mode_is_float(get_irn_mode(node)));
1967 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/**
 * Create a float absolute value (optionally negated).  On SSE2 the sign
 * bit is masked away by an And with a known constant (ia32_SABS/ia32_DABS);
 * on x87 vfabs is used, followed by vfchs when negate is requested.
 */
static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
                                 bool negate, ir_node *node)
	ir_node *new_block = be_transform_node(block);
	ir_mode *mode = get_irn_mode(op);
	ir_node *new_op = be_transform_node(op);

	assert(mode_is_float(mode));

	if (ia32_cg_config.use_sse2) {
		/* mask away the sign bit via And with the abs-mask constant */
		ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
		new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
		                            noreg_GP, nomem, new_op, noreg_fp);

		size = get_mode_size_bits(mode);
		ent  = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);

		set_ia32_am_sc(new_node, ent);

		SET_IA32_ORIG_NODE(new_node, node);

		set_ia32_op_type(new_node, ia32_AddrModeS);
		set_ia32_ls_mode(new_node, mode);

		/* TODO, implement -Abs case */
		new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
		SET_IA32_ORIG_NODE(new_node, node);
		/* negate the absolute value with a change-sign node */
		new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
		SET_IA32_ORIG_NODE(new_node, node);
2012 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
2014 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2016 dbg_info *dbgi = get_irn_dbg_info(cmp);
2017 ir_node *block = get_nodes_block(cmp);
2018 ir_node *new_block = be_transform_node(block);
2019 ir_node *op1 = be_transform_node(x);
2020 ir_node *op2 = be_transform_node(n);
2022 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/**
 * Map a firm ir_relation onto the matching ia32 condition code.  Three
 * tables are used, depending on whether the compared mode is float,
 * signed integer or unsigned integer.
 */
static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
	if (mode_is_float(mode)) {
		/* float compares: parity flag encodes "unordered" */
		case ir_relation_equal:              return ia32_cc_float_equal;
		case ir_relation_less:               return ia32_cc_float_below;
		case ir_relation_less_equal:         return ia32_cc_float_below_equal;
		case ir_relation_greater:            return ia32_cc_float_above;
		case ir_relation_greater_equal:      return ia32_cc_float_above_equal;
		case ir_relation_less_greater:       return ia32_cc_not_equal;
		case ir_relation_less_equal_greater: return ia32_cc_not_parity;
		case ir_relation_unordered:          return ia32_cc_parity;
		case ir_relation_unordered_equal:    return ia32_cc_equal;
		case ir_relation_unordered_less:     return ia32_cc_float_unordered_below;
		case ir_relation_unordered_less_equal:
			return ia32_cc_float_unordered_below_equal;
		case ir_relation_unordered_greater:
			return ia32_cc_float_unordered_above;
		case ir_relation_unordered_greater_equal:
			return ia32_cc_float_unordered_above_equal;
		case ir_relation_unordered_less_greater:
			return ia32_cc_float_not_equal;
		case ir_relation_false:
		case ir_relation_true:
			/* should we introduce a jump always/jump never? */
		panic("Unexpected float pnc");
	} else if (mode_is_signed(mode)) {
		/* signed integer compares */
		case ir_relation_unordered_equal:
		case ir_relation_equal:                return ia32_cc_equal;
		case ir_relation_unordered_less:
		case ir_relation_less:                 return ia32_cc_less;
		case ir_relation_unordered_less_equal:
		case ir_relation_less_equal:           return ia32_cc_less_equal;
		case ir_relation_unordered_greater:
		case ir_relation_greater:              return ia32_cc_greater;
		case ir_relation_unordered_greater_equal:
		case ir_relation_greater_equal:        return ia32_cc_greater_equal;
		case ir_relation_unordered_less_greater:
		case ir_relation_less_greater:         return ia32_cc_not_equal;
		case ir_relation_less_equal_greater:
		case ir_relation_unordered:
		case ir_relation_false:
		case ir_relation_true:
			/* introduce jump always/jump never? */
		panic("Unexpected pnc");
		/* unsigned integer compares use the below/above conditions */
		case ir_relation_unordered_equal:
		case ir_relation_equal:                return ia32_cc_equal;
		case ir_relation_unordered_less:
		case ir_relation_less:                 return ia32_cc_below;
		case ir_relation_unordered_less_equal:
		case ir_relation_less_equal:           return ia32_cc_below_equal;
		case ir_relation_unordered_greater:
		case ir_relation_greater:              return ia32_cc_above;
		case ir_relation_unordered_greater_equal:
		case ir_relation_greater_equal:        return ia32_cc_above_equal;
		case ir_relation_unordered_less_greater:
		case ir_relation_less_greater:         return ia32_cc_not_equal;
		case ir_relation_less_equal_greater:
		case ir_relation_unordered:
		case ir_relation_false:
		case ir_relation_true:
			/* introduce jump always/jump never? */
		panic("Unexpected pnc");
/**
 * Transform a Cmp into a node producing the eflags value; stores the
 * condition code to be tested into *cc_out.  Recognizes bit-test
 * patterns ((1 << n) & x) and maps them to a Bt instruction.
 */
static ir_node *get_flags_node_cmp(ir_node *cmp, ia32_condition_code_t *cc_out)
	/* must have a Cmp as input */
	ir_relation relation = get_Cmp_relation(cmp);
	ir_relation possible;
	ir_node *l = get_Cmp_left(cmp);
	ir_node *r = get_Cmp_right(cmp);
	ir_mode *mode = get_irn_mode(l);

	/* check for bit-test */
	if (ia32_cg_config.use_bt && (relation == ir_relation_equal
	        || (mode_is_signed(mode) && relation == ir_relation_less_greater)
	        || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
		ir_node *la = get_And_left(l);
		ir_node *ra = get_And_right(l);
		ir_node *c = get_Shl_left(la);
		if (is_Const_1(c) && is_Const_0(r)) {
			/* (1 << n) & ra) */
			ir_node *n = get_Shl_right(la);
			flags = gen_bt(cmp, ra, n);
			/* the bit is copied into the CF flag */
			if (relation & ir_relation_equal)
				*cc_out = ia32_cc_above_equal; /* test for CF=0 */
				*cc_out = ia32_cc_below; /* test for CF=1 */

	/* the middle-end tries to eliminate impossible relations, so a ptr != 0
	 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
	 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
	 * a predecessor node). So add the < bit */
	possible = ir_get_possible_cmp_relations(l, r);
	if (((relation & ir_relation_less) && !(possible & ir_relation_greater))
	    || ((relation & ir_relation_greater) && !(possible & ir_relation_less)))
		relation |= ir_relation_less_greater;

	/* just do a normal transformation of the Cmp */
	*cc_out = relation_to_condition_code(relation, mode);
	flags = be_transform_node(cmp);
2155 * Transform a node returning a "flag" result.
2157 * @param node the node to transform
2158 * @param cc_out the compare mode to use
2160 static ir_node *get_flags_node(ir_node *node, ia32_condition_code_t *cc_out)
2162 assert(is_Cmp(node));
2163 return get_flags_node_cmp(node, cc_out);
/**
 * Transforms a Load.
 *
 * @return the created ia32 Load node
 */
static ir_node *gen_Load(ir_node *node)
	ir_node *old_block = get_nodes_block(node);
	ir_node *block = be_transform_node(old_block);
	ir_node *ptr = get_Load_ptr(node);
	ir_node *mem = get_Load_mem(node);
	ir_node *new_mem = be_transform_node(mem);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_Load_mode(node);
	int throws_exception = ir_throws_exception(node);
	ia32_address_t addr;

	/* construct load address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
	base = be_transform_node(base);
	idx = be_transform_node(idx);

	if (mode_is_float(mode)) {
		/* float loads: SSE2 xLoad or x87 vfld */
		if (ia32_cg_config.use_sse2) {
			new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
			new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
		assert(mode != mode_b);

		/* create a conv node with address mode for smaller modes */
		if (get_mode_size_bits(mode) < 32) {
			new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
			                                new_mem, noreg_GP, mode);
			new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);

	ir_set_throws_exception(new_node, throws_exception);

	set_irn_pinned(new_node, get_irn_pinned(node));
	set_ia32_op_type(new_node, ia32_AddrModeS);
	set_ia32_ls_mode(new_node, mode);
	set_address(new_node, &addr);

	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* unpinned loads may be rematerialized; all load variants must
		 * agree on the result proj number for this to be valid */
		assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
		       && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
		       && (int)pn_ia32_Load_res == (int)pn_ia32_res);
		arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);

	SET_IA32_ORIG_NODE(new_node, node);
/**
 * Check whether a Load result (node, a Proj of a Load) can be consumed in
 * destination address mode by an operation in block storing through ptr
 * with memory mem.  other is another operand of the operation that must
 * not depend on the load.
 */
static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
                       ir_node *ptr, ir_node *other)
	/* we only use address mode if we're the only user of the load */
	if (get_irn_n_edges(node) > 1)

	load = get_Proj_pred(node);

	/* load must be in the same block as the store */
	if (get_nodes_block(load) != block)

	/* store should have the same pointer as the load */
	if (get_Load_ptr(load) != ptr)

	/* don't do AM if other node inputs depend on the load (via mem-proj) */
	if (other != NULL &&
	    get_nodes_block(other) == block &&
	    heights_reachable_in_block(ia32_heights, other, load)) {

	if (ia32_prevents_AM(block, load, mem))
	/* Store should be attached to the load via mem */
	assert(heights_reachable_in_block(ia32_heights, mem, load));
/**
 * Try to build a destination-address-mode binop (a read-modify-write
 * instruction operating directly on memory, e.g. AddMem).  func8bit is
 * used for 8-bit modes, func for everything else.
 */
static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
                              ir_node *mem, ir_node *ptr, ir_mode *mode,
                              construct_binop_dest_func *func,
                              construct_binop_dest_func *func8bit,
                              match_flags_t flags)
	ir_node *src_block = get_nodes_block(node);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	memset(&am, 0, sizeof(am));

	assert(flags & match_immediate); /* there is no destam node without... */
	commutative = (flags & match_commutative) != 0;

	if (use_dest_am(src_block, op1, mem, ptr, op2)) {
		build_address(&am, op1, ia32_create_am_double_use);
		new_op = create_immediate_or_transform(op2, 0);
	} else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
		/* commutative operation: the operand roles may be swapped */
		build_address(&am, op2, ia32_create_am_double_use);
		new_op = create_immediate_or_transform(op1, 0);

	/* fill in default registers for unused address parts */
	if (addr->base == NULL)
		addr->base = noreg_GP;
	if (addr->index == NULL)
		addr->index = noreg_GP;
	if (addr->mem == NULL)

	dbgi = get_irn_dbg_info(node);
	block = be_transform_node(src_block);
	new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);

	if (get_mode_size_bits(mode) == 8) {
		new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
		new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
	set_address(new_node, addr);
	set_ia32_op_type(new_node, ia32_AddrModeD);
	set_ia32_ls_mode(new_node, mode);
	SET_IA32_ORIG_NODE(new_node, node);

	/* reroute the consumed load's memory users to the new node */
	be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
	mem_proj = be_transform_node(am.mem_proj);
	be_set_transformed_node(am.mem_proj, new_node);
	be_set_transformed_node(mem_proj, new_node);
/**
 * Try to build a destination-address-mode unop (a read-modify-write
 * instruction operating directly on memory, e.g. IncMem/DecMem/NotMem).
 */
static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
                             ir_node *ptr, ir_mode *mode,
                             construct_unop_dest_func *func)
	ir_node *src_block = get_nodes_block(node);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* bail out when the operand's load is not usable as destination AM */
	if (!use_dest_am(src_block, op, mem, ptr, NULL))

	memset(&am, 0, sizeof(am));
	build_address(&am, op, ia32_create_am_double_use);

	dbgi = get_irn_dbg_info(node);
	block = be_transform_node(src_block);
	new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
	new_node = func(dbgi, block, addr->base, addr->index, new_mem);
	set_address(new_node, addr);
	set_ia32_op_type(new_node, ia32_AddrModeD);
	set_ia32_ls_mode(new_node, mode);
	SET_IA32_ORIG_NODE(new_node, node);

	/* reroute the consumed load's memory users to the new node */
	be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
	mem_proj = be_transform_node(am.mem_proj);
	be_set_transformed_node(am.mem_proj, new_node);
	be_set_transformed_node(mem_proj, new_node);
/**
 * Try to express Store(Mux(sel, 0/1)) as a SetccMem which writes the
 * condition result directly to memory (only possible for 8-bit modes).
 */
static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
	ir_mode *mode = get_irn_mode(node);
	ir_node *mux_true = get_Mux_true(node);
	ir_node *mux_false = get_Mux_false(node);
	ia32_condition_code_t cc;
	ia32_address_t addr;

	/* Setcc produces an 8-bit result only */
	if (get_mode_size_bits(mode) != 8)

	/* the Mux must select between the constants 1 and 0 (either order) */
	if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
	} else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {

	cond = get_Mux_sel(node);
	flags = get_flags_node(cond, &cc);
	/* we can't handle the float special cases with SetM */
	if (cc & ia32_cc_additional_float_cases)

	cc = ia32_negate_condition_code(cc);

	build_address_ptr(&addr, ptr, mem);

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);
	new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
	                                addr.index, addr.mem, flags, cc);
	set_address(new_node, &addr);
	set_ia32_op_type(new_node, ia32_AddrModeD);
	set_ia32_ls_mode(new_node, mode);
	SET_IA32_ORIG_NODE(new_node, node);
/* Try to transform a Store(op(Load)) pattern into a destination
 * address-mode ia32 instruction (e.g. AddMem, OrMem, ShlMem, ...).
 * Returns the new node on success.
 * NOTE(review): this excerpt is missing several original lines (case
 * labels, breaks, early returns); comments describe only visible logic. */
2424 static ir_node *try_create_dest_am(ir_node *node)
2426 ir_node *val = get_Store_value(node);
2427 ir_node *mem = get_Store_mem(node);
2428 ir_node *ptr = get_Store_ptr(node);
2429 ir_mode *mode = get_irn_mode(val);
2430 unsigned bits = get_mode_size_bits(mode);
2435 /* handle only GP modes for now... */
2436 if (!ia32_mode_needs_gp_reg(mode))
2440 /* store must be the only user of the val node */
2441 if (get_irn_n_edges(val) > 1)
2443 /* skip pointless convs */
2445 ir_node *conv_op = get_Conv_op(val);
2446 ir_mode *pred_mode = get_irn_mode(conv_op);
2447 if (!ia32_mode_needs_gp_reg(pred_mode))
2449 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2457 /* value must be in the same block */
2458 if (get_nodes_block(node) != get_nodes_block(val))
/* Dispatch on the operation producing the stored value. */
2461 switch (get_irn_opcode(val)) {
2463 op1 = get_Add_left(val);
2464 op2 = get_Add_right(val);
/* Add/Sub by +-1 becomes an inc/dec of the memory cell when profitable. */
2465 if (ia32_cg_config.use_incdec) {
2466 if (is_Const_1(op2)) {
2467 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2469 } else if (is_Const_Minus_1(op2)) {
2470 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2474 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2475 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2476 match_commutative | match_immediate);
2479 op1 = get_Sub_left(val);
2480 op2 = get_Sub_right(val);
/* Sub by a constant should have been normalized to Add by the middleend. */
2481 if (is_Const(op2)) {
2482 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2484 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2485 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2489 op1 = get_And_left(val);
2490 op2 = get_And_right(val);
2491 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2492 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2493 match_commutative | match_immediate);
2496 op1 = get_Or_left(val);
2497 op2 = get_Or_right(val);
2498 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2499 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2500 match_commutative | match_immediate);
2503 op1 = get_Eor_left(val);
2504 op2 = get_Eor_right(val);
2505 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2506 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2507 match_commutative | match_immediate);
2510 op1 = get_Shl_left(val);
2511 op2 = get_Shl_right(val);
2512 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2513 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2517 op1 = get_Shr_left(val);
2518 op2 = get_Shr_right(val);
2519 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2520 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2524 op1 = get_Shrs_left(val);
2525 op2 = get_Shrs_right(val);
2526 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2527 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2531 op1 = get_Rotl_left(val);
2532 op2 = get_Rotl_right(val);
2533 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2534 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2537 /* TODO: match ROR patterns... */
/* Mux case: stored boolean result may become a SetccMem. */
2539 new_node = try_create_SetMem(val, ptr, mem);
2543 op1 = get_Minus_op(val);
2544 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2547 /* should be lowered already */
2548 assert(mode != mode_b);
2549 op1 = get_Not_op(val);
2550 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* Propagate pinned state of the original store to the new node. */
2556 if (new_node != NULL) {
2557 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2558 get_irn_pinned(node) == op_pin_state_pinned) {
2559 set_irn_pinned(new_node, op_pin_state_pinned);
/* Return true if @p mode is an integer mode a float value may be
 * converted to directly: signed and 16 or 32 bits wide.
 * NOTE(review): return statements are missing from this excerpt. */
2566 static bool possible_int_mode_for_fp(ir_mode *mode)
2570 if (!mode_is_signed(mode))
2572 size = get_mode_size_bits(mode);
2573 if (size != 16 && size != 32)
/* Check whether @p node is a Conv from a float mode to an integer mode
 * that possible_int_mode_for_fp() accepts.
 * NOTE(review): early returns and the is_Conv check are missing from
 * this excerpt. */
2578 static int is_float_to_int_conv(const ir_node *node)
2580 ir_mode *mode = get_irn_mode(node);
2584 if (!possible_int_mode_for_fp(mode))
2589 conv_op = get_Conv_op(node);
2590 conv_mode = get_irn_mode(conv_op);
2592 if (!mode_is_float(conv_mode))
2599 * Transform a Store(floatConst) into a sequence of
/* ... 32-bit integer stores of the constant's bit pattern (little endian). */
2602 * @return the created ia32 Store node
2604 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2606 ir_mode *mode = get_irn_mode(cns);
2607 unsigned size = get_mode_size_bytes(mode);
2608 ir_tarval *tv = get_Const_tarval(cns);
2609 ir_node *block = get_nodes_block(node);
2610 ir_node *new_block = be_transform_node(block);
2611 ir_node *ptr = get_Store_ptr(node);
2612 ir_node *mem = get_Store_mem(node);
2613 dbg_info *dbgi = get_irn_dbg_info(node);
2616 int throws_exception = ir_throws_exception(node);
2618 ia32_address_t addr;
/* The constant is emitted in whole 32-bit chunks. */
2620 assert(size % 4 == 0);
2623 build_address_ptr(&addr, ptr, mem);
/* Assemble one 32-bit little-endian word of the constant's bits. */
2627 get_tarval_sub_bits(tv, ofs) |
2628 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2629 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2630 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2631 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2633 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2634 addr.index, addr.mem, imm);
2635 ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2637 ir_set_throws_exception(new_node, throws_exception);
2638 set_irn_pinned(new_node, get_irn_pinned(node));
2639 set_ia32_op_type(new_node, ia32_AddrModeD);
2640 set_ia32_ls_mode(new_node, mode_Iu);
2641 set_address(new_node, &addr);
2642 SET_IA32_ORIG_NODE(new_node, node);
/* Loop until all bytes of the constant are stored. */
2650 } while (size != 0);
/* Multiple partial stores are joined by a Sync; a single store needs none. */
2653 return new_rd_Sync(dbgi, new_block, i, ins);
2655 return get_Proj_pred(ins[0]);
2660 * Generate a vfist or vfisttp instruction.
2662 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2663 ir_node *index, ir_node *mem, ir_node *val)
/* Prefer fisttp (SSE3) when available: it truncates without fiddling
 * with the FPU control word. */
2665 if (ia32_cg_config.use_fisttp) {
2666 /* Note: fisttp ALWAYS pops the tos. We have to ensure here that the value is copied
2667 if other users exist */
2668 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2669 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
2670 be_new_Keep(block, 1, &value);
/* Fallback: classic fist with a temporarily switched (truncating) FPU
 * control word. */
2674 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2677 ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2683 * Transforms a general (no special case) Store.
2685 * @return the created ia32 Store node
2687 static ir_node *gen_general_Store(ir_node *node)
2689 ir_node *val = get_Store_value(node);
2690 ir_mode *mode = get_irn_mode(val);
2691 ir_node *block = get_nodes_block(node);
2692 ir_node *new_block = be_transform_node(block);
2693 ir_node *ptr = get_Store_ptr(node);
2694 ir_node *mem = get_Store_mem(node);
2695 dbg_info *dbgi = get_irn_dbg_info(node);
2696 int throws_exception = ir_throws_exception(node);
2699 ia32_address_t addr;
2701 /* check for destination address mode */
2702 new_node = try_create_dest_am(node);
2703 if (new_node != NULL)
2706 /* construct store address */
2707 memset(&addr, 0, sizeof(addr));
2708 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* Fill unused address parts with the no-register placeholder. */
2710 if (addr.base == NULL) {
2711 addr.base = noreg_GP;
2713 addr.base = be_transform_node(addr.base);
2716 if (addr.index == NULL) {
2717 addr.index = noreg_GP;
2719 addr.index = be_transform_node(addr.index);
2721 addr.mem = be_transform_node(mem);
2723 if (mode_is_float(mode)) {
2724 /* Convs (and strict-Convs) before stores are unnecessary if the mode
/* ... stays the same, so skip them. */
2726 while (is_Conv(val) && mode == get_irn_mode(val)) {
2727 ir_node *op = get_Conv_op(val);
2728 if (!mode_is_float(get_irn_mode(op)))
2732 new_val = be_transform_node(val);
/* Float store: SSE2 xStore, otherwise x87 vfst. */
2733 if (ia32_cg_config.use_sse2) {
2734 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2735 addr.index, addr.mem, new_val);
2737 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2738 addr.index, addr.mem, new_val, mode);
/* float->int Conv feeding a Store becomes a single fist on x87. */
2740 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2741 val = get_Conv_op(val);
2743 /* TODO: is this optimisation still necessary at all (middleend)? */
2744 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before
/* ... the conversion to integer. */
2746 while (is_Conv(val)) {
2747 ir_node *op = get_Conv_op(val);
2748 if (!mode_is_float(get_irn_mode(op)))
2750 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2754 new_val = be_transform_node(val);
2755 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
/* Integer store; 8-bit values use the dedicated 8-bit variant. */
2757 new_val = create_immediate_or_transform(val, 0);
2758 assert(mode != mode_b);
2760 if (get_mode_size_bits(mode) == 8) {
2761 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2762 addr.index, addr.mem, new_val);
2764 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2765 addr.index, addr.mem, new_val);
2768 ir_set_throws_exception(new_node, throws_exception);
2770 set_irn_pinned(new_node, get_irn_pinned(node));
2771 set_ia32_op_type(new_node, ia32_AddrModeD);
2772 set_ia32_ls_mode(new_node, mode);
2774 set_address(new_node, &addr);
2775 SET_IA32_ORIG_NODE(new_node, node);
2781 * Transforms a Store.
2783 * @return the created ia32 Store node
2785 static ir_node *gen_Store(ir_node *node)
2787 ir_node *val = get_Store_value(node);
2788 ir_mode *mode = get_irn_mode(val);
/* Float constants are stored as integer words; everything else goes
 * through the general path. */
2790 if (mode_is_float(mode) && is_Const(val)) {
2791 /* We can transform every floating const store
2792 into a sequence of integer stores.
2793 If the constant is already in a register,
2794 it would be better to use it, but we don't
2795 have this information here. */
2796 return gen_float_const_Store(node, val);
2798 return gen_general_Store(node);
2802 * Transforms a Switch.
2804 * @return the created ia32 SwitchJmp node
2806 static ir_node *create_Switch(ir_node *node)
2808 dbg_info *dbgi = get_irn_dbg_info(node);
2809 ir_node *block = be_transform_node(get_nodes_block(node));
2810 ir_node *sel = get_Cond_selector(node);
2811 ir_node *new_sel = be_transform_node(sel);
2812 long default_pn = get_Cond_default_proj(node);
2816 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
/* Create a private constant entity to hold the jump table. */
2818 entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2819 set_entity_visibility(entity, ir_visibility_private);
2820 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2822 /* TODO: we could perform some more matching here to also use the base
2823 * register of the address mode */
2825 = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, default_pn);
/* scale 2 => index * 4, the size of one table entry. */
2826 set_ia32_am_scale(new_node, 2);
2827 set_ia32_am_sc(new_node, entity);
2828 set_ia32_op_type(new_node, ia32_AddrModeS);
2829 set_ia32_ls_mode(new_node, mode_Iu);
2830 SET_IA32_ORIG_NODE(new_node, node);
2831 // FIXME This seems wrong. GCC uses PIC for switch on OS X.
2832 get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2838 * Transform a Cond node.
2840 static ir_node *gen_Cond(ir_node *node)
2842 ir_node *block = get_nodes_block(node);
2843 ir_node *new_block = be_transform_node(block);
2844 dbg_info *dbgi = get_irn_dbg_info(node);
2845 ir_node *sel = get_Cond_selector(node);
2846 ir_mode *sel_mode = get_irn_mode(sel);
2847 ir_node *flags = NULL;
2849 ia32_condition_code_t cc;
/* A non-boolean selector is a Switch, not a two-way branch. */
2851 if (sel_mode != mode_b) {
2852 return create_Switch(node);
2855 /* we get flags from a Cmp */
2856 flags = get_flags_node(sel, &cc);
2858 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2859 SET_IA32_ORIG_NODE(new_node, node);
2865 * Transform a be_Copy.
2867 static ir_node *gen_be_Copy(ir_node *node)
2869 ir_node *new_node = be_duplicate_node(node);
2870 ir_mode *mode = get_irn_mode(new_node);
/* Normalize all GP-register modes to 32-bit unsigned int. */
2872 if (ia32_mode_needs_gp_reg(mode)) {
2873 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare: fucomi when available, otherwise
 * fucom/ftst + fnstsw + sahf to move the FPU flags into EFLAGS. */
2879 static ir_node *create_Fucom(ir_node *node)
2881 dbg_info *dbgi = get_irn_dbg_info(node);
2882 ir_node *block = get_nodes_block(node);
2883 ir_node *new_block = be_transform_node(block);
2884 ir_node *left = get_Cmp_left(node);
2885 ir_node *new_left = be_transform_node(left);
2886 ir_node *right = get_Cmp_right(node);
2890 if (ia32_cg_config.use_fucomi) {
2891 new_right = be_transform_node(right);
2892 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2894 set_ia32_commutative(new_node);
2895 SET_IA32_ORIG_NODE(new_node, node);
/* ftst compares against 0 without needing a second operand. */
2897 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2898 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2900 new_right = be_transform_node(right);
2901 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2904 set_ia32_commutative(new_node);
2906 SET_IA32_ORIG_NODE(new_node, node);
/* sahf copies AH (the fnstsw result) into the EFLAGS register. */
2908 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2909 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE2 Ucomi float compare, using address mode for one
 * operand when possible. */
2915 static ir_node *create_Ucomi(ir_node *node)
2917 dbg_info *dbgi = get_irn_dbg_info(node);
2918 ir_node *src_block = get_nodes_block(node);
2919 ir_node *new_block = be_transform_node(src_block);
2920 ir_node *left = get_Cmp_left(node);
2921 ir_node *right = get_Cmp_right(node);
2923 ia32_address_mode_t am;
2924 ia32_address_t *addr = &am.addr;
2926 match_arguments(&am, src_block, left, right, NULL,
2927 match_commutative | match_am);
2929 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2930 addr->mem, am.new_op1, am.new_op2,
2932 set_am_attributes(new_node, &am);
2934 SET_IA32_ORIG_NODE(new_node, node);
2936 new_node = fix_mem_proj(new_node, &am);
2942 * returns true if it is assured, that the upper bits of a node are "clean"
2943 * which means for a 16 or 8 bit value, that the upper bits in the register
2944 * are 0 for unsigned and a copy of the last significant bit for signed
/* (i.e. the value is already zero-/sign-extended to 32 bit). */
2947 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2949 assert(ia32_mode_needs_gp_reg(mode));
/* Full-width values have no "upper" bits to worry about. */
2950 if (get_mode_size_bits(mode) >= 32)
2953 if (is_Proj(transformed_node))
2954 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2956 switch (get_ia32_irn_opcode(transformed_node)) {
2957 case iro_ia32_Conv_I2I:
2958 case iro_ia32_Conv_I2I8Bit: {
/* A conversion cleans the bits iff it extends with the same signedness
 * from an equal or smaller mode. */
2959 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2960 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2962 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2969 if (mode_is_signed(mode)) {
2970 return false; /* TODO handle signed modes */
/* Shr by a large enough constant guarantees zero upper bits. */
2972 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2973 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2974 const ia32_immediate_attr_t *attr
2975 = get_ia32_immediate_attr_const(right);
2976 if (attr->symconst == 0 &&
2977 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2981 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2985 /* TODO too conservative if shift amount is constant */
2986 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand suffices (zeros dominate). */
2989 if (!mode_is_signed(mode)) {
2991 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2992 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2994 /* TODO if one is known to be zero extended, then || is sufficient */
/* Generic binary ops: both operands must be clean. */
2999 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
3000 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
3002 case iro_ia32_Const:
3003 case iro_ia32_Immediate: {
3004 const ia32_immediate_attr_t *attr =
3005 get_ia32_immediate_attr_const(transformed_node);
/* Signed: everything above the sign bit must be all 0s or all 1s. */
3006 if (mode_is_signed(mode)) {
3007 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
3008 return shifted == 0 || shifted == -1;
/* Unsigned: everything above the value bits must be zero. */
3010 unsigned long shifted = (unsigned long)attr->offset;
3011 shifted >>= get_mode_size_bits(mode)-1;
3013 return shifted == 0;
3023 * Generate code for a Cmp.
3025 static ir_node *gen_Cmp(ir_node *node)
3027 dbg_info *dbgi = get_irn_dbg_info(node);
3028 ir_node *block = get_nodes_block(node);
3029 ir_node *new_block = be_transform_node(block);
3030 ir_node *left = get_Cmp_left(node);
3031 ir_node *right = get_Cmp_right(node);
3032 ir_mode *cmp_mode = get_irn_mode(left);
3034 ia32_address_mode_t am;
3035 ia32_address_t *addr = &am.addr;
/* Float compares go to SSE Ucomi or x87 Fucom. */
3037 if (mode_is_float(cmp_mode)) {
3038 if (ia32_cg_config.use_sse2) {
3039 return create_Ucomi(node);
3041 return create_Fucom(node);
3045 assert(ia32_mode_needs_gp_reg(cmp_mode));
3047 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3048 if (is_Const_0(right) &&
3050 get_irn_n_edges(left) == 1) {
3051 /* Test(and_left, and_right) */
3052 ir_node *and_left = get_And_left(left);
3053 ir_node *and_right = get_And_right(left);
3055 /* matze: code here used mode instead of cmd_mode, I think it is always
3056 * the same as cmp_mode, but I leave this here to see if this is really
3059 assert(get_irn_mode(and_left) == cmp_mode);
3061 match_arguments(&am, block, and_left, and_right, NULL,
3063 match_am | match_8bit_am | match_16bit_am |
3064 match_am_and_immediates | match_immediate);
3066 /* use 32bit compare mode if possible since the opcode is smaller */
3067 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3068 upper_bits_clean(am.new_op2, cmp_mode)) {
3069 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3072 if (get_mode_size_bits(cmp_mode) == 8) {
3073 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3074 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3076 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
3077 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3080 /* Cmp(left, right) */
3081 match_arguments(&am, block, left, right, NULL,
3082 match_commutative | match_am | match_8bit_am |
3083 match_16bit_am | match_am_and_immediates |
3085 /* use 32bit compare mode if possible since the opcode is smaller */
3086 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3087 upper_bits_clean(am.new_op2, cmp_mode)) {
3088 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3091 if (get_mode_size_bits(cmp_mode) == 8) {
3092 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3093 addr->index, addr->mem, am.new_op1,
3094 am.new_op2, am.ins_permuted);
3096 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3097 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3100 set_am_attributes(new_node, &am);
3101 set_ia32_ls_mode(new_node, cmp_mode);
3103 SET_IA32_ORIG_NODE(new_node, node);
3105 new_node = fix_mem_proj(new_node, &am);
/* Create a CMovcc from a Mux whose operands are GP-register values.
 * @p flags is the untransformed flags producer, @p new_flags its
 * transformed counterpart, @p cc the condition to select the true value. */
3110 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3111 ia32_condition_code_t cc)
3113 dbg_info *dbgi = get_irn_dbg_info(node);
3114 ir_node *block = get_nodes_block(node);
3115 ir_node *new_block = be_transform_node(block);
3116 ir_node *val_true = get_Mux_true(node);
3117 ir_node *val_false = get_Mux_false(node);
3119 ia32_address_mode_t am;
3120 ia32_address_t *addr;
3122 assert(ia32_cg_config.use_cmov);
3123 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3127 match_arguments(&am, block, val_false, val_true, flags,
3128 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* If matching swapped the operands, invert the condition accordingly. */
3130 if (am.ins_permuted)
3131 cc = ia32_negate_condition_code(cc);
3133 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3134 addr->mem, am.new_op1, am.new_op2, new_flags,
3136 set_am_attributes(new_node, &am);
3138 SET_IA32_ORIG_NODE(new_node, node);
3140 new_node = fix_mem_proj(new_node, &am);
3146 * Creates a ia32 Setcc instruction.
/* The 8-bit Setcc result is zero-extended when the target mode is wider. */
3148 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3149 ir_node *flags, ia32_condition_code_t cc,
3152 ir_mode *mode = get_irn_mode(orig_node);
3155 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3156 SET_IA32_ORIG_NODE(new_node, orig_node);
3158 /* we might need to conv the result up */
3159 if (get_mode_size_bits(mode) > 8) {
3160 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3161 nomem, new_node, mode_Bu);
3162 SET_IA32_ORIG_NODE(new_node, orig_node);
3169 * Create instruction for an unsigned Difference or Zero.
/* doz(a, b) = a > b ? a - b : 0, built as (a - b) & ~sbb(0, 0, carry). */
3171 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3173 ir_mode *mode = get_irn_mode(psi);
3183 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3184 match_mode_neutral | match_am | match_immediate | match_two_users);
3186 block = get_nodes_block(new_node);
/* gen_binop may hand back a Proj; recover the Sub itself to also take
 * its flags output. */
3188 if (is_Proj(new_node)) {
3189 sub = get_Proj_pred(new_node);
3192 set_irn_mode(sub, mode_T);
3193 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3195 assert(is_ia32_Sub(sub));
3196 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3198 dbgi = get_irn_dbg_info(psi);
/* sbb 0,0 materializes the carry as 0 or -1; Not turns it into a mask. */
3199 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3200 notn = new_bd_ia32_Not(dbgi, block, sbb);
3202 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3203 set_ia32_commutative(new_node);
3208 * Create an const array of two float consts.
3210 * @param c0 the first constant
3211 * @param c1 the second constant
3212 * @param new_mode IN/OUT for the mode of the constants, if NULL
3213 * smallest possible mode will be used
3215 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3218 ir_mode *mode = *new_mode;
3220 ir_initializer_t *initializer;
3221 ir_tarval *tv0 = get_Const_tarval(c0);
3222 ir_tarval *tv1 = get_Const_tarval(c1);
3225 /* detect the best mode for the constants */
3226 mode = get_tarval_mode(tv0);
/* Prefer single precision if both constants convert losslessly,
 * otherwise fall back to double. */
3228 if (mode != mode_F) {
3229 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3230 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3232 tv0 = tarval_convert_to(tv0, mode);
3233 tv1 = tarval_convert_to(tv1, mode);
3234 } else if (mode != mode_D) {
3235 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3236 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3238 tv0 = tarval_convert_to(tv0, mode);
3239 tv1 = tarval_convert_to(tv1, mode);
/* Build an array type and a private constant global entity for it. */
3246 tp = ia32_create_float_type(mode, 4);
3247 tp = ia32_create_float_array(tp);
3249 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3251 set_entity_ld_ident(ent, get_entity_ident(ent));
3252 set_entity_visibility(ent, ir_visibility_private);
3253 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3255 initializer = create_initializer_compound(2);
3257 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3258 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3260 set_entity_initializer(ent, initializer);
3267 * Possible transformations for creating a Setcc.
/* NOTE(review): the enumerator list is missing from this excerpt; see
 * find_const_transform() below for the names used (SETCC_TR_ADD,
 * SETCC_TR_LEA, SETCC_TR_SHL, SETCC_TR_NEG, SETCC_TR_AND, SETCC_TR_SET,
 * SETCC_TR_SBB, ...). */
3269 enum setcc_transform_insn {
/* Describes a small instruction sequence (steps) that maps the 0/1
 * result of a Setcc onto the requested constant pair. */
3282 typedef struct setcc_transform {
3284 ia32_condition_code_t cc;
3286 enum setcc_transform_insn transform;
3290 } setcc_transform_t;
3293 * Setcc can only handle 0 and 1 result.
3294 * Find a transformation that creates 0 and 1 from
/* ... the given true/false tarvals and record it step by step in @p res. */
3297 static void find_const_transform(ia32_condition_code_t cc,
3298 ir_tarval *t, ir_tarval *f,
3299 setcc_transform_t *res)
/* Normalize so that t is the non-zero / larger value, negating the
 * condition when the operands are swapped. */
3305 if (tarval_is_null(t)) {
3309 cc = ia32_negate_condition_code(cc);
3310 } else if (tarval_cmp(t, f) == ir_relation_less) {
3311 // now, t is the bigger one
3315 cc = ia32_negate_condition_code(cc);
/* A non-zero false value is handled by a trailing ADD of f, after which
 * the problem reduces to producing (t - f) and 0. */
3319 if (! tarval_is_null(f)) {
3320 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3323 res->steps[step].transform = SETCC_TR_ADD;
3325 if (t == tarval_bad)
3326 panic("constant subtract failed");
3327 if (! tarval_is_long(f))
3328 panic("tarval is not long");
3330 res->steps[step].val = get_tarval_long(f);
3332 f = tarval_sub(f, f, NULL);
3333 assert(tarval_is_null(f));
/* t == 1: the plain Setcc result already matches. */
3336 if (tarval_is_one(t)) {
3337 res->steps[step].transform = SETCC_TR_SET;
3338 res->num_steps = ++step;
/* t == -1: negate the Setcc result. */
3342 if (tarval_is_minus_one(t)) {
3343 res->steps[step].transform = SETCC_TR_NEG;
3345 res->steps[step].transform = SETCC_TR_SET;
3346 res->num_steps = ++step;
3349 if (tarval_is_long(t)) {
3350 long v = get_tarval_long(t);
3352 res->steps[step].val = 0;
/* Small multipliers are built with LEA (scale + optional add of the
 * source itself) or SHL; a pending ADD can be folded into the LEA. */
3355 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3357 res->steps[step].transform = SETCC_TR_LEAxx;
3358 res->steps[step].scale = 3; /* (a << 3) + a */
3361 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3363 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3364 res->steps[step].scale = 3; /* (a << 3) */
3367 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3369 res->steps[step].transform = SETCC_TR_LEAxx;
3370 res->steps[step].scale = 2; /* (a << 2) + a */
3373 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3375 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3376 res->steps[step].scale = 2; /* (a << 2) */
3379 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3381 res->steps[step].transform = SETCC_TR_LEAxx;
3382 res->steps[step].scale = 1; /* (a << 1) + a */
3385 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3387 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3388 res->steps[step].scale = 1; /* (a << 1) */
3391 res->num_steps = step;
/* General constant: negate to get an all-ones mask, then AND with v
 * (or SHL when v is a single bit). */
3394 if (! tarval_is_single_bit(t)) {
3395 res->steps[step].transform = SETCC_TR_AND;
3396 res->steps[step].val = v;
3398 res->steps[step].transform = SETCC_TR_NEG;
3400 int val = get_tarval_lowest_bit(t);
3403 res->steps[step].transform = SETCC_TR_SHL;
3404 res->steps[step].scale = val;
3408 res->steps[step].transform = SETCC_TR_SET;
3409 res->num_steps = ++step;
3412 panic("tarval is not long");
3416 * Transforms a Mux node into some code sequence.
3418 * @return The transformed node.
3420 static ir_node *gen_Mux(ir_node *node)
3422 dbg_info *dbgi = get_irn_dbg_info(node);
3423 ir_node *block = get_nodes_block(node);
3424 ir_node *new_block = be_transform_node(block);
3425 ir_node *mux_true = get_Mux_true(node);
3426 ir_node *mux_false = get_Mux_false(node);
3427 ir_node *sel = get_Mux_sel(node);
3428 ir_mode *mode = get_irn_mode(node);
3432 ia32_condition_code_t cc;
3434 assert(get_irn_mode(sel) == mode_b);
/* abs()/neg-abs() patterns are recognized first. */
3436 is_abs = ir_mux_is_abs(sel, mux_true, mux_false);
3438 if (ia32_mode_needs_gp_reg(mode)) {
3439 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3442 ir_node *op = ir_get_abs_op(sel, mux_true, mux_false);
3443 return create_float_abs(dbgi, block, op, is_abs < 0, node);
3447 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3448 if (mode_is_float(mode)) {
3449 ir_node *cmp_left = get_Cmp_left(sel);
3450 ir_node *cmp_right = get_Cmp_right(sel);
3451 ir_relation relation = get_Cmp_relation(sel);
/* SSE2: min/max patterns map to single instructions. */
3453 if (ia32_cg_config.use_sse2) {
3454 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3455 if (cmp_left == mux_true && cmp_right == mux_false) {
3456 /* Mux(a <= b, a, b) => MIN */
3457 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3458 match_commutative | match_am | match_two_users);
3459 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3460 /* Mux(a <= b, b, a) => MAX */
3461 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3462 match_commutative | match_am | match_two_users);
3464 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3465 if (cmp_left == mux_true && cmp_right == mux_false) {
3466 /* Mux(a >= b, a, b) => MAX */
3467 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3468 match_commutative | match_am | match_two_users);
3469 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3470 /* Mux(a >= b, b, a) => MIN */
3471 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3472 match_commutative | match_am | match_two_users);
/* Float Mux of two constants: materialize both in a const array and
 * load the selected one, indexed by the Setcc result. */
3477 if (is_Const(mux_true) && is_Const(mux_false)) {
3478 ia32_address_mode_t am;
3483 flags = get_flags_node(sel, &cc);
3484 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3486 if (ia32_cg_config.use_sse2) {
3487 /* cannot load from different mode on SSE */
3490 /* x87 can load any mode */
3494 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* Turn the 0/1 selector into an index scaled by the element size;
 * LEAs are chained since the AM scale maxes out at 8. */
3496 switch (get_mode_size_bytes(new_mode)) {
3506 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3507 set_ia32_am_scale(new_node, 2);
3512 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3513 set_ia32_am_scale(new_node, 1);
3516 /* arg, shift 16 NOT supported */
3518 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3521 panic("Unsupported constant size");
3524 am.ls_mode = new_mode;
3525 am.addr.base = get_symconst_base();
3526 am.addr.index = new_node;
3527 am.addr.mem = nomem;
3529 am.addr.scale = scale;
3530 am.addr.use_frame = 0;
3531 am.addr.tls_segment = false;
3532 am.addr.frame_entity = NULL;
3533 am.addr.symconst_sign = 0;
3534 am.mem_proj = am.addr.mem;
3535 am.op_type = ia32_AddrModeS;
3538 am.pinned = op_pin_state_floats;
3540 am.ins_permuted = false;
3542 if (ia32_cg_config.use_sse2)
3543 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3545 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3546 set_am_attributes(load, &am);
3548 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3550 panic("cannot transform floating point Mux");
/* Integer Mux from here on. */
3553 assert(ia32_mode_needs_gp_reg(mode));
3556 ir_node *cmp_left = get_Cmp_left(sel);
3557 ir_node *cmp_right = get_Cmp_right(sel);
3558 ir_relation relation = get_Cmp_relation(sel);
3559 ir_node *val_true = mux_true;
3560 ir_node *val_false = mux_false;
/* Normalize so the zero constant (if any) is the false value. */
3562 if (is_Const(val_true) && is_Const_null(val_true)) {
3563 ir_node *tmp = val_false;
3564 val_false = val_true;
3566 relation = get_negated_relation(relation);
/* Mux(a >/< b, a - b, 0) => unsigned difference-or-zero. */
3568 if (is_Const_0(val_false) && is_Sub(val_true)) {
3569 if ((relation & ir_relation_greater)
3570 && get_Sub_left(val_true) == cmp_left
3571 && get_Sub_right(val_true) == cmp_right) {
3572 return create_doz(node, cmp_left, cmp_right);
3574 if ((relation & ir_relation_less)
3575 && get_Sub_left(val_true) == cmp_right
3576 && get_Sub_right(val_true) == cmp_left) {
3577 return create_doz(node, cmp_right, cmp_left);
3582 flags = get_flags_node(sel, &cc);
3584 if (is_Const(mux_true) && is_Const(mux_false)) {
3585 /* both are const, good */
3586 ir_tarval *tv_true = get_Const_tarval(mux_true);
3587 ir_tarval *tv_false = get_Const_tarval(mux_false);
3588 setcc_transform_t res;
3591 find_const_transform(cc, tv_true, tv_false, &res);
/* Emit the recorded transformation steps in reverse order, starting
 * from the Setcc/Sbb result. */
3593 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3596 switch (res.steps[step].transform) {
3598 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3599 add_ia32_am_offs_int(new_node, res.steps[step].val);
3601 case SETCC_TR_ADDxx:
3602 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3605 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3606 set_ia32_am_scale(new_node, res.steps[step].scale);
3607 set_ia32_am_offs_int(new_node, res.steps[step].val);
3609 case SETCC_TR_LEAxx:
3610 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3611 set_ia32_am_scale(new_node, res.steps[step].scale);
3612 set_ia32_am_offs_int(new_node, res.steps[step].val);
3615 imm = ia32_immediate_from_long(res.steps[step].scale);
3616 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3619 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3622 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3625 imm = ia32_immediate_from_long(res.steps[step].val);
3626 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3629 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3632 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3635 panic("unknown setcc transform");
/* General case: conditional move. */
3639 new_node = create_CMov(node, sel, flags, cc);
/* Transform a lowered l_Setcc node into the real ia32 Setcc. */
3645 static ir_node *gen_ia32_l_Setcc(ir_node *node)
3647 ia32_condition_code_t cc;
3648 dbg_info *dbgi = get_irn_dbg_info(node);
3649 ir_node *block = get_nodes_block(node);
3650 ir_node *new_block = be_transform_node(block);
3651 ir_node *cond = get_irn_n(node, n_ia32_l_Setcc_cond);
3652 ir_node *flags = get_flags_node(cond, &cc);
3653 ir_node *new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3654 SET_IA32_ORIG_NODE(new_node, node);
3659 * Create a conversion from x87 state register to general purpose.
/* Implemented as fist to a stack slot followed by an integer Load. */
3661 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3663 ir_node *block = be_transform_node(get_nodes_block(node));
3664 ir_node *op = get_Conv_op(node);
3665 ir_node *new_op = be_transform_node(op);
3666 ir_graph *irg = current_ir_graph;
3667 dbg_info *dbgi = get_irn_dbg_info(node);
3668 ir_mode *mode = get_irn_mode(node);
3669 ir_node *frame = get_irg_frame(irg);
3670 ir_node *fist, *load, *mem;
3672 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3673 set_irn_pinned(fist, op_pin_state_floats);
3674 set_ia32_use_frame(fist);
3675 set_ia32_op_type(fist, ia32_AddrModeD);
/* vfist and vfisttp share the memory proj number. */
3677 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3678 mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3680 assert(get_mode_size_bits(mode) <= 32);
3681 /* exception we can only store signed 32 bit integers, so for unsigned
3682 we store a 64bit (signed) integer and load the lower bits */
3683 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3684 set_ia32_ls_mode(fist, mode_Ls);
3686 set_ia32_ls_mode(fist, mode_Is);
3688 SET_IA32_ORIG_NODE(fist, node);
/* Reload the integer result from the stack slot. */
3691 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3693 set_irn_pinned(load, op_pin_state_floats);
3694 set_ia32_use_frame(load);
3695 set_ia32_op_type(load, ia32_AddrModeS);
3696 set_ia32_ls_mode(load, mode_Is);
/* Request a stack entity big enough for the value that was stored. */
3697 if (get_ia32_ls_mode(fist) == mode_Ls) {
3698 ia32_attr_t *attr = get_ia32_attr(load);
3699 attr->data.need_64bit_stackent = 1;
3701 ia32_attr_t *attr = get_ia32_attr(load);
3702 attr->data.need_32bit_stackent = 1;
3704 SET_IA32_ORIG_NODE(load, node);
3706 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3710 * Creates a x87 strict Conv by placing a Store and a Load
/** Creates a strict x87 conversion by round-tripping the value through
 *  memory: a vfst in tgt_mode followed by a vfld forces the value to be
 *  rounded to tgt_mode precision. */
3712 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3714 ir_node *block = get_nodes_block(node);
3715 ir_graph *irg = get_Block_irg(block);
3716 dbg_info *dbgi = get_irn_dbg_info(node);
3717 ir_node *frame = get_irg_frame(irg);
3719 ir_node *store, *load;
/* store with target precision: this is where the rounding happens */
3722 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3723 set_ia32_use_frame(store);
3724 set_ia32_op_type(store, ia32_AddrModeD);
3725 SET_IA32_ORIG_NODE(store, node);
3727 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
/* load the rounded value back onto the x87 stack */
3729 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3730 set_ia32_use_frame(load);
3731 set_ia32_op_type(load, ia32_AddrModeS);
3732 SET_IA32_ORIG_NODE(load, node);
3734 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/** Constructs an integer-to-integer conversion node, selecting the
 *  8-bit variant when the (smaller) mode is 8 bits wide. */
3738 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3739 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3741 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
/* 8-bit conversions need the 8Bit constructor (different register
 * requirements for the value operand) */
3743 func = get_mode_size_bits(mode) == 8 ?
3744 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3745 return func(dbgi, block, base, index, mem, val, mode);
3749 * Create a conversion from general purpose to x87 register
/** Creates a conversion from a general purpose integer to an x87 float
 *  (fild). Tries source address mode first; otherwise spills the
 *  integer to the frame and filds it from there. For unsigned 32-bit
 *  values a 64-bit slot with a zeroed high word is used, since fild
 *  only reads signed integers. */
3751 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3753 ir_node *src_block = get_nodes_block(node);
3754 ir_node *block = be_transform_node(src_block);
3755 ir_graph *irg = get_Block_irg(block);
3756 dbg_info *dbgi = get_irn_dbg_info(node);
3757 ir_node *op = get_Conv_op(node);
3758 ir_node *new_op = NULL;
3760 ir_mode *store_mode;
3766 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3767 if (possible_int_mode_for_fp(src_mode)) {
3768 ia32_address_mode_t am;
3770 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3771 if (am.op_type == ia32_AddrModeS) {
3772 ia32_address_t *addr = &am.addr;
/* fild directly from the matched memory operand */
3774 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3775 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3777 set_am_attributes(fild, &am);
3778 SET_IA32_ORIG_NODE(fild, node);
3780 fix_mem_proj(fild, &am);
/* no source AM matched: transform the operand normally */
3785 if (new_op == NULL) {
3786 new_op = be_transform_node(op);
3789 mode = get_irn_mode(op);
3791 /* first convert to 32 bit signed if necessary */
3792 if (get_mode_size_bits(src_mode) < 32) {
3793 if (!upper_bits_clean(new_op, src_mode)) {
3794 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3795 SET_IA32_ORIG_NODE(new_op, node);
3800 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can read it */
3803 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3805 set_ia32_use_frame(store);
3806 set_ia32_op_type(store, ia32_AddrModeD);
3807 set_ia32_ls_mode(store, mode_Iu);
3809 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3811 /* exception for 32bit unsigned, do a 64bit spill+load */
3812 if (!mode_is_signed(mode)) {
3815 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* zero the upper 4 bytes of the 64-bit slot */
3817 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3818 noreg_GP, nomem, zero_const);
3819 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3821 set_ia32_use_frame(zero_store);
3822 set_ia32_op_type(zero_store, ia32_AddrModeD);
3823 add_ia32_am_offs_int(zero_store, 4);
3824 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both stores: merge them with a Sync */
3826 in[0] = zero_store_mem;
3829 store_mem = new_rd_Sync(dbgi, block, 2, in);
3830 store_mode = mode_Ls;
3832 store_mode = mode_Is;
3836 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3838 set_ia32_use_frame(fild);
3839 set_ia32_op_type(fild, ia32_AddrModeS);
3840 set_ia32_ls_mode(fild, store_mode);
3842 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3848 * Create a conversion from one integer mode into another one
/** Creates a conversion between two integer modes. The conversion is
 *  elided when the operand's upper bits are already known to be clean
 *  for the smaller of the two modes. */
3850 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3851 dbg_info *dbgi, ir_node *block, ir_node *op,
3854 ir_node *new_block = be_transform_node(block);
3856 ir_mode *smaller_mode;
3857 ia32_address_mode_t am;
3858 ia32_address_t *addr = &am.addr;
/* only the smaller of the two modes decides what must be sign/zero
 * extended */
3861 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3862 smaller_mode = src_mode;
3864 smaller_mode = tgt_mode;
3867 #ifdef DEBUG_libfirm
3869 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3874 match_arguments(&am, block, NULL, op, NULL,
3875 match_am | match_8bit_am | match_16bit_am);
3877 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3878 /* unnecessary conv. in theory it shouldn't have been AM */
3879 assert(is_ia32_NoReg_GP(addr->base));
3880 assert(is_ia32_NoReg_GP(addr->index));
3881 assert(is_NoMem(addr->mem));
3882 assert(am.addr.offset == 0);
3883 assert(am.addr.symconst_ent == NULL);
3887 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3888 addr->mem, am.new_op2, smaller_mode);
3889 set_am_attributes(new_node, &am);
3890 /* match_arguments assume that out-mode = in-mode, this isn't true here
3892 set_ia32_ls_mode(new_node, smaller_mode);
3893 SET_IA32_ORIG_NODE(new_node, node);
3894 new_node = fix_mem_proj(new_node, &am);
3899 * Transforms a Conv node.
3901 * @return The created ia32 Conv node
/** Transforms a Conv node, dispatching on the source/target mode
 *  combination (float->float, float->int, int->float, int->int) and on
 *  whether SSE2 or x87 is used.
 *  @return the created ia32 node (or the transformed operand when the
 *          Conv turns out to be a no-op) */
3903 static ir_node *gen_Conv(ir_node *node)
3905 ir_node *block = get_nodes_block(node);
3906 ir_node *new_block = be_transform_node(block);
3907 ir_node *op = get_Conv_op(node);
3908 ir_node *new_op = NULL;
3909 dbg_info *dbgi = get_irn_dbg_info(node);
3910 ir_mode *src_mode = get_irn_mode(op);
3911 ir_mode *tgt_mode = get_irn_mode(node);
3912 int src_bits = get_mode_size_bits(src_mode);
3913 int tgt_bits = get_mode_size_bits(tgt_mode);
3914 ir_node *res = NULL;
/* 64-bit integers must have been lowered before this phase */
3916 assert(!mode_is_int(src_mode) || src_bits <= 32);
3917 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3919 /* modeB -> X should already be lowered by the lower_mode_b pass */
3920 if (src_mode == mode_b) {
3921 panic("ConvB not lowered %+F", node);
3924 if (src_mode == tgt_mode) {
3925 if (get_Conv_strict(node)) {
3926 if (ia32_cg_config.use_sse2) {
3927 /* when we are in SSE mode, we can kill all strict no-op conversion */
3928 return be_transform_node(op);
3931 /* this should be optimized already, but who knows... */
3932 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3933 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3934 return be_transform_node(op);
3938 if (mode_is_float(src_mode)) {
3939 new_op = be_transform_node(op);
3940 /* we convert from float ... */
3941 if (mode_is_float(tgt_mode)) {
3943 if (ia32_cg_config.use_sse2) {
3944 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3945 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3947 set_ia32_ls_mode(res, tgt_mode);
3949 if (get_Conv_strict(node)) {
3950 /* if fp_no_float_fold is not set then we assume that we
3951 * don't have any float operations in a non
3952 * mode_float_arithmetic mode and can skip strict upconvs */
3953 if (src_bits < tgt_bits) {
3954 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* strict downconv needs the memory round trip to round */
3957 res = gen_x87_strict_conv(tgt_mode, new_op);
3958 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3962 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3967 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3968 if (ia32_cg_config.use_sse2) {
3969 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3971 set_ia32_ls_mode(res, src_mode);
/* x87: store as integer and reload */
3973 return gen_x87_fp_to_gp(node);
3977 /* we convert from int ... */
3978 if (mode_is_float(tgt_mode)) {
3980 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3981 if (ia32_cg_config.use_sse2) {
3982 new_op = be_transform_node(op);
3983 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3985 set_ia32_ls_mode(res, tgt_mode);
3987 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3988 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3989 res = gen_x87_gp_to_fp(node, src_mode);
3991 /* we need a strict-Conv, if the int mode has more bits than the
3993 if (float_mantissa < int_mantissa) {
3994 res = gen_x87_strict_conv(tgt_mode, res);
3995 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3999 } else if (tgt_mode == mode_b) {
4000 /* mode_b lowering already took care that we only have 0/1 values */
4001 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
4002 src_mode, tgt_mode));
4003 return be_transform_node(op);
4006 if (src_bits == tgt_bits) {
4007 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
4008 src_mode, tgt_mode));
4009 return be_transform_node(op);
4012 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/** Tries to encode the node as an ia32 immediate (subject to the given
 *  asm-style constraint letter); falls back to a normal transform. */
4020 static ir_node *create_immediate_or_transform(ir_node *node,
4021 char immediate_constraint_type)
4023 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
4024 if (new_node == NULL) {
4025 new_node = be_transform_node(node);
4031 * Transforms a FrameAddr into an ia32 Add.
/** Transforms a be_FrameAddr into an ia32 Lea computing the entity's
 *  address relative to the frame pointer. */
4033 static ir_node *gen_be_FrameAddr(ir_node *node)
4035 ir_node *block = be_transform_node(get_nodes_block(node));
4036 ir_node *op = be_get_FrameAddr_frame(node);
4037 ir_node *new_op = be_transform_node(op);
4038 dbg_info *dbgi = get_irn_dbg_info(node);
4041 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
/* the frame entity's final offset is filled in after stack layout */
4042 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
4043 set_ia32_use_frame(new_node);
4045 SET_IA32_ORIG_NODE(new_node, node);
4051 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/** Transforms a be_Return. When SSE2 is in use and a float is returned,
 *  the value must be moved from XMM0 to the x87 TOS (calling convention)
 *  via a store/load pair through the frame; otherwise the Return is
 *  simply duplicated. */
4053 static ir_node *gen_be_Return(ir_node *node)
4055 ir_graph *irg = current_ir_graph;
4056 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
4057 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
4058 ir_node *new_ret_val = be_transform_node(ret_val);
4059 ir_node *new_ret_mem = be_transform_node(ret_mem);
4060 ir_entity *ent = get_irg_entity(irg);
4061 ir_type *tp = get_entity_type(ent);
4062 dbg_info *dbgi = get_irn_dbg_info(node);
4063 ir_node *block = be_transform_node(get_nodes_block(node));
/* fast path: nothing to fix up unless SSE2 returns a float */
4077 assert(ret_val != NULL);
4078 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4079 return be_duplicate_node(node);
4082 res_type = get_method_res_type(tp, 0);
4084 if (! is_Primitive_type(res_type)) {
4085 return be_duplicate_node(node);
4088 mode = get_type_mode(res_type);
4089 if (! mode_is_float(mode)) {
4090 return be_duplicate_node(node);
4093 assert(get_method_n_ress(tp) == 1);
4095 frame = get_irg_frame(irg);
4097 /* store xmm0 onto stack */
4098 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4099 new_ret_mem, new_ret_val);
4100 set_ia32_ls_mode(sse_store, mode);
4101 set_ia32_op_type(sse_store, ia32_AddrModeD);
4102 set_ia32_use_frame(sse_store);
4103 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4105 /* load into x87 register */
4106 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4107 set_ia32_op_type(fld, ia32_AddrModeS);
4108 set_ia32_use_frame(fld);
4110 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4111 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4113 /* create a new return */
4114 arity = get_irn_arity(node);
4115 in = ALLOCAN(ir_node*, arity);
4116 pop = be_Return_get_pop(node);
4117 for (i = 0; i < arity; ++i) {
4118 ir_node *op = get_irn_n(node, i);
/* substitute the x87 value / memory for the old return inputs */
4119 if (op == ret_val) {
4121 } else if (op == ret_mem) {
4124 in[i] = be_transform_node(op);
4127 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4128 copy_node_attr(irg, node, new_node);
4134 * Transform a be_AddSP into an ia32_SubSP.
/** Transforms a be_AddSP into an ia32_SubSP: the stack grows downwards,
 *  so reserving space means subtracting from ESP. */
4136 static ir_node *gen_be_AddSP(ir_node *node)
4138 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4139 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4141 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4142 match_am | match_immediate);
4143 assert(is_ia32_SubSP(new_node));
/* the stack-pointer output must live in ESP */
4144 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4145 &ia32_registers[REG_ESP]);
4150 * Transform a be_SubSP into an ia32_AddSP
/** Transforms a be_SubSP into an ia32_AddSP: freeing stack space adds
 *  to ESP (the stack grows downwards). */
4152 static ir_node *gen_be_SubSP(ir_node *node)
4154 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4155 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4157 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4158 match_am | match_immediate);
4159 assert(is_ia32_AddSP(new_node));
/* the stack-pointer output must live in ESP */
4160 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4161 &ia32_registers[REG_ESP]);
4166 * Change some phi modes
/** Transforms a Phi node: the register requirement for the result is
 *  derived from its mode (gp / xmm / vfp), while the Phi keeps its old
 *  inputs for now — they are fixed up later to allow loops. */
4168 static ir_node *gen_Phi(ir_node *node)
4170 const arch_register_req_t *req;
4171 ir_node *block = be_transform_node(get_nodes_block(node));
4172 ir_graph *irg = current_ir_graph;
4173 dbg_info *dbgi = get_irn_dbg_info(node);
4174 ir_mode *mode = get_irn_mode(node);
4177 if (ia32_mode_needs_gp_reg(mode)) {
4178 /* we shouldn't have any 64bit stuff around anymore */
4179 assert(get_mode_size_bits(mode) <= 32);
4180 /* all integer operations are on 32bit registers now */
4182 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4183 } else if (mode_is_float(mode)) {
4184 if (ia32_cg_config.use_sse2) {
4186 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4189 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* e.g. memory or control-flow Phis need no register */
4192 req = arch_no_register_req;
4195 /* phi nodes allow loops, so we use the old arguments for now
4196 * and fix this later */
4197 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4198 get_irn_in(node) + 1);
4199 copy_node_attr(irg, node, phi);
4200 be_duplicate_deps(node, phi);
4202 arch_set_irn_register_req_out(phi, 0, req);
/* make sure the predecessors get transformed even though we kept
 * the old inputs */
4204 be_enqueue_preds(node);
/** Transforms an unconditional Jmp into an ia32 Jmp. */
4209 static ir_node *gen_Jmp(ir_node *node)
4211 ir_node *block = get_nodes_block(node);
4212 ir_node *new_block = be_transform_node(block);
4213 dbg_info *dbgi = get_irn_dbg_info(node);
4216 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4217 SET_IA32_ORIG_NODE(new_node, node);
4225 static ir_node *gen_IJmp(ir_node *node)
4227 ir_node *block = get_nodes_block(node);
4228 ir_node *new_block = be_transform_node(block);
4229 dbg_info *dbgi = get_irn_dbg_info(node);
4230 ir_node *op = get_IJmp_target(node);
4232 ia32_address_mode_t am;
4233 ia32_address_t *addr = &am.addr;
4235 assert(get_irn_mode(op) == mode_P);
4237 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4239 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4240 addr->mem, am.new_op2);
4241 set_am_attributes(new_node, &am);
4242 SET_IA32_ORIG_NODE(new_node, node);
4244 new_node = fix_mem_proj(new_node, &am);
/** Transforms a lowered l_Add (part of 64-bit arithmetic lowering) into
 *  a real ia32 Add in mode_T, so that its flags output can be consumed
 *  by the accompanying Adc. */
4249 static ir_node *gen_ia32_l_Add(ir_node *node)
4251 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4252 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4253 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4254 match_commutative | match_am | match_immediate |
4255 match_mode_neutral);
/* gen_binop may have produced a Proj; we need the Add itself */
4257 if (is_Proj(lowered)) {
4258 lowered = get_Proj_pred(lowered);
4260 assert(is_ia32_Add(lowered));
4261 set_irn_mode(lowered, mode_T);
4267 static ir_node *gen_ia32_l_Adc(ir_node *node)
4269 return gen_binop_flags(node, new_bd_ia32_Adc,
4270 match_commutative | match_am | match_immediate |
4271 match_mode_neutral);
4275 * Transforms a l_MulS into a "real" MulS node.
4277 * @return the created ia32 Mul node
/** Transforms a lowered l_Mul into a real ia32 Mul (unsigned widening
 *  multiply producing EDX:EAX). */
4279 static ir_node *gen_ia32_l_Mul(ir_node *node)
4281 ir_node *left = get_binop_left(node);
4282 ir_node *right = get_binop_right(node);
4284 return gen_binop(node, left, right, new_bd_ia32_Mul,
4285 match_commutative | match_am | match_mode_neutral);
4289 * Transforms a l_IMulS into a "real" IMul1OPS node.
4291 * @return the created ia32 IMul1OP node
/** Transforms a lowered l_IMul into a real ia32 IMul1OP (signed
 *  widening multiply producing EDX:EAX). */
4293 static ir_node *gen_ia32_l_IMul(ir_node *node)
4295 ir_node *left = get_binop_left(node);
4296 ir_node *right = get_binop_right(node);
4298 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4299 match_commutative | match_am | match_mode_neutral);
/** Transforms a lowered l_Sub into a real ia32 Sub in mode_T, so that
 *  its flags output can be consumed by the accompanying Sbb. */
4302 static ir_node *gen_ia32_l_Sub(ir_node *node)
4304 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4305 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4306 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4307 match_am | match_immediate | match_mode_neutral);
/* gen_binop may have produced a Proj; we need the Sub itself */
4309 if (is_Proj(lowered)) {
4310 lowered = get_Proj_pred(lowered);
4312 assert(is_ia32_Sub(lowered));
4313 set_irn_mode(lowered, mode_T);
/** Transforms a lowered l_Sbb into an ia32 Sbb (subtract with borrow). */
4319 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4321 return gen_binop_flags(node, new_bd_ia32_Sbb,
4322 match_am | match_immediate | match_mode_neutral);
/** Transforms a lowered 64-bit-int-to-float conversion: the low/high
 *  words are stored to a 64-bit frame slot and filded. For an unsigned
 *  source, fild interpreted the value as signed, so ULLBIAS (2^64) is
 *  conditionally added back (selected through an address computed from
 *  the sign bit). x87 only; not implemented for SSE2. */
4325 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4327 ir_node *src_block = get_nodes_block(node);
4328 ir_node *block = be_transform_node(src_block);
4329 ir_graph *irg = current_ir_graph;
4330 dbg_info *dbgi = get_irn_dbg_info(node);
4331 ir_node *frame = get_irg_frame(irg);
4332 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4333 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4334 ir_node *new_val_low = be_transform_node(val_low);
4335 ir_node *new_val_high = be_transform_node(val_high);
4337 ir_node *sync, *fild, *res;
4339 ir_node *store_high;
4343 if (ia32_cg_config.use_sse2) {
4344 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* write both halves into one 64-bit stack slot */
4348 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4350 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4352 SET_IA32_ORIG_NODE(store_low, node);
4353 SET_IA32_ORIG_NODE(store_high, node);
4355 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4356 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4358 set_ia32_use_frame(store_low);
4359 set_ia32_use_frame(store_high);
4360 set_ia32_op_type(store_low, ia32_AddrModeD);
4361 set_ia32_op_type(store_high, ia32_AddrModeD);
4362 set_ia32_ls_mode(store_low, mode_Iu);
4363 set_ia32_ls_mode(store_high, mode_Is);
/* the high word lives 4 bytes above the low word */
4364 add_ia32_am_offs_int(store_high, 4);
4368 sync = new_rd_Sync(dbgi, block, 2, in);
/* do the 64bit signed load */
4371 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4373 set_ia32_use_frame(fild);
4374 set_ia32_op_type(fild, ia32_AddrModeS);
4375 set_ia32_ls_mode(fild, mode_Ls);
4377 SET_IA32_ORIG_NODE(fild, node);
4379 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4381 if (! mode_is_signed(get_irn_mode(val_high))) {
4382 ia32_address_mode_t am;
/* shift the sign bit down to bit 0: the index selects between the
 * 0.0 and 2^64 entries of the bias constant */
4384 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4387 am.addr.base = get_symconst_base();
4388 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4389 am.addr.mem = nomem;
4392 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4393 am.addr.tls_segment = false;
4394 am.addr.use_frame = 0;
4395 am.addr.frame_entity = NULL;
4396 am.addr.symconst_sign = 0;
4397 am.ls_mode = mode_F;
4398 am.mem_proj = nomem;
4399 am.op_type = ia32_AddrModeS;
4401 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4402 am.pinned = op_pin_state_floats;
4404 am.ins_permuted = false;
/* add the selected bias constant to the filded value */
4406 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4407 am.new_op1, am.new_op2, get_fpcw());
4408 set_am_attributes(fadd, &am);
4410 set_irn_mode(fadd, mode_T);
4411 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/** Transforms a lowered float-to-64-bit-int conversion: the float value
 *  is fist-stored as a 64-bit integer into a frame slot; the actual
 *  result halves are produced later by gen_Proj_l_FloattoLL.
 *  @return the memory proj of the fist */
4416 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4418 ir_node *src_block = get_nodes_block(node);
4419 ir_node *block = be_transform_node(src_block);
4420 ir_graph *irg = get_Block_irg(block);
4421 dbg_info *dbgi = get_irn_dbg_info(node);
4422 ir_node *frame = get_irg_frame(irg);
4423 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4424 ir_node *new_val = be_transform_node(val);
4427 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4428 SET_IA32_ORIG_NODE(fist, node);
4429 set_ia32_use_frame(fist);
4430 set_ia32_op_type(fist, ia32_AddrModeD);
4431 set_ia32_ls_mode(fist, mode_Ls);
/* the memory proj number must be identical for fist and fisttp,
 * since gen_vfist may have produced either variant */
4433 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4434 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
4437 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4439 ir_node *block = be_transform_node(get_nodes_block(node));
4440 ir_graph *irg = get_Block_irg(block);
4441 ir_node *pred = get_Proj_pred(node);
4442 ir_node *new_pred = be_transform_node(pred);
4443 ir_node *frame = get_irg_frame(irg);
4444 dbg_info *dbgi = get_irn_dbg_info(node);
4445 long pn = get_Proj_proj(node);
4450 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4451 SET_IA32_ORIG_NODE(load, node);
4452 set_ia32_use_frame(load);
4453 set_ia32_op_type(load, ia32_AddrModeS);
4454 set_ia32_ls_mode(load, mode_Iu);
4455 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4456 * 32 bit from it with this particular load */
4457 attr = get_ia32_attr(load);
4458 attr->data.need_64bit_stackent = 1;
4460 if (pn == pn_ia32_l_FloattoLL_res_high) {
4461 add_ia32_am_offs_int(load, 4);
4463 assert(pn == pn_ia32_l_FloattoLL_res_low);
4466 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4472 * Transform the Projs of an AddSP.
4474 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4476 ir_node *pred = get_Proj_pred(node);
4477 ir_node *new_pred = be_transform_node(pred);
4478 dbg_info *dbgi = get_irn_dbg_info(node);
4479 long proj = get_Proj_proj(node);
4481 if (proj == pn_be_AddSP_sp) {
4482 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4483 pn_ia32_SubSP_stack);
4484 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4486 } else if (proj == pn_be_AddSP_res) {
4487 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4488 pn_ia32_SubSP_addr);
4489 } else if (proj == pn_be_AddSP_M) {
4490 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4493 panic("No idea how to transform proj->AddSP");
4497 * Transform the Projs of a SubSP.
4499 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4501 ir_node *pred = get_Proj_pred(node);
4502 ir_node *new_pred = be_transform_node(pred);
4503 dbg_info *dbgi = get_irn_dbg_info(node);
4504 long proj = get_Proj_proj(node);
4506 if (proj == pn_be_SubSP_sp) {
4507 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4508 pn_ia32_AddSP_stack);
4509 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4511 } else if (proj == pn_be_SubSP_M) {
4512 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4515 panic("No idea how to transform proj->SubSP");
4519 * Transform and renumber the Projs from a Load.
/** Transforms and renumbers the Projs of a Load. The transformed
 *  predecessor may be an ia32 Load, a Conv with folded load, an SSE
 *  xLoad or an x87 vfld — each with its own proj numbering. ProjMs of
 *  multi-user loads are deferred because the load may still be folded
 *  into an address mode. */
4521 static ir_node *gen_Proj_Load(ir_node *node)
4524 ir_node *pred = get_Proj_pred(node);
4525 dbg_info *dbgi = get_irn_dbg_info(node);
4526 long proj = get_Proj_proj(node);
4528 /* loads might be part of source address mode matches, so we don't
4529 * transform the ProjMs yet (with the exception of loads whose result is
4532 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4535 /* this is needed, because sometimes we have loops that are only
4536 reachable through the ProjM */
4537 be_enqueue_preds(node);
4538 /* do it in 2 steps, to silence firm verifier */
4539 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4540 set_Proj_proj(res, pn_ia32_mem);
4544 /* renumber the proj */
4545 new_pred = be_transform_node(pred);
4546 if (is_ia32_Load(new_pred)) {
4547 switch ((pn_Load)proj) {
4549 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4551 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4552 case pn_Load_X_except:
4553 /* This Load might raise an exception. Mark it. */
4554 set_ia32_exc_label(new_pred, 1);
4555 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4556 case pn_Load_X_regular:
4557 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* the load was folded into a Conv as source address mode */
4559 } else if (is_ia32_Conv_I2I(new_pred) ||
4560 is_ia32_Conv_I2I8Bit(new_pred)) {
4561 set_irn_mode(new_pred, mode_T);
4562 switch ((pn_Load)proj) {
4564 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4566 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4567 case pn_Load_X_except:
4568 /* This Load might raise an exception. Mark it. */
4569 set_ia32_exc_label(new_pred, 1);
4570 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4571 case pn_Load_X_regular:
4572 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4574 } else if (is_ia32_xLoad(new_pred)) {
4575 switch ((pn_Load)proj) {
4577 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4579 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4580 case pn_Load_X_except:
4581 /* This Load might raise an exception. Mark it. */
4582 set_ia32_exc_label(new_pred, 1);
4583 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4584 case pn_Load_X_regular:
4585 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4587 } else if (is_ia32_vfld(new_pred)) {
4588 switch ((pn_Load)proj) {
4590 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4592 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4593 case pn_Load_X_except:
4594 /* This Load might raise an exception. Mark it. */
4595 set_ia32_exc_label(new_pred, 1);
4596 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4597 case pn_Load_X_regular:
4598 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4601 /* can happen for ProJMs when source address mode happened for the
4604 /* however it should not be the result proj, as that would mean the
4605 load had multiple users and should not have been used for
4607 if (proj != pn_Load_M) {
4608 panic("internal error: transformed node not a Load");
4610 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4613 panic("No idea how to transform Proj(Load) %+F", node);
/** Transforms and renumbers the Projs of a Store. The transformed
 *  predecessor may be an ia32 Store, an x87 fist/fisttp/vfst, an SSE
 *  xStore, a Sync (from gen_float_const_Store) or a node with folded
 *  destination address mode. */
4616 static ir_node *gen_Proj_Store(ir_node *node)
4618 ir_node *pred = get_Proj_pred(node);
4619 ir_node *new_pred = be_transform_node(pred);
4620 dbg_info *dbgi = get_irn_dbg_info(node);
4621 long pn = get_Proj_proj(node);
4623 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4624 switch ((pn_Store)pn) {
4626 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4627 case pn_Store_X_except:
4628 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4629 case pn_Store_X_regular:
4630 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4632 } else if (is_ia32_vfist(new_pred)) {
4633 switch ((pn_Store)pn) {
4635 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4636 case pn_Store_X_except:
4637 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4638 case pn_Store_X_regular:
4639 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4641 } else if (is_ia32_vfisttp(new_pred)) {
4642 switch ((pn_Store)pn) {
4644 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4645 case pn_Store_X_except:
4646 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4647 case pn_Store_X_regular:
4648 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4650 } else if (is_ia32_vfst(new_pred)) {
4651 switch ((pn_Store)pn) {
4653 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4654 case pn_Store_X_except:
4655 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4656 case pn_Store_X_regular:
4657 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4659 } else if (is_ia32_xStore(new_pred)) {
4660 switch ((pn_Store)pn) {
4662 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4663 case pn_Store_X_except:
4664 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4665 case pn_Store_X_regular:
4666 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4668 } else if (is_Sync(new_pred)) {
4669 /* hack for the case that gen_float_const_Store produced a Sync */
4670 if (pn == pn_Store_M) {
4673 panic("exception control flow for gen_float_const_Store not implemented yet");
4674 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4675 /* destination address mode */
4676 if (pn == pn_Store_M) {
4679 panic("exception control flow for destination AM not implemented yet");
4682 panic("No idea how to transform Proj(Store) %+F", node);
4686 * Transform and renumber the Projs from a Div or Mod instruction.
/** Transforms and renumbers the Projs of a Div. The transformed
 *  predecessor may be an integer Div/IDiv, an SSE xDiv or an x87 vfdiv;
 *  each has its own result proj numbers. */
4688 static ir_node *gen_Proj_Div(ir_node *node)
4690 ir_node *pred = get_Proj_pred(node);
4691 ir_node *new_pred = be_transform_node(pred);
4692 dbg_info *dbgi = get_irn_dbg_info(node);
4693 long proj = get_Proj_proj(node);
/* Div and IDiv share their proj numbering, so one case handles both */
4695 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4696 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4698 switch ((pn_Div)proj) {
4700 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4701 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4702 } else if (is_ia32_xDiv(new_pred)) {
4703 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4704 } else if (is_ia32_vfdiv(new_pred)) {
4705 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4707 panic("Div transformed to unexpected thing %+F", new_pred);
4710 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4711 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4712 } else if (is_ia32_xDiv(new_pred)) {
4713 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4714 } else if (is_ia32_vfdiv(new_pred)) {
4715 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4717 panic("Div transformed to unexpected thing %+F", new_pred);
4719 case pn_Div_X_except:
/* the Div may raise an exception: mark it for codegen */
4720 set_ia32_exc_label(new_pred, 1);
4721 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4722 case pn_Div_X_regular:
4723 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4726 panic("No idea how to transform proj->Div");
4730 * Transform and renumber the Projs from a Div or Mod instruction.
/** Transforms and renumbers the Projs of a Mod. Only the integer
 *  Div/IDiv can produce a remainder, so no SSE/x87 cases exist here. */
4732 static ir_node *gen_Proj_Mod(ir_node *node)
4734 ir_node *pred = get_Proj_pred(node);
4735 ir_node *new_pred = be_transform_node(pred);
4736 dbg_info *dbgi = get_irn_dbg_info(node);
4737 long proj = get_Proj_proj(node);
/* Div and IDiv share their proj numbering */
4739 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4740 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4741 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4743 switch ((pn_Mod)proj) {
4745 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4747 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4748 case pn_Mod_X_except:
/* the Mod may raise an exception: mark it for codegen */
4749 set_ia32_exc_label(new_pred, 1);
4750 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4751 case pn_Mod_X_regular:
4752 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4754 panic("No idea how to transform proj->Mod");
4758 * Transform and renumber the Projs from a CopyB.
/** Transforms and renumbers the Projs of a CopyB; the predecessor may
 *  have become either the immediate-size CopyB_i or the generic CopyB
 *  variant, with different proj numbers. */
4760 static ir_node *gen_Proj_CopyB(ir_node *node)
4762 ir_node *pred = get_Proj_pred(node);
4763 ir_node *new_pred = be_transform_node(pred);
4764 dbg_info *dbgi = get_irn_dbg_info(node);
4765 long proj = get_Proj_proj(node);
4767 switch ((pn_CopyB)proj) {
4769 if (is_ia32_CopyB_i(new_pred)) {
4770 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4771 } else if (is_ia32_CopyB(new_pred)) {
4772 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4775 case pn_CopyB_X_regular:
4776 if (is_ia32_CopyB_i(new_pred)) {
4777 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4778 } else if (is_ia32_CopyB(new_pred)) {
4779 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4782 case pn_CopyB_X_except:
4783 if (is_ia32_CopyB_i(new_pred)) {
4784 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4785 } else if (is_ia32_CopyB(new_pred)) {
4786 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4791 panic("No idea how to transform proj->CopyB");
/* Transform a be_Call into an ia32_Call.
 * Collects the GP register parameters (eax/ecx/edx), the fpcw input and the
 * stack pointer, tries to fold the call target into an address mode /
 * immediate, and remembers SSE2 calls for postprocess_fp_call_results(). */
4794 static ir_node *gen_be_Call(ir_node *node)
4796 dbg_info *const dbgi = get_irn_dbg_info(node);
4797 ir_node *const src_block = get_nodes_block(node);
4798 ir_node *const block = be_transform_node(src_block);
4799 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4800 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4801 ir_node *const sp = be_transform_node(src_sp);
4802 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4803 ia32_address_mode_t am;
4804 ia32_address_t *const addr = &am.addr;
/* register parameters; noreg_GP means "slot not used" */
4809 ir_node * eax = noreg_GP;
4810 ir_node * ecx = noreg_GP;
4811 ir_node * edx = noreg_GP;
4812 unsigned const pop = be_Call_get_pop(node);
4813 ir_type *const call_tp = be_Call_get_type(node);
4814 int old_no_pic_adjust;
4815 int throws_exception = ir_throws_exception(node);
4817 /* Run the x87 simulator if the call returns a float value */
4818 if (get_method_n_ress(call_tp) > 0) {
4819 ir_type *const res_type = get_method_res_type(call_tp, 0);
4820 ir_mode *const res_mode = get_type_mode(res_type);
4822 if (res_mode != NULL && mode_is_float(res_mode)) {
4823 ir_graph *irg = current_ir_graph;
4824 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4825 irg_data->do_x87_sim = 1;
4829 /* We do not want be_Call direct calls */
4830 assert(be_Call_get_entity(node) == NULL);
4832 /* special case for PIC trampoline calls */
/* temporarily disable/override PIC adjustment around match_arguments();
 * restored below */
4833 old_no_pic_adjust = ia32_no_pic_adjust;
4834 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4836 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4837 match_am | match_immediate);
4839 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw value, the ones before it (down to
 * n_be_Call_first_arg) are register parameters */
4841 i = get_irn_arity(node) - 1;
4842 fpcw = be_transform_node(get_irn_n(node, i--));
4843 for (; i >= n_be_Call_first_arg; --i) {
4844 arch_register_req_t const *const req
4845 = arch_get_irn_register_req_in(node, i);
4846 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4848 assert(req->type == arch_register_req_type_limited);
4849 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* sort each register parameter into its slot; each may appear only once */
4851 switch (*req->limited) {
4852 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4853 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4854 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4855 default: panic("Invalid GP register for register parameter");
4859 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4860 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4861 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4862 ir_set_throws_exception(call, throws_exception);
4863 set_am_attributes(call, &am);
4864 call = fix_mem_proj(call, &am);
4866 if (get_irn_pinned(node) == op_pin_state_pinned)
4867 set_irn_pinned(call, op_pin_state_pinned);
4869 SET_IA32_ORIG_NODE(call, node);
4871 if (ia32_cg_config.use_sse2) {
4872 /* remember this call for post-processing */
4873 ARR_APP1(ir_node *, call_list, call);
4874 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4881 * Transform Builtin trap
4883 static ir_node *gen_trap(ir_node *node)
4885 dbg_info *dbgi = get_irn_dbg_info(node);
4886 ir_node *block = be_transform_node(get_nodes_block(node));
4887 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4889 return new_bd_ia32_UD2(dbgi, block, mem);
4893 * Transform Builtin debugbreak
4895 static ir_node *gen_debugbreak(ir_node *node)
4897 dbg_info *dbgi = get_irn_dbg_info(node);
4898 ir_node *block = be_transform_node(get_nodes_block(node));
4899 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4901 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4905 * Transform Builtin return_address
/* Transform builtin return_address.
 * param 0 is a Const giving the frame level, param 1 the current frame.
 * For level > 0 (guard lines hidden in this extract) the frame chain is
 * walked up with a ClimbFrame node before loading the return address. */
4907 static ir_node *gen_return_address(ir_node *node)
4909 ir_node *param = get_Builtin_param(node, 0);
4910 ir_node *frame = get_Builtin_param(node, 1);
4911 dbg_info *dbgi = get_irn_dbg_info(node);
4912 ir_tarval *tv = get_Const_tarval(param);
4913 ir_graph *irg = get_irn_irg(node);
4914 unsigned long value = get_tarval_long(tv);
4916 ir_node *block = be_transform_node(get_nodes_block(node));
4917 ir_node *ptr = be_transform_node(frame);
/* ProduceVal creates uninitialized scratch values for ClimbFrame's
 * counter/result registers */
4921 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4922 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4923 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4926 /* load the return address from this frame */
4927 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4929 set_irn_pinned(load, get_irn_pinned(node));
4930 set_ia32_op_type(load, ia32_AddrModeS);
4931 set_ia32_ls_mode(load, mode_Iu);
/* address the frame entity that holds the return address */
4933 set_ia32_am_offs_int(load, 0);
4934 set_ia32_use_frame(load);
4935 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4937 if (get_irn_pinned(node) == op_pin_state_floats) {
/* the res Proj number is the same for all load variants, so marking the
 * load rematerializable is safe regardless of which one it becomes */
4938 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4939 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4940 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4941 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4944 SET_IA32_ORIG_NODE(load, node);
4945 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4949 * Transform Builtin frame_address
/* Transform builtin frame_address.
 * Mirrors gen_return_address() above, but loads the saved frame pointer
 * instead of the return address. */
4951 static ir_node *gen_frame_address(ir_node *node)
4953 ir_node *param = get_Builtin_param(node, 0);
4954 ir_node *frame = get_Builtin_param(node, 1);
4955 dbg_info *dbgi = get_irn_dbg_info(node);
4956 ir_tarval *tv = get_Const_tarval(param);
4957 ir_graph *irg = get_irn_irg(node);
4958 unsigned long value = get_tarval_long(tv);
4960 ir_node *block = be_transform_node(get_nodes_block(node));
4961 ir_node *ptr = be_transform_node(frame);
/* walk up 'value' frames first (guard lines hidden in this extract) */
4966 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4967 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4968 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4971 /* load the frame address from this frame */
4972 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4974 set_irn_pinned(load, get_irn_pinned(node));
4975 set_ia32_op_type(load, ia32_AddrModeS);
4976 set_ia32_ls_mode(load, mode_Iu);
4978 ent = ia32_get_frame_address_entity(irg);
/* NOTE(review): the two set_ia32_am_offs_int(load, 0) calls below look
 * like the two branches of an "ent != NULL" check whose if/else lines
 * are missing from this extract -- verify against the full file. */
4980 set_ia32_am_offs_int(load, 0);
4981 set_ia32_use_frame(load);
4982 set_ia32_frame_ent(load, ent);
4984 /* will fail anyway, but gcc does this: */
4985 set_ia32_am_offs_int(load, 0);
4988 if (get_irn_pinned(node) == op_pin_state_floats) {
4989 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4990 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4991 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4992 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4995 SET_IA32_ORIG_NODE(load, node);
4996 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
5000 * Transform builtin prefetch.
/* Builtin parameters: 0 = address to prefetch, 1 = read/write flag,
 * 2 = locality hint (only read on the SSE path).  Falls back to routing
 * the memory through unchanged when the target supports no prefetch. */
5002 static ir_node *gen_prefetch(ir_node *node)
5005 ir_node *ptr, *block, *mem, *base, *idx;
5006 ir_node *param, *new_node;
5009 ia32_address_t addr;
5011 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
5012 /* no prefetch at all, route memory */
5013 return be_transform_node(get_Builtin_mem(node));
/* rw flag: 1 = prefetch for writing */
5016 param = get_Builtin_param(node, 1);
5017 tv = get_Const_tarval(param);
5018 rw = get_tarval_long(tv);
5020 /* construct load address */
5021 memset(&addr, 0, sizeof(addr));
5022 ptr = get_Builtin_param(node, 0);
5023 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* NOTE(review): the NULL-checks around these transforms (setting
 * base/idx to noreg_GP when absent) are missing from this extract */
5030 base = be_transform_node(base);
5036 idx = be_transform_node(idx);
5039 dbgi = get_irn_dbg_info(node);
5040 block = be_transform_node(get_nodes_block(node));
5041 mem = be_transform_node(get_Builtin_mem(node));
5043 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
5044 /* we have 3DNow!, this was already checked above */
5045 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
5046 } else if (ia32_cg_config.use_sse_prefetch) {
5047 /* note: rw == 1 is IGNORED in that case */
5048 param = get_Builtin_param(node, 2);
5049 tv = get_Const_tarval(param);
5050 locality = get_tarval_long(tv);
5052 /* SSE style prefetch */
/* locality 0..3 selects prefetchnta/t2/t1/t0; the switch/case lines are
 * missing from this extract */
5055 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
5058 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
5061 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
5064 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
5068 assert(ia32_cg_config.use_3dnow_prefetch);
5069 /* 3DNow! style prefetch */
5070 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
5073 set_irn_pinned(new_node, get_irn_pinned(node));
5074 set_ia32_op_type(new_node, ia32_AddrModeS);
5075 set_ia32_ls_mode(new_node, mode_Bu);
5076 set_address(new_node, &addr);
5078 SET_IA32_ORIG_NODE(new_node, node);
5080 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5084 * Transform bsf like node
/* Transform a bsf-like builtin: one operand (builtin param 0), result
 * constructed via 'func' with full address-mode matching so the operand
 * may be folded into a memory access.
 * @param node  the Builtin node
 * @param func  constructor for the concrete ia32 node (e.g. Bsf, Bsr) */
5086 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5088 ir_node *param = get_Builtin_param(node, 0);
5089 dbg_info *dbgi = get_irn_dbg_info(node);
5091 ir_node *block = get_nodes_block(node);
5092 ir_node *new_block = be_transform_node(block);
5094 ia32_address_mode_t am;
5095 ia32_address_t *addr = &am.addr;
5098 match_arguments(&am, block, NULL, param, NULL, match_am);
5100 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5101 set_am_attributes(cnt, &am);
5102 set_ia32_ls_mode(cnt, get_irn_mode(param));
5104 SET_IA32_ORIG_NODE(cnt, node);
/* fix_mem_proj rewires memory Projs if an AM load got folded in */
5105 return fix_mem_proj(cnt, &am);
5109 * Transform builtin ffs.
/* Transform builtin ffs: ffs(x) = bsf(x) + 1, with ffs(0) = 0.
 * bsf leaves ZF set for input 0; that flag is turned into an all-ones
 * mask which is or-ed over the bsf result, so adding 1 afterwards yields
 * 0 for a zero input. */
5111 static ir_node *gen_ffs(ir_node *node)
5113 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5114 ir_node *real = skip_Proj(bsf);
5115 dbg_info *dbgi = get_irn_dbg_info(real);
5116 ir_node *block = get_nodes_block(real);
5117 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* make sure we can take a flags Proj off the bsf */
5120 if (get_irn_mode(real) != mode_T) {
5121 set_irn_mode(real, mode_T);
5122 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5125 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (input == 0) as an 8-bit 0/1 value */
5128 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5129 SET_IA32_ORIG_NODE(set, node);
5132 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5133 SET_IA32_ORIG_NODE(conv, node);
/* neg = 0 or 0xFFFFFFFF (mask for the zero-input case) */
5136 neg = new_bd_ia32_Neg(dbgi, block, conv);
5139 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5140 set_ia32_commutative(orn);
/* add 1 via Lea; NOTE(review): the final "return add;" line is missing
 * from this extract */
5143 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5144 add_ia32_am_offs_int(add, 1);
5149 * Transform builtin clz.
5151 static ir_node *gen_clz(ir_node *node)
5153 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5154 ir_node *real = skip_Proj(bsr);
5155 dbg_info *dbgi = get_irn_dbg_info(real);
5156 ir_node *block = get_nodes_block(real);
5157 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5159 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5163 * Transform builtin ctz.
5165 static ir_node *gen_ctz(ir_node *node)
5167 return gen_unop_AM(node, new_bd_ia32_Bsf);
5171 * Transform builtin parity.
/* Transform builtin parity.
 * Folds the 32-bit value down to one byte (shr 16 + xor, then the
 * high/low byte xor of XorHighLow) so the hardware parity flag, which
 * only covers the lowest byte, reflects the whole word. */
5173 static ir_node *gen_parity(ir_node *node)
5175 dbg_info *dbgi = get_irn_dbg_info(node);
5176 ir_node *block = get_nodes_block(node);
5177 ir_node *new_block = be_transform_node(block);
5178 ir_node *param = get_Builtin_param(node, 0);
5179 ir_node *new_param = be_transform_node(param);
5182 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5183 * so we have to do complicated xoring first.
5184 * (we should also better lower this before the backend so we still have a
5185 * chance for CSE, constant folding and other goodies for some of these
5188 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5189 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5190 ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5192 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
/* take the flags output of the final xor */
5195 set_irn_mode(xor2, mode_T);
5196 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
/* setnp: 1 iff an odd number of bits was set */
5199 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5200 SET_IA32_ORIG_NODE(new_node, node);
/* widen the 8-bit Setcc result; NOTE(review): the final return line is
 * missing from this extract */
5203 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5204 nomem, new_node, mode_Bu);
5205 SET_IA32_ORIG_NODE(new_node, node);
5210 * Transform builtin popcount
/* Transform builtin popcount.
 * Uses the hardware popcnt instruction when available, otherwise falls
 * back to the classic divide-and-conquer bit-counting sequence (Hacker's
 * Delight), built from And/Shr/Lea nodes. */
5212 static ir_node *gen_popcount(ir_node *node)
5214 ir_node *param = get_Builtin_param(node, 0);
5215 dbg_info *dbgi = get_irn_dbg_info(node);
5217 ir_node *block = get_nodes_block(node);
5218 ir_node *new_block = be_transform_node(block);
5221 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5223 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5224 if (ia32_cg_config.use_popcnt) {
5225 ia32_address_mode_t am;
5226 ia32_address_t *addr = &am.addr;
/* popcnt supports a memory operand, so allow address-mode folding */
5229 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5231 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5232 set_am_attributes(cnt, &am);
5233 set_ia32_ls_mode(cnt, get_irn_mode(param));
5235 SET_IA32_ORIG_NODE(cnt, node);
5236 return fix_mem_proj(cnt, &am);
5239 new_param = be_transform_node(param);
5241 /* do the standard popcount algo */
5242 /* TODO: This is stupid, we should transform this before the backend,
5243 * to get CSE, localopts, etc. for the operations
5244 * TODO: This is also not the optimal algorithm (it is just the starting
5245 * example in hackers delight, they optimize it more on the following page)
5246 * But I'm too lazy to fix this now, as the code should get lowered before
5247 * the backend anyway.
5250 /* m1 = x & 0x55555555 */
5251 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5252 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5255 simm = ia32_create_Immediate(NULL, 0, 1);
5256 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5258 /* m2 = s1 & 0x55555555 */
5259 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (additions are done via Lea) */
5262 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5264 /* m4 = m3 & 0x33333333 */
5265 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5266 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5269 simm = ia32_create_Immediate(NULL, 0, 2);
5270 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5272 /* m5 = s2 & 0x33333333 */
5273 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 */
5276 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5278 /* m7 = m6 & 0x0F0F0F0F */
5279 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5280 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5283 simm = ia32_create_Immediate(NULL, 0, 4);
5284 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5286 /* m8 = s3 & 0x0F0F0F0F */
5287 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 */
5290 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5292 /* m10 = m9 & 0x00FF00FF */
5293 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5294 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5297 simm = ia32_create_Immediate(NULL, 0, 8);
5298 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5300 /* m11 = s4 & 0x00FF00FF */
5301 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5303 /* m12 = m10 + m11 */
5304 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5306 /* m13 = m12 & 0x0000FFFF */
5307 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5308 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5310 /* s5 = m12 >> 16 */
5311 simm = ia32_create_Immediate(NULL, 0, 16);
5312 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5314 /* res = m13 + s5 */
5315 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5319 * Transform builtin byte swap.
/* Transform builtin byte swap.
 * 32 bit: bswap instruction on i486+, otherwise a shift/and/add sequence;
 * 16 bit: always via Bswap16 (xchg/rol).
 * NOTE(review): the size-switch case labels (e.g. "case 32:"/"case 16:")
 * are missing from this extract. */
5321 static ir_node *gen_bswap(ir_node *node)
5323 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5324 dbg_info *dbgi = get_irn_dbg_info(node);
5326 ir_node *block = get_nodes_block(node);
5327 ir_node *new_block = be_transform_node(block);
5328 ir_mode *mode = get_irn_mode(param);
5329 unsigned size = get_mode_size_bits(mode);
5330 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5334 if (ia32_cg_config.use_i486) {
5335 /* swap available */
5336 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* manual 32-bit swap: move each byte into place and sum with Lea */
5338 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5339 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5341 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5342 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5344 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5346 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5347 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5349 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5350 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5353 /* swap16 always available */
5354 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5357 panic("Invalid bswap size (%d)", size);
5362 * Transform builtin outport.
/* Transform builtin outport: param 0 is the port (may become an
 * immediate), param 1 the value to write; the value's mode determines
 * the operand size of the out instruction.
 * NOTE(review): the tail of this function (orig-node bookkeeping /
 * return) is missing from this extract. */
5364 static ir_node *gen_outport(ir_node *node)
5366 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5367 ir_node *oldv = get_Builtin_param(node, 1);
5368 ir_mode *mode = get_irn_mode(oldv);
5369 ir_node *value = be_transform_node(oldv);
5370 ir_node *block = be_transform_node(get_nodes_block(node));
5371 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5372 dbg_info *dbgi = get_irn_dbg_info(node);
5374 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5375 set_ia32_ls_mode(res, mode);
5380 * Transform builtin inport.
/* Transform builtin inport: param 0 is the port; the result mode comes
 * from the builtin's method type and selects the operand size.
 * NOTE(review): the tail of this function (orig-node bookkeeping /
 * return) is missing from this extract. */
5382 static ir_node *gen_inport(ir_node *node)
5384 ir_type *tp = get_Builtin_type(node);
5385 ir_type *rstp = get_method_res_type(tp, 0);
5386 ir_mode *mode = get_type_mode(rstp);
5387 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5388 ir_node *block = be_transform_node(get_nodes_block(node));
5389 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5390 dbg_info *dbgi = get_irn_dbg_info(node);
5392 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5393 set_ia32_ls_mode(res, mode);
5395 /* check for missing Result Proj */
5400 * Transform a builtin inner trampoline
/* Transform a builtin inner_trampoline.
 * Emits a sequence of stores that writes executable x86 code into the
 * trampoline memory at 'ptr':
 *     mov ecx, <env>      (opcode 0xB9 + 32-bit env)
 *     jmp rel <callee>    (opcode 0xE9 + 32-bit relative offset)
 * The relative jump offset is callee - (trampoline + 10), 10 being the
 * total length of the emitted sequence. */
5402 static ir_node *gen_inner_trampoline(ir_node *node)
5404 ir_node *ptr = get_Builtin_param(node, 0);
5405 ir_node *callee = get_Builtin_param(node, 1);
5406 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5407 ir_node *mem = get_Builtin_mem(node);
5408 ir_node *block = get_nodes_block(node);
5409 ir_node *new_block = be_transform_node(block);
5413 ir_node *trampoline;
5415 dbg_info *dbgi = get_irn_dbg_info(node);
5416 ia32_address_t addr;
5418 /* construct store address */
5419 memset(&addr, 0, sizeof(addr));
5420 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5422 if (addr.base == NULL) {
5423 addr.base = noreg_GP;
5425 addr.base = be_transform_node(addr.base);
5428 if (addr.index == NULL) {
5429 addr.index = noreg_GP;
5431 addr.index = be_transform_node(addr.index);
5433 addr.mem = be_transform_node(mem);
5435 /* mov ecx, <env> */
/* 0xB9 = opcode of "mov ecx, imm32" */
5436 val = ia32_create_Immediate(NULL, 0, 0xB9);
5437 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5438 addr.index, addr.mem, val);
5439 set_irn_pinned(store, get_irn_pinned(node));
5440 set_ia32_op_type(store, ia32_AddrModeD);
5441 set_ia32_ls_mode(store, mode_Bu);
5442 set_address(store, &addr);
/* NOTE(review): the offset-advance lines between the stores are missing
 * from this extract */
5446 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5447 addr.index, addr.mem, env);
5448 set_irn_pinned(store, get_irn_pinned(node));
5449 set_ia32_op_type(store, ia32_AddrModeD);
5450 set_ia32_ls_mode(store, mode_Iu);
5451 set_address(store, &addr);
5455 /* jmp rel <callee> */
/* 0xE9 = opcode of "jmp rel32" */
5456 val = ia32_create_Immediate(NULL, 0, 0xE9);
5457 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5458 addr.index, addr.mem, val);
5459 set_irn_pinned(store, get_irn_pinned(node));
5460 set_ia32_op_type(store, ia32_AddrModeD);
5461 set_ia32_ls_mode(store, mode_Bu);
5462 set_address(store, &addr);
5466 trampoline = be_transform_node(ptr);
5468 /* the callee is typically an immediate */
5469 if (is_SymConst(callee)) {
5470 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5472 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5473 add_ia32_am_offs_int(rel, -10);
/* rel = (callee - 10) - trampoline, the jmp displacement */
5475 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5477 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5478 addr.index, addr.mem, rel);
5479 set_irn_pinned(store, get_irn_pinned(node));
5480 set_ia32_op_type(store, ia32_AddrModeD);
5481 set_ia32_ls_mode(store, mode_Iu);
5482 set_address(store, &addr);
/* result tuple: in[0] = memory, in[1] = trampoline address (per the
 * consumers in gen_Proj_Builtin) */
5487 return new_r_Tuple(new_block, 2, in);
5491 * Transform Builtin node.
/* Transform a Builtin node by dispatching on its kind.
 * NOTE(review): several case labels (trap, ffs, clz, ctz, parity, bswap,
 * outport, inport) are missing from this extract; the visible returns
 * line up with the matching gen_* helpers above. */
5493 static ir_node *gen_Builtin(ir_node *node)
5495 ir_builtin_kind kind = get_Builtin_kind(node);
5499 return gen_trap(node);
5500 case ir_bk_debugbreak:
5501 return gen_debugbreak(node);
5502 case ir_bk_return_address:
5503 return gen_return_address(node);
5504 case ir_bk_frame_address:
5505 return gen_frame_address(node);
5506 case ir_bk_prefetch:
5507 return gen_prefetch(node);
5509 return gen_ffs(node);
5511 return gen_clz(node);
5513 return gen_ctz(node);
5515 return gen_parity(node);
5516 case ir_bk_popcount:
5517 return gen_popcount(node);
5519 return gen_bswap(node);
5521 return gen_outport(node);
5523 return gen_inport(node);
5524 case ir_bk_inner_trampoline:
5525 return gen_inner_trampoline(node);
5527 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5531 * Transform Proj(Builtin) node.
/* Transform a Proj from a Builtin.  Most builtins produce a single node,
 * so their Projs can be forwarded directly; Inport and inner_trampoline
 * need an explicit mapping of their two outputs. */
5533 static ir_node *gen_Proj_Builtin(ir_node *proj)
5535 ir_node *node = get_Proj_pred(proj);
5536 ir_node *new_node = be_transform_node(node);
5537 ir_builtin_kind kind = get_Builtin_kind(node);
/* single-result builtins: the transformed node IS the result */
5540 case ir_bk_return_address:
5541 case ir_bk_frame_address:
5546 case ir_bk_popcount:
5548 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* memory-only builtins: the transformed node IS the memory */
5551 case ir_bk_debugbreak:
5552 case ir_bk_prefetch:
5554 assert(get_Proj_proj(proj) == pn_Builtin_M);
5557 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5558 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5560 assert(get_Proj_proj(proj) == pn_Builtin_M);
5561 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5563 case ir_bk_inner_trampoline:
/* gen_inner_trampoline() returned a Tuple(mem, trampoline-addr) */
5564 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5565 return get_Tuple_pred(new_node, 1);
5567 assert(get_Proj_proj(proj) == pn_Builtin_M);
5568 return get_Tuple_pred(new_node, 0);
5571 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Transform be_IncSP: a plain copy, but flagged as flags-modifying since
 * the stack adjustment becomes an add/sub on esp.
 * NOTE(review): the "return res;" line is missing from this extract. */
5574 static ir_node *gen_be_IncSP(ir_node *node)
5576 ir_node *res = be_duplicate_node(node);
5577 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5583 * Transform the Projs from a be_Call.
/* Transform a Proj from a be_Call: map be_Call proj numbers onto
 * ia32_Call proj numbers.  Register results are located by searching the
 * ia32_Call's outputs for one with the same limited register requirement. */
5585 static ir_node *gen_Proj_be_Call(ir_node *node)
5587 ir_node *call = get_Proj_pred(node);
5588 ir_node *new_call = be_transform_node(call);
5589 dbg_info *dbgi = get_irn_dbg_info(node);
5590 long proj = get_Proj_proj(node);
5591 ir_mode *mode = get_irn_mode(node);
5594 if (proj == pn_be_Call_M) {
5595 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5597 /* transform call modes */
5598 if (mode_is_data(mode)) {
5599 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5603 /* Map from be_Call to ia32_Call proj number */
5604 if (proj == pn_be_Call_sp) {
5605 proj = pn_ia32_Call_stack;
5606 } else if (proj == pn_be_Call_M) {
5607 proj = pn_ia32_Call_M;
5608 } else if (proj == pn_be_Call_X_except) {
5609 proj = pn_ia32_Call_X_except;
5610 } else if (proj == pn_be_Call_X_regular) {
5611 proj = pn_ia32_Call_X_regular;
/* otherwise it is a register result: find the ia32_Call output with the
 * same single-register constraint */
5613 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5614 int const n_outs = arch_get_irn_n_outs(new_call);
5617 assert(proj >= pn_be_Call_first_res);
5618 assert(req->type & arch_register_req_type_limited);
5620 for (i = 0; i < n_outs; ++i) {
5621 arch_register_req_t const *const new_req
5622 = arch_get_irn_register_req_out(new_call, i);
5624 if (!(new_req->type & arch_register_req_type_limited) ||
5625 new_req->cls != req->cls ||
5626 *new_req->limited != *req->limited)
5635 res = new_rd_Proj(dbgi, new_call, mode, proj);
5637 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5639 case pn_ia32_Call_stack:
5640 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5643 case pn_ia32_Call_fpcw:
5644 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5652 * Transform the Projs from a Cmp.
/* A Proj on a Cmp must never reach the backend: mode_b lowering should
 * have removed all of them beforehand, so just bail out loudly. */
5654 static ir_node *gen_Proj_Cmp(ir_node *node)
5656 /* this probably means not all mode_b nodes were lowered... */
5657 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/* Transform a Proj from an ASM node.  The memory output of the
 * transformed ASM is always its last output; other projs keep (or get
 * recomputed, in the hidden lines) their position. */
5661 static ir_node *gen_Proj_ASM(ir_node *node)
5663 ir_mode *mode = get_irn_mode(node);
5664 ir_node *pred = get_Proj_pred(node);
5665 ir_node *new_pred = be_transform_node(pred);
5666 long pos = get_Proj_proj(node);
5668 if (mode == mode_M) {
/* memory is the last out of the new ASM node */
5669 pos = arch_get_irn_n_outs(new_pred)-1;
5670 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
/* NOTE(review): the mode assignments in these two branches are missing
 * from this extract (presumably mode_Iu / the xmm or vfp mode) */
5672 } else if (mode_is_float(mode)) {
5675 panic("unexpected proj mode at ASM");
5678 return new_r_Proj(new_pred, mode, pos);
5682 * Transform and potentially renumber Proj nodes.
/* Transform and potentially renumber Proj nodes: dispatch on the opcode
 * of the predecessor to the specialized gen_Proj_* handlers.
 * NOTE(review): the iro_* case labels of the switch are missing from this
 * extract; each visible return names the handler it dispatches to. */
5684 static ir_node *gen_Proj(ir_node *node)
5686 ir_node *pred = get_Proj_pred(node);
5689 switch (get_irn_opcode(pred)) {
5691 return gen_Proj_Load(node);
5693 return gen_Proj_Store(node);
5695 return gen_Proj_ASM(node);
5697 return gen_Proj_Builtin(node);
5699 return gen_Proj_Div(node);
5701 return gen_Proj_Mod(node);
5703 return gen_Proj_CopyB(node);
5705 return gen_Proj_be_SubSP(node);
5707 return gen_Proj_be_AddSP(node);
5709 return gen_Proj_be_Call(node);
5711 return gen_Proj_Cmp(node);
5713 proj = get_Proj_proj(node);
5715 case pn_Start_X_initial_exec: {
5716 ir_node *block = get_nodes_block(pred);
5717 ir_node *new_block = be_transform_node(block);
5718 dbg_info *dbgi = get_irn_dbg_info(node);
5719 /* we exchange the ProjX with a jump */
5720 ir_node *jump = new_rd_Jmp(dbgi, new_block);
/* default case: special-case l_FloattoLL, otherwise duplicate */
5728 if (is_ia32_l_FloattoLL(pred)) {
5729 return gen_Proj_l_FloattoLL(node);
5731 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5735 ir_mode *mode = get_irn_mode(node);
5736 if (ia32_mode_needs_gp_reg(mode)) {
5737 ir_node *new_pred = be_transform_node(pred);
5738 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5739 get_Proj_proj(node));
/* keep the node number for debugging purposes */
5740 new_proj->node_nr = node->node_nr;
5745 return be_duplicate_node(node);
5749 * Enters all transform functions into the generic pointer
/* Enters all transform functions into the generic op function pointers so
 * be_transform_graph() can dispatch per opcode. */
5751 static void register_transformers(void)
5753 /* first clear the generic function pointer for all ops */
5754 be_start_transform_setup();
5756 be_set_transform_function(op_Add, gen_Add);
5757 be_set_transform_function(op_And, gen_And);
5758 be_set_transform_function(op_ASM, ia32_gen_ASM);
5759 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5760 be_set_transform_function(op_be_Call, gen_be_Call);
5761 be_set_transform_function(op_be_Copy, gen_be_Copy);
5762 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5763 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5764 be_set_transform_function(op_be_Return, gen_be_Return);
5765 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5766 be_set_transform_function(op_Builtin, gen_Builtin);
5767 be_set_transform_function(op_Cmp, gen_Cmp);
5768 be_set_transform_function(op_Cond, gen_Cond);
5769 be_set_transform_function(op_Const, gen_Const);
5770 be_set_transform_function(op_Conv, gen_Conv);
5771 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5772 be_set_transform_function(op_Div, gen_Div);
5773 be_set_transform_function(op_Eor, gen_Eor);
/* pre-lowered ia32 l_* nodes get their own transformers; simple helper
 * nodes are just duplicated */
5774 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5775 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5776 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5777 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5778 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5779 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5780 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5781 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5782 be_set_transform_function(op_ia32_l_Setcc, gen_ia32_l_Setcc);
5783 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5784 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5785 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5786 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5787 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5788 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5789 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5790 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5791 be_set_transform_function(op_IJmp, gen_IJmp);
5792 be_set_transform_function(op_Jmp, gen_Jmp);
5793 be_set_transform_function(op_Load, gen_Load);
5794 be_set_transform_function(op_Minus, gen_Minus);
5795 be_set_transform_function(op_Mod, gen_Mod);
5796 be_set_transform_function(op_Mul, gen_Mul);
5797 be_set_transform_function(op_Mulh, gen_Mulh);
5798 be_set_transform_function(op_Mux, gen_Mux);
5799 be_set_transform_function(op_Not, gen_Not);
5800 be_set_transform_function(op_Or, gen_Or);
5801 be_set_transform_function(op_Phi, gen_Phi);
5802 be_set_transform_function(op_Proj, gen_Proj);
5803 be_set_transform_function(op_Rotl, gen_Rotl);
5804 be_set_transform_function(op_Shl, gen_Shl);
5805 be_set_transform_function(op_Shr, gen_Shr);
5806 be_set_transform_function(op_Shrs, gen_Shrs);
5807 be_set_transform_function(op_Store, gen_Store);
5808 be_set_transform_function(op_Sub, gen_Sub);
5809 be_set_transform_function(op_SymConst, gen_SymConst);
5810 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5814 * Pre-transform all unknown and noreg nodes.
/* Pre-transform all unknown and noreg nodes and cache the per-graph
 * nomem/noreg_GP nodes used throughout the transformers above. */
5816 static void ia32_pretransform_node(void)
5818 ir_graph *irg = current_ir_graph;
5819 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5821 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5822 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5823 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5824 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5825 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
/* cache globals used by the gen_* functions */
5827 nomem = get_irg_no_mem(irg);
5828 noreg_GP = ia32_new_NoReg_gp(irg);
5832 * Post-process all calls if we are in SSE mode.
5833 * The ABI requires that the results are in st0, copy them
5834 * to a xmm register.
/* Post-process all calls recorded in call_list/call_types (SSE2 mode).
 * The x86 ABI returns floats in st(0); in SSE2 mode consumers expect xmm
 * values, so each float call result is either patched in place (an xStore
 * user becomes a vfst) or spilled via vfst + xLoad through the frame. */
5836 static void postprocess_fp_call_results(void)
5840 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5841 ir_node *call = call_list[i];
5842 ir_type *mtp = call_types[i];
5845 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5846 ir_type *res_tp = get_method_res_type(mtp, j);
5847 ir_node *res, *new_res;
5848 const ir_edge_t *edge, *next;
5851 if (! is_atomic_type(res_tp)) {
5852 /* no floating point return */
5855 res_mode = get_type_mode(res_tp);
5856 if (! mode_is_float(res_mode)) {
5857 /* no floating point return */
/* the j-th float result lives on the pn_ia32_Call_vf0 + j output */
5861 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5864 /* now patch the users */
5865 foreach_out_edge_safe(res, edge, next) {
5866 ir_node *succ = get_edge_src_irn(edge);
/* Keeps need no real value */
5869 if (be_is_Keep(succ))
5872 if (is_ia32_xStore(succ)) {
5873 /* an xStore can be patched into an vfst */
5874 dbg_info *db = get_irn_dbg_info(succ);
5875 ir_node *block = get_nodes_block(succ);
5876 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5877 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5878 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5879 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5880 ir_mode *mode = get_ia32_ls_mode(succ);
5882 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5883 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5884 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5885 if (is_ia32_use_frame(succ))
5886 set_ia32_use_frame(st);
5887 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5888 set_irn_pinned(st, get_irn_pinned(succ));
5889 set_ia32_op_type(st, ia32_AddrModeD);
/* xStore and vfst share their out numbering, so the exchange (in the
 * hidden lines) keeps all Projs valid */
5891 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5892 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5893 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
/* lazily build the spill/reload sequence on first non-patchable user */
5900 if (new_res == NULL) {
5901 dbg_info *db = get_irn_dbg_info(call);
5902 ir_node *block = get_nodes_block(call);
5903 ir_node *frame = get_irg_frame(current_ir_graph);
5904 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5905 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5906 ir_node *vfst, *xld, *new_mem;
5909 /* store st(0) on stack */
5910 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5912 set_ia32_op_type(vfst, ia32_AddrModeD);
5913 set_ia32_use_frame(vfst);
5915 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5917 /* load into SSE register */
5918 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5920 set_ia32_op_type(xld, ia32_AddrModeS);
5921 set_ia32_use_frame(xld);
5923 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5924 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5926 if (old_mem != NULL) {
5927 edges_reroute(old_mem, new_mem);
/* redirect this user to the xmm value */
5931 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5937 /* do the transformation */
/**
 * Driver for the firm -> ia32 transformation of one graph: sets up the
 * transformer table and per-graph state, runs be_transform_graph(), and
 * tears the state down again.
 *
 * NOTE(review): gaps in the embedded line numbers mark lines dropped from
 * this excerpt (e.g. the brace after the signature and, presumably, the
 * set_opt_cse(0) between 5955 and 5958 — confirm against the full file).
 */
5938 void ia32_transform_graph(ir_graph *irg)
5942 register_transformers();
/* reset per-graph globals before transforming */
5943 initial_fpcw = NULL;
5944 ia32_no_pic_adjust = 0;
/* remember the incoming x87 control word so it can be restored later */
5946 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* heights are queried during transformation; time their construction */
5948 be_timer_push(T_HEIGHTS);
5949 ia32_heights = heights_new(irg);
5950 be_timer_pop(T_HEIGHTS);
5951 ia32_calculate_non_address_mode_nodes(irg);
5953 /* the transform phase is not safe for CSE (yet) because several nodes get
5954 * attributes set after their creation */
/* save the CSE setting so it can be restored below */
5955 cse_last = get_opt_cse();
/* flexible arrays of Calls (and their types) recorded for FP post-processing */
5958 call_list = NEW_ARR_F(ir_node *, 0);
5959 call_types = NEW_ARR_F(ir_type *, 0);
5960 be_transform_graph(irg, ia32_pretransform_node);
/* in SSE2 mode, ABI float results arrive in st(0) and must be copied to xmm */
5962 if (ia32_cg_config.use_sse2)
5963 postprocess_fp_call_results();
5964 DEL_ARR_F(call_types);
5965 DEL_ARR_F(call_list);
/* restore the CSE setting saved above */
5967 set_opt_cse(cse_last);
/* release per-graph analysis state */
5969 ia32_free_non_address_mode_nodes();
5970 heights_free(ia32_heights);
5971 ia32_heights = NULL;
5974 void ia32_init_transform(void)
5976 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");