2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
71 /* define this to construct SSE constants instead of load them */
72 #undef CONSTRUCT_SSE_CONST
/* String bit-patterns for the FP sign/abs masks and the unsigned-long-long
 * conversion bias; they are parsed into tarvals and emitted as constant-pool
 * entities by ia32_gen_fp_known_const() below.
 * ULL_BIAS is 2^64 (18446744073709551616). */
75 #define SFP_SIGN "0x80000000"
76 #define DFP_SIGN "0x8000000000000000"
77 #define SFP_ABS "0x7FFFFFFF"
78 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
79 #define DFP_INTMAX "9223372036854775807"
80 #define ULL_BIAS "18446744073709551616"
/* Entity (linker-visible) names for the constants above. */
82 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
83 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
84 #define ENT_SFP_ABS "C_ia32_sfp_abs"
85 #define ENT_DFP_ABS "C_ia32_dfp_abs"
86 #define ENT_ULL_BIAS "C_ia32_ull_bias"
/* Shorthands for the register-class modes of the x87 (vfp) and SSE (xmm)
 * register classes. */
88 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
89 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
91 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Cached FPU control word: old_initial_fpcw is the pre-transform node,
 * initial_fpcw its transformed counterpart (see get_fpcw() below). */
93 static ir_node *old_initial_fpcw = NULL;
94 static ir_node *initial_fpcw = NULL;
95 int ia32_no_pic_adjust;
/* Constructor-function signatures used by the generic gen_binop/gen_unop
 * helpers below; base/index/mem are the address-mode inputs.
 * NOTE(review): this excerpt elides the trailing parameter lines of several
 * of these typedefs. */
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
/* Binary op that additionally consumes an eflags input (e.g. Adc/Sbb). */
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
/* Shift/rotate: plain two-operand form, no address mode. */
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
/* Destination-address-mode variants (operate on memory directly). */
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
/* x87 float binop; presumably takes the fpcw as an extra input — see
 * gen_binop_x87_float() which passes get_fpcw(). */
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in the file. */
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* it's enough to have those once per transformation run */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero if a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero if a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero if a node represents the -1 constant (all bits set). */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if the constant can be materialized with a single x87
 * instruction instead of a constant-pool load
155 static bool is_simple_x87_Const(ir_node *node)
157 ir_tarval *tv = get_Const_tarval(node);
/* 0.0 and 1.0 are directly constructible (gen_Const uses vfldz/vfld1). */
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
166 * returns true if the constant can be created with a simple SSE instruction
 * sequence instead of a constant-pool load
168 static bool is_simple_sse_Const(ir_node *node)
170 ir_tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
/* 0.0 is always simple (xorps); more cases only with CONSTRUCT_SSE_CONST. */
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
/* Assemble the low 32 bits of the double, little-endian byte order. */
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
198 * return noreg or the pic_base register in case of PIC.
199 * This is necessary as base address for newly created symbols
201 static ir_node *get_symconst_base(void)
203 ir_graph *irg = current_ir_graph;
/* Under PIC every symbol access must be relative to the pic base
 * supplied by the architecture backend. */
205 if (be_get_irg_options(irg)->pic) {
206 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
207 return arch_env->impl->get_pic_base(irg);
214 * Transforms a Const into an ia32 constant: immediate for integers,
 * constructed value or constant-pool load for floats (SSE2 or x87 path).
 * NOTE(review): this excerpt elides many original lines; comments below
 * describe only what the visible code shows.
216 static ir_node *gen_Const(ir_node *node)
218 ir_node *old_block = get_nodes_block(node);
219 ir_node *block = be_transform_node(old_block);
220 dbg_info *dbgi = get_irn_dbg_info(node);
221 ir_mode *mode = get_irn_mode(node);
223 assert(is_Const(node));
225 if (mode_is_float(mode)) {
231 if (ia32_cg_config.use_sse2) {
232 ir_tarval *tv = get_Const_tarval(node);
/* 0.0: cheapest case, materialized with xZero (no memory access). */
233 if (tarval_is_null(tv)) {
234 load = new_bd_ia32_xZero(dbgi, block);
235 set_ia32_ls_mode(load, mode);
237 #ifdef CONSTRUCT_SSE_CONST
/* 1.0: built from all-ones via shift left then shift right, leaving
 * exactly the exponent bit pattern of 1.0 (shift counts differ for
 * float vs. double). */
238 } else if (tarval_is_one(tv)) {
239 int cnst = mode == mode_F ? 26 : 55;
240 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
241 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
242 ir_node *pslld, *psrld;
244 load = new_bd_ia32_xAllOnes(dbgi, block);
245 set_ia32_ls_mode(load, mode);
246 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
247 set_ia32_ls_mode(pslld, mode);
248 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
249 set_ia32_ls_mode(psrld, mode);
251 #endif /* CONSTRUCT_SSE_CONST */
252 } else if (mode == mode_F) {
253 /* we can place any 32bit constant by using a movd gp, sse */
254 unsigned val = get_tarval_sub_bits(tv, 0) |
255 (get_tarval_sub_bits(tv, 1) << 8) |
256 (get_tarval_sub_bits(tv, 2) << 16) |
257 (get_tarval_sub_bits(tv, 3) << 24);
258 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
259 load = new_bd_ia32_xMovd(dbgi, block, cnst);
260 set_ia32_ls_mode(load, mode);
263 #ifdef CONSTRUCT_SSE_CONST
/* Doubles whose low 32 bits are zero: move the high half into an xmm
 * register and shift it into place with psllq. */
264 if (mode == mode_D) {
265 unsigned val = get_tarval_sub_bits(tv, 0) |
266 (get_tarval_sub_bits(tv, 1) << 8) |
267 (get_tarval_sub_bits(tv, 2) << 16) |
268 (get_tarval_sub_bits(tv, 3) << 24);
270 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
271 ir_node *cnst, *psllq;
273 /* fine, lower 32bit are zero, produce 32bit value */
274 val = get_tarval_sub_bits(tv, 4) |
275 (get_tarval_sub_bits(tv, 5) << 8) |
276 (get_tarval_sub_bits(tv, 6) << 16) |
277 (get_tarval_sub_bits(tv, 7) << 24);
278 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
279 load = new_bd_ia32_xMovd(dbgi, block, cnst);
280 set_ia32_ls_mode(load, mode);
281 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
282 set_ia32_ls_mode(psllq, mode);
287 #endif /* CONSTRUCT_SSE_CONST */
/* Fallback: load from a constant-pool entity; the load is
 * rematerializable since the pool entry is immutable. */
288 floatent = ia32_create_float_const_entity(node);
290 base = get_symconst_base();
291 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
293 set_ia32_op_type(load, ia32_AddrModeS);
294 set_ia32_am_sc(load, floatent);
295 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
296 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0 and 1.0, else constant-pool vfld. */
299 if (is_Const_null(node)) {
300 load = new_bd_ia32_vfldz(dbgi, block);
302 set_ia32_ls_mode(load, mode);
303 } else if (is_Const_one(node)) {
304 load = new_bd_ia32_vfld1(dbgi, block);
306 set_ia32_ls_mode(load, mode);
311 floatent = ia32_create_float_const_entity(node);
312 /* create_float_const_ent is smart and sometimes creates
 * a smaller entity type than the constant's mode */
314 ls_mode = get_type_mode(get_entity_type(floatent));
315 base = get_symconst_base();
316 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
318 set_ia32_op_type(load, ia32_AddrModeS);
319 set_ia32_am_sc(load, floatent);
320 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
321 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
324 #ifdef CONSTRUCT_SSE_CONST
326 #endif /* CONSTRUCT_SSE_CONST */
327 SET_IA32_ORIG_NODE(load, node);
329 } else { /* non-float mode */
331 ir_tarval *tv = get_Const_tarval(node);
/* Normalize to 32-bit unsigned; conversion failure is a hard error. */
334 tv = tarval_convert_to(tv, mode_Iu);
336 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
338 panic("couldn't convert constant tarval (%+F)", node);
340 val = get_tarval_long(tv);
342 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
343 SET_IA32_ORIG_NODE(cnst, node);
350 * Transforms a SymConst: floats become a load of the referenced entity,
 * TLS entities become LdTls+Lea, everything else an address immediate.
 * NOTE(review): this excerpt elides several original lines.
352 static ir_node *gen_SymConst(ir_node *node)
354 ir_node *old_block = get_nodes_block(node);
355 ir_node *block = be_transform_node(old_block);
356 dbg_info *dbgi = get_irn_dbg_info(node);
357 ir_mode *mode = get_irn_mode(node);
360 if (mode_is_float(mode)) {
361 if (ia32_cg_config.use_sse2)
362 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 set_ia32_am_sc(cnst, get_SymConst_entity(node));
366 set_ia32_use_frame(cnst);
/* Only entity addresses are supported by this backend. */
370 if (get_SymConst_kind(node) != symconst_addr_ent) {
371 panic("backend only support symconst_addr_ent (at %+F)", node);
373 entity = get_SymConst_entity(node);
/* Thread-local entities: address is computed relative to the TLS base
 * segment register loaded by LdTls. */
374 if (get_entity_owner(entity) == get_tls_type()) {
375 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
376 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
377 set_ia32_am_sc(lea, entity);
/* Plain entity: its address fits into a Const immediate. */
380 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
384 SET_IA32_ORIG_NODE(cnst, node);
390 * Create a float type for the given mode and cache it.
392 * @param mode the mode for the float type (might be integer mode for SSE2 types)
393 * @param align alignment in bytes (used as index into the per-mode caches,
 *              so it must be < 16)
395 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
/* One static cache array per supported mode, indexed by alignment;
 * each entry is created lazily on first request. */
401 if (mode == mode_Iu) {
402 static ir_type *int_Iu[16] = {NULL, };
404 if (int_Iu[align] == NULL) {
405 int_Iu[align] = tp = new_type_primitive(mode);
406 /* set the specified alignment */
407 set_type_alignment_bytes(tp, align);
409 return int_Iu[align];
410 } else if (mode == mode_Lu) {
411 static ir_type *int_Lu[16] = {NULL, };
413 if (int_Lu[align] == NULL) {
414 int_Lu[align] = tp = new_type_primitive(mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return int_Lu[align];
419 } else if (mode == mode_F) {
420 static ir_type *float_F[16] = {NULL, };
422 if (float_F[align] == NULL) {
423 float_F[align] = tp = new_type_primitive(mode);
424 /* set the specified alignment */
425 set_type_alignment_bytes(tp, align);
427 return float_F[align];
428 } else if (mode == mode_D) {
429 static ir_type *float_D[16] = {NULL, };
431 if (float_D[align] == NULL) {
432 float_D[align] = tp = new_type_primitive(mode);
433 /* set the specified alignment */
434 set_type_alignment_bytes(tp, align);
436 return float_D[align];
/* Fallback for any other mode (presumably extended precision). */
438 static ir_type *float_E[16] = {NULL, };
440 if (float_E[align] == NULL) {
441 float_E[align] = tp = new_type_primitive(mode);
442 /* set the specified alignment */
443 set_type_alignment_bytes(tp, align);
445 return float_E[align];
450 * Create a float[2] array type for the given atomic type and cache it
 * per (element mode, alignment).
452 * @param tp the atomic element type
454 static ir_type *ia32_create_float_array(ir_type *tp)
456 ir_mode *mode = get_type_mode(tp);
457 unsigned align = get_type_alignment_bytes(tp);
/* Same lazy per-mode/per-alignment caching scheme as
 * ia32_create_float_type() above. */
462 if (mode == mode_F) {
463 static ir_type *float_F[16] = {NULL, };
465 if (float_F[align] != NULL)
466 return float_F[align];
467 arr = float_F[align] = new_type_array(1, tp);
468 } else if (mode == mode_D) {
469 static ir_type *float_D[16] = {NULL, };
471 if (float_D[align] != NULL)
472 return float_D[align];
473 arr = float_D[align] = new_type_array(1, tp);
475 static ir_type *float_E[16] = {NULL, };
477 if (float_E[align] != NULL)
478 return float_E[align];
479 arr = float_E[align] = new_type_array(1, tp);
/* Two elements, fixed layout. */
481 set_type_alignment_bytes(arr, align);
482 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
483 set_type_state(arr, layout_fixed);
487 /* Generates (and caches) an entity for a known FP constant
 * (used for FP Neg + Abs masks and the unsigned long long bias). */
488 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
/* Table describing each known constant: entity name, value string,
 * mode selector (0=Iu, 1=Lu, other=F) and alignment. */
490 static const struct {
491 const char *ent_name;
492 const char *cnst_str;
495 } names [ia32_known_const_max] = {
496 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
497 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
498 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
499 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
500 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
502 static ir_entity *ent_cache[ia32_known_const_max];
504 const char *ent_name, *cnst_str;
510 ent_name = names[kct].ent_name;
/* Create the entity lazily on first request. */
511 if (! ent_cache[kct]) {
512 cnst_str = names[kct].cnst_str;
514 switch (names[kct].mode) {
515 case 0: mode = mode_Iu; break;
516 case 1: mode = mode_Lu; break;
517 default: mode = mode_F; break;
519 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
520 tp = ia32_create_float_type(mode, names[kct].align);
/* The ULL bias is a two-element array: { 0, 2^64 }. */
522 if (kct == ia32_ULLBIAS)
523 tp = ia32_create_float_array(tp);
524 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
526 set_entity_ld_ident(ent, get_entity_ident(ent));
527 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
528 set_entity_visibility(ent, ir_visibility_private);
530 if (kct == ia32_ULLBIAS) {
531 ir_initializer_t *initializer = create_initializer_compound(2);
533 set_initializer_compound_value(initializer, 0,
534 create_initializer_tarval(get_mode_null(mode)));
535 set_initializer_compound_value(initializer, 1,
536 create_initializer_tarval(tv));
538 set_entity_initializer(ent, initializer);
540 set_entity_initializer(ent, create_initializer_tarval(tv));
543 /* cache the entry */
544 ent_cache[kct] = ent;
547 return ent_cache[kct];
551 * return true if the node is a Proj(Load) and could be used in source address
552 * mode for another node. Will return only true if the @p other node is not
553 * dependent on the memory of the Load (for binary operations use the other
554 * input here, for unary operations use NULL).
556 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
557 ir_node *other, ir_node *other2, match_flags_t flags)
562 /* float constants are always available */
563 if (is_Const(node)) {
564 ir_mode *mode = get_irn_mode(node);
565 if (mode_is_float(mode)) {
/* Only "expensive" float constants (those needing a constant-pool
 * load) are worth folding into address mode. */
566 if (ia32_cg_config.use_sse2) {
567 if (is_simple_sse_Const(node))
570 if (is_simple_x87_Const(node))
573 if (get_irn_n_edges(node) > 1)
/* Otherwise: must be the result Proj of a Load in the same block. */
581 load = get_Proj_pred(node);
582 pn = get_Proj_proj(node);
583 if (!is_Load(load) || pn != pn_Load_res)
585 if (get_nodes_block(load) != block)
587 /* we only use address mode if we're the only user of the load */
588 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
590 /* in some edge cases with address mode we might reach the load normally
591 * and through some AM sequence, if it is already materialized then we
592 * can't create an AM node from it */
593 if (be_is_transformed(node))
596 /* don't do AM if other node inputs depend on the load (via mem-proj) */
597 if (other != NULL && ia32_prevents_AM(block, load, other))
600 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Result of match_arguments(): the matched address plus operand info.
 * NOTE(review): this excerpt elides several member declarations. */
606 typedef struct ia32_address_mode_t ia32_address_mode_t;
607 struct ia32_address_mode_t {
612 ia32_op_type_t op_type;
/* operation is commutative (operands may be swapped) */
616 unsigned commutative : 1;
/* operands were swapped during matching (relevant for non-commutative
 * semantics, see gen_binop_x87_float) */
617 unsigned ins_permuted : 1;
/* Decompose @p ptr into an ia32 address (base/index/scale/offset) and
 * transform its parts plus @p mem into the new graph. */
620 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
622 /* construct load address */
623 memset(addr, 0, sizeof(addr[0]));
624 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
/* Unused address parts become the noreg placeholder. */
626 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
627 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
628 addr->mem = be_transform_node(mem);
/* Fill @p am with a source address mode for @p node, which is either a
 * float Const (loaded from its constant-pool entity) or a Proj(Load).
 * NOTE(review): this excerpt elides several original lines. */
631 static void build_address(ia32_address_mode_t *am, ir_node *node,
632 ia32_create_am_flags_t flags)
634 ia32_address_t *addr = &am->addr;
640 /* floating point immediates */
641 if (is_Const(node)) {
642 ir_entity *entity = ia32_create_float_const_entity(node);
643 addr->base = get_symconst_base();
644 addr->index = noreg_GP;
646 addr->symconst_ent = entity;
647 addr->tls_segment = false;
/* Entity type may be narrower than the Const's mode — take the
 * load/store mode from the entity. */
649 am->ls_mode = get_type_mode(get_entity_type(entity));
650 am->pinned = op_pin_state_floats;
/* Proj(Load) case: reuse the Load's address, memory and mode. */
654 load = get_Proj_pred(node);
655 ptr = get_Load_ptr(load);
656 mem = get_Load_mem(load);
657 new_mem = be_transform_node(mem);
658 am->pinned = get_irn_pinned(load);
659 am->ls_mode = get_Load_mode(load);
660 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
663 /* construct load address */
664 ia32_create_address_mode(addr, ptr, flags);
666 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
667 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy all address components from @p addr onto the ia32 node @p node. */
671 static void set_address(ir_node *node, const ia32_address_t *addr)
673 set_ia32_am_scale(node, addr->scale);
674 set_ia32_am_sc(node, addr->symconst_ent);
675 set_ia32_am_offs_int(node, addr->offset);
676 set_ia32_am_tls_segment(node, addr->tls_segment);
677 if (addr->symconst_sign)
678 set_ia32_am_sc_sign(node);
/* (frame-entity handling; guard line elided in this excerpt) */
680 set_ia32_use_frame(node);
681 set_ia32_frame_ent(node, addr->frame_entity);
685 * Apply attributes of a given address mode to a node.
687 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
689 set_address(node, &am->addr);
691 set_ia32_op_type(node, am->op_type);
692 set_ia32_ls_mode(node, am->ls_mode);
693 if (am->pinned == op_pin_state_pinned) {
694 /* beware: some nodes are already pinned and did not allow to change the state */
695 if (get_irn_pinned(node) != op_pin_state_pinned)
696 set_irn_pinned(node, op_pin_state_pinned)_;
699 set_ia32_commutative(node);
703 * Check if a given node is a Down-Conv, i.e. an integer Conv
704 * from a mode with more bits to a mode with fewer bits.
705 * Moreover, we return only true if the node has not more than 1 user.
707 * @param node the node
708 * @return non-zero if node is a Down-Conv
710 static int is_downconv(const ir_node *node)
718 /* we only want to skip the conv when we're the only user
719 * (because this test is used in the context of address-mode selection
720 * and we don't want to use address mode for multiple users) */
721 if (get_irn_n_edges(node) > 1)
724 src_mode = get_irn_mode(get_Conv_op(node));
725 dest_mode = get_irn_mode(node);
/* Both modes must live in GP registers and the target must be narrower
 * or equal in width. */
727 ia32_mode_needs_gp_reg(src_mode) &&
728 ia32_mode_needs_gp_reg(dest_mode) &&
729 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
732 /** Skip all Down-Conv's on a given node and return the resulting node. */
733 ir_node *ia32_skip_downconv(ir_node *node)
735 while (is_downconv(node))
736 node = get_Conv_op(node);
/* Check if @p node is a Conv between GP modes of equal bit width
 * (i.e. only a signedness change) with at most one user. */
741 static bool is_sameconv(ir_node *node)
749 /* we only want to skip the conv when we're the only user
750 * (because this test is used in the context of address-mode selection
751 * and we don't want to use address mode for multiple users) */
752 if (get_irn_n_edges(node) > 1)
755 src_mode = get_irn_mode(get_Conv_op(node));
756 dest_mode = get_irn_mode(node);
/* Same width on both sides — only the signedness differs. */
758 ia32_mode_needs_gp_reg(src_mode) &&
759 ia32_mode_needs_gp_reg(dest_mode) &&
760 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
763 /** Skip all signedness converts (same-width Convs). */
764 static ir_node *ia32_skip_sameconv(ir_node *node)
766 while (is_sameconv(node))
767 node = get_Conv_op(node);
/* Widen @p node to a 32-bit mode (signed or unsigned depending on its
 * current mode) via create_I2I_Conv; @p orig_node is kept for debugging.
 * NOTE(review): the tgt_mode selection lines are elided in this excerpt. */
772 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
774 ir_mode *mode = get_irn_mode(node);
779 if (mode_is_signed(mode)) {
784 block = get_nodes_block(node);
785 dbgi = get_irn_dbg_info(node);
787 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
791 * matches operands of a node into ia32 addressing/operand modes. This covers
792 * usage of source address mode, immediates, operations with non 32-bit modes.
794 * The resulting data is filled into the @p am struct. block is the block
795 * of the node whose arguments are matched. op1, op2 are the first and second
796 * input that are matched (op1 may be NULL). other_op is another unrelated
797 * input that is not matched! but which is needed sometimes to check if AM
798 * for op1/op2 is legal.
799 * @p flags describes the supported modes of the operation in detail.
 * NOTE(review): this excerpt elides many original lines.
801 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
802 ir_node *op1, ir_node *op2, ir_node *other_op,
805 ia32_address_t *addr = &am->addr;
806 ir_mode *mode = get_irn_mode(op2);
807 int mode_bits = get_mode_size_bits(mode);
808 ir_node *new_op1, *new_op2;
810 unsigned commutative;
811 int use_am_and_immediates;
814 memset(am, 0, sizeof(am[0]));
/* Decode the match flags into local booleans. */
816 commutative = (flags & match_commutative) != 0;
817 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
818 use_am = (flags & match_am) != 0;
819 use_immediate = (flags & match_immediate) != 0;
820 assert(!use_am_and_immediates || use_immediate);
823 assert(!commutative || op1 != NULL);
824 assert(use_am || !(flags & match_8bit_am));
825 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit address mode only when explicitly allowed by the flags. */
827 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
828 (mode_bits == 16 && !(flags & match_16bit_am))) {
832 /* we can simply skip downconvs for mode neutral nodes: the upper bits
833 * can be random for these operations */
834 if (flags & match_mode_neutral) {
835 op2 = ia32_skip_downconv(op2);
837 op1 = ia32_skip_downconv(op1);
/* Signedness-only converts never change the bits — always skippable. */
840 op2 = ia32_skip_sameconv(op2);
842 op1 = ia32_skip_sameconv(op1);
846 /* match immediates. firm nodes are normalized: constants are always on the
849 if (!(flags & match_try_am) && use_immediate) {
850 new_op2 = ia32_try_create_Immediate(op2, 0);
/* Try source address mode on op2 first... */
853 if (new_op2 == NULL &&
854 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
855 build_address(am, op2, ia32_create_am_normal);
856 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
857 if (mode_is_float(mode)) {
858 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
862 am->op_type = ia32_AddrModeS;
/* ...then on op1 (operands swapped), allowed only for commutative ops. */
863 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
865 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
867 build_address(am, op1, ia32_create_am_normal);
869 if (mode_is_float(mode)) {
870 noreg = ia32_new_NoReg_vfp(current_ir_graph);
875 if (new_op2 != NULL) {
878 new_op1 = be_transform_node(op2);
/* Record the swap so non-commutative semantics can be fixed up. */
880 am->ins_permuted = true;
882 am->op_type = ia32_AddrModeS;
/* No address mode matched: plain register operands. */
885 am->op_type = ia32_Normal;
887 if (flags & match_try_am) {
893 mode = get_irn_mode(op2);
/* Sub-32-bit operands may need explicit widening. */
894 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
895 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
897 new_op2 = create_upconv(op2, NULL);
898 am->ls_mode = mode_Iu;
900 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
902 new_op2 = be_transform_node(op2);
903 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* Fill unused address slots with the noreg/nomem placeholders. */
906 if (addr->base == NULL)
907 addr->base = noreg_GP;
908 if (addr->index == NULL)
909 addr->index = noreg_GP;
910 if (addr->mem == NULL)
913 am->new_op1 = new_op1;
914 am->new_op2 = new_op2;
915 am->commutative = commutative;
919 * "Fixes" a node that uses address mode by turning it into mode_T
920 * and returning a pn_ia32_res Proj.
922 * @param node the node
923 * @param am its address mode
925 * @return a Proj(pn_ia32_res) if a memory address mode is used,
 *         the node itself otherwise
928 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* Nothing to do when the matched operand had no memory Proj. */
933 if (am->mem_proj == NULL)
936 /* we have to create a mode_T so the old MemProj can attach to us */
937 mode = get_irn_mode(node);
938 load = get_Proj_pred(am->mem_proj);
/* Redirect users of the consumed Load to the new AM node. */
940 be_set_transformed_node(load, node);
942 if (mode != mode_T) {
943 set_irn_mode(node, mode_T);
944 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
951 * Construct a standard binary operation, set AM and immediate if required.
953 * @param node The original node for which the binop is created
954 * @param op1 The first operand
955 * @param op2 The second operand
956 * @param func The node constructor function
957 * @return The constructed ia32 node.
959 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
960 construct_binop_func *func, match_flags_t flags)
963 ir_node *block, *new_block, *new_node;
964 ia32_address_mode_t am;
965 ia32_address_t *addr = &am.addr;
/* Match operands into address mode / immediates first... */
967 block = get_nodes_block(node);
968 match_arguments(&am, block, op1, op2, NULL, flags);
/* ...then build the node and copy the AM attributes onto it. */
970 dbgi = get_irn_dbg_info(node);
971 new_block = be_transform_node(block);
972 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
973 am.new_op1, am.new_op2);
974 set_am_attributes(new_node, &am);
975 /* we can't use source address mode anymore when using immediates */
976 if (!(flags & match_am_and_immediates) &&
977 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
978 set_ia32_am_support(new_node, ia32_am_none);
979 SET_IA32_ORIG_NODE(new_node, node);
981 new_node = fix_mem_proj(new_node, &am);
987 * Generic names for the inputs of an ia32 binary op; the asserts below
 * verify they line up with the generated Adc/Sbb input positions.
990 n_ia32_l_binop_left, /**< ia32 left input */
991 n_ia32_l_binop_right, /**< ia32 right input */
992 n_ia32_l_binop_eflags /**< ia32 eflags input */
994 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
995 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
996 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
997 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
998 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
999 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
1002 * Construct a binary operation which also consumes the eflags
 * (e.g. add-with-carry / subtract-with-borrow).
1004 * @param node The node to transform
1005 * @param func The node constructor function
1006 * @param flags The match flags
1007 * @return The constructed ia32 node
1009 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1010 match_flags_t flags)
1012 ir_node *src_block = get_nodes_block(node);
1013 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1014 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1015 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1017 ir_node *block, *new_node, *new_eflags;
1018 ia32_address_mode_t am;
1019 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: AM must not move a load across it. */
1021 match_arguments(&am, src_block, op1, op2, eflags, flags);
1023 dbgi = get_irn_dbg_info(node);
1024 block = be_transform_node(src_block);
1025 new_eflags = be_transform_node(eflags);
1026 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1027 am.new_op1, am.new_op2, new_eflags);
1028 set_am_attributes(new_node, &am);
1029 /* we can't use source address mode anymore when using immediates */
1030 if (!(flags & match_am_and_immediates) &&
1031 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1032 set_ia32_am_support(new_node, ia32_am_none);
1033 SET_IA32_ORIG_NODE(new_node, node);
1035 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed) initial FPU control word node. */
1040 static ir_node *get_fpcw(void)
1042 if (initial_fpcw != NULL)
1043 return initial_fpcw;
/* First request: transform the cached pre-transform fpcw node. */
1045 initial_fpcw = be_transform_node(old_initial_fpcw);
1046 return initial_fpcw;
1050 * Construct a standard x87 float binary operation, set AM and
 * immediate if required.
1052 * @param op1 The first operand
1053 * @param op2 The second operand
1054 * @param func The node constructor function
1055 * @return The constructed ia32 node.
1057 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1058 construct_binop_float_func *func)
1060 ir_mode *mode = get_irn_mode(node);
1062 ir_node *block, *new_block, *new_node;
1063 ia32_address_mode_t am;
1064 ia32_address_t *addr = &am.addr;
1065 ia32_x87_attr_t *attr;
1066 /* All operations are considered commutative, because there are reverse
1068 match_flags_t flags = match_commutative;
1070 /* happens for div nodes... */
1071 if (mode == mode_T) {
1073 mode = get_Div_resmode(node);
1075 panic("can't determine mode");
1078 /* cannot use address mode with long double on x87 */
1079 if (get_mode_size_bits(mode) <= 64)
1082 block = get_nodes_block(node);
1083 match_arguments(&am, block, op1, op2, NULL, flags);
1085 dbgi = get_irn_dbg_info(node);
1086 new_block = be_transform_node(block);
/* The x87 constructor additionally takes the FPU control word. */
1087 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1088 am.new_op1, am.new_op2, get_fpcw());
1089 set_am_attributes(new_node, &am);
/* Remember a potential operand swap so emit can pick the reverse form. */
1091 attr = get_ia32_x87_attr(new_node);
1092 attr->attr.data.ins_permuted = am.ins_permuted;
1094 SET_IA32_ORIG_NODE(new_node, node);
1096 new_node = fix_mem_proj(new_node, &am);
1102 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1104 * @param op1 The value to shift
1105 * @param op2 The shift amount
1106 * @param func The node constructor function
1107 * @return The constructed ia32 node.
1109 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1110 construct_shift_func *func,
1111 match_flags_t flags)
1114 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1116 assert(! mode_is_float(get_irn_mode(node)));
1117 assert(flags & match_immediate);
1118 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* Widen or skip converts on the shifted value depending on the flags. */
1120 if (flags & match_mode_neutral) {
1121 op1 = ia32_skip_downconv(op1);
1122 new_op1 = be_transform_node(op1);
1123 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1124 new_op1 = create_upconv(op1, node);
1126 new_op1 = be_transform_node(op1);
1129 /* the shift amount can be any mode that is bigger than 5 bits, since all
1130 * other bits are ignored anyway */
1131 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1132 ir_node *const op = get_Conv_op(op2);
1133 if (mode_is_float(get_irn_mode(op)))
1136 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
/* Constant shift amounts become immediates. */
1138 new_op2 = create_immediate_or_transform(op2, 0);
1140 dbgi = get_irn_dbg_info(node);
1141 block = get_nodes_block(node);
1142 new_block = be_transform_node(block);
1143 new_node = func(dbgi, new_block, new_op1, new_op2);
1144 SET_IA32_ORIG_NODE(new_node, node);
1146 /* lowered shift instruction may have a dependency operand, handle it here */
1147 if (get_irn_arity(node) == 3) {
1148 /* we have a dependency */
1149 ir_node* dep = get_irn_n(node, 2);
1150 if (get_irn_n_edges(dep) > 1) {
1151 /* ... which has at least one user other than 'node' */
1152 ir_node *new_dep = be_transform_node(dep);
1153 add_irn_dep(new_node, new_dep);
1162 * Construct a standard unary operation, set AM and immediate if required.
1164 * @param op The operand
1165 * @param func The node constructor function
1166 * @return The constructed ia32 node.
1168 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1169 match_flags_t flags)
1172 ir_node *block, *new_block, *new_op, *new_node;
/* Only mode-neutral is meaningful for unary ops. */
1174 assert(flags == 0 || flags == match_mode_neutral);
1175 if (flags & match_mode_neutral) {
1176 op = ia32_skip_downconv(op);
1179 new_op = be_transform_node(op);
1180 dbgi = get_irn_dbg_info(node);
1181 block = get_nodes_block(node);
1182 new_block = be_transform_node(block);
1183 new_node = func(dbgi, new_block, new_op);
1185 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node computing the address described by @p addr.
 * NOTE(review): this excerpt elides several original lines. */
1190 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1191 ia32_address_t *addr)
1193 ir_node *base, *index, *res;
1199 base = be_transform_node(base);
1202 index = addr->index;
1203 if (index == NULL) {
1206 index = be_transform_node(index);
1209 /* segment overrides are ineffective for Leas :-( so we have to patch
1211 if (addr->tls_segment) {
/* Add the TLS base explicitly via an extra Lea instead. */
1212 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1213 assert(addr->symconst_ent != NULL);
1214 if (base == noreg_GP)
1217 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1218 addr->tls_segment = false;
1221 res = new_bd_ia32_Lea(dbgi, block, base, index);
1222 set_address(res, addr);
1228 * Returns non-zero if a given address mode has a symbolic or
1229 * numerical offset != 0.
1231 static int am_has_immediates(const ia32_address_t *addr)
1233 return addr->offset != 0 || addr->symconst_ent != NULL
1234 || addr->frame_entity || addr->use_frame;
1238 * Creates an ia32 Add.
1240 * @return the created ia32 Add node
/**
 * Creates an ia32 Add.
 * Selection strategy (see the numbered list below): fold pure immediate
 * trees to a Const, prefer a Lea for add-with-immediate, use source address
 * mode where possible, otherwise fall back to a Lea.
 * NOTE(review): sampled view -- several returns and braces are elided.
 */
1242 static ir_node *gen_Add(ir_node *node)
1244 ir_mode *mode = get_irn_mode(node);
1245 ir_node *op1 = get_Add_left(node);
1246 ir_node *op2 = get_Add_right(node);
1248 ir_node *block, *new_block, *new_node, *add_immediate_op;
1249 ia32_address_t addr;
1250 ia32_address_mode_t am;
/* float adds: SSE2 xAdd if available, else x87 vfadd */
1252 if (mode_is_float(mode)) {
1253 if (ia32_cg_config.use_sse2)
1254 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1255 match_commutative | match_am);
1257 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1260 ia32_mark_non_am(node);
/* the Add is used mode-neutrally: strip pointless down-conversions */
1262 op2 = ia32_skip_downconv(op2);
1263 op1 = ia32_skip_downconv(op1);
1267 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1268 * 1. Add with immediate -> Lea
1269 * 2. Add with possible source address mode -> Add
1270 * 3. Otherwise -> Lea
/* force the whole Add into an address-mode description */
1272 memset(&addr, 0, sizeof(addr));
1273 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1274 add_immediate_op = NULL;
1276 dbgi = get_irn_dbg_info(node);
1277 block = get_nodes_block(node);
1278 new_block = be_transform_node(block);
/* case 0: no register parts at all -> everything folded into a Const */
1281 if (addr.base == NULL && addr.index == NULL) {
1282 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1283 addr.symconst_sign, 0, addr.offset);
1284 SET_IA32_ORIG_NODE(new_node, node);
1287 /* add with immediate? */
1288 if (addr.index == NULL) {
1289 add_immediate_op = addr.base;
1290 } else if (addr.base == NULL && addr.scale == 0) {
1291 add_immediate_op = addr.index;
/* case 1: exactly one register operand -> Lea (or nothing for Add x,0) */
1294 if (add_immediate_op != NULL) {
1295 if (!am_has_immediates(&addr)) {
1296 #ifdef DEBUG_libfirm
1297 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
/* Add x,0: should have been folded earlier, just pass x through */
1300 return be_transform_node(add_immediate_op);
1303 new_node = create_lea_from_address(dbgi, new_block, &addr);
1304 SET_IA32_ORIG_NODE(new_node, node);
1308 /* test if we can use source address mode */
1309 match_arguments(&am, block, op1, op2, NULL, match_commutative
1310 | match_mode_neutral | match_am | match_immediate | match_try_am);
1312 /* construct an Add with source address mode */
1313 if (am.op_type == ia32_AddrModeS) {
1314 ia32_address_t *am_addr = &am.addr;
1315 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1316 am_addr->index, am_addr->mem, am.new_op1,
1318 set_am_attributes(new_node, &am);
1319 SET_IA32_ORIG_NODE(new_node, node);
1321 new_node = fix_mem_proj(new_node, &am);
1326 /* otherwise construct a lea */
1327 new_node = create_lea_from_address(dbgi, new_block, &addr);
1328 SET_IA32_ORIG_NODE(new_node, node);
1333 * Creates an ia32 Mul.
1335 * @return the created ia32 Mul node
1337 static ir_node *gen_Mul(ir_node *node)
1339 ir_node *op1 = get_Mul_left(node);
1340 ir_node *op2 = get_Mul_right(node);
1341 ir_mode *mode = get_irn_mode(node);
1343 if (mode_is_float(mode)) {
1344 if (ia32_cg_config.use_sse2)
1345 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1346 match_commutative | match_am);
1348 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1350 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1351 match_commutative | match_am | match_mode_neutral |
1352 match_immediate | match_am_and_immediates);
1356 * Creates an ia32 Mulh.
1357 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1358 * this result while Mul returns the lower 32 bit.
1360 * @return the created ia32 Mulh node
/**
 * Creates an ia32 Mulh: the full multiply yields a 64bit value, Mulh is the
 * upper 32 bits, taken from the res_high Proj of IMul1OP (signed) or Mul
 * (unsigned).
 * NOTE(review): sampled view -- the new_node declaration and the else
 * keyword/braces are on elided lines.
 */
1362 static ir_node *gen_Mulh(ir_node *node)
1364 dbg_info *dbgi = get_irn_dbg_info(node);
1365 ir_node *op1 = get_Mulh_left(node);
1366 ir_node *op2 = get_Mulh_right(node);
1367 ir_mode *mode = get_irn_mode(node);
1369 ir_node *proj_res_high;
/* only the 32bit case is implemented */
1371 if (get_mode_size_bits(mode) != 32) {
1372 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1375 if (mode_is_signed(mode)) {
1376 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1377 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1379 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1380 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1382 return proj_res_high;
1386 * Creates an ia32 And.
1388 * @return The created ia32 And node
/**
 * Creates an ia32 And. An And with the masks 0xFF or 0xFFFF is recognized as
 * a zero extension and turned into a Conv instead.
 * NOTE(review): sampled view -- the branch choosing src_mode for the 0xFF
 * case and several braces/returns are elided.
 */
1390 static ir_node *gen_And(ir_node *node)
1392 ir_node *op1 = get_And_left(node);
1393 ir_node *op2 = get_And_right(node);
1394 assert(! mode_is_float(get_irn_mode(node)));
1396 /* is it a zero extension? */
1397 if (is_Const(op2)) {
1398 ir_tarval *tv = get_Const_tarval(op2);
1399 long v = get_tarval_long(tv);
1401 if (v == 0xFF || v == 0xFFFF) {
1402 dbg_info *dbgi = get_irn_dbg_info(node);
1403 ir_node *block = get_nodes_block(node);
/* the elided lines presumably select an 8bit src_mode for 0xFF -- here
 * only the 0xFFFF path is visible */
1410 assert(v == 0xFFFF);
1413 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic And */
1418 return gen_binop(node, op1, op2, new_bd_ia32_And,
1419 match_commutative | match_mode_neutral | match_am | match_immediate);
1423 * test whether 2 values result in 'x' and '32-x' when interpreted as a shift
1426 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1428 if (is_Const(value1) && is_Const(value2)) {
1429 ir_tarval *tv1 = get_Const_tarval(value1);
1430 ir_tarval *tv2 = get_Const_tarval(value2);
1431 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1432 long v1 = get_tarval_long(tv1);
1433 long v2 = get_tarval_long(tv2);
1434 return v1 <= v2 && v2 == 32-v1;
1440 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1441 ir_node *high, ir_node *low,
1445 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1446 * op1 - target to be shifted
1447 * op2 - contains bits to be shifted into target
1449 * Only op3 can be an immediate.
/**
 * Builds a ShlD/ShrD-style double shift via the given constructor.
 * NOTE(review): sampled view -- the new_count/new_node declarations and the
 * final return are on elided lines.
 */
1451 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1452                                  ir_node *high, ir_node *low, ir_node *count,
1453                                  new_shiftd_func func)
1455 ir_node *new_block = be_transform_node(block);
1456 ir_node *new_high = be_transform_node(high);
1457 ir_node *new_low = be_transform_node(low);
1461 /* the shift amount can be any mode that is bigger than 5 bits, since all
1462 * other bits are ignored anyway */
/* strip single-user integer Convs around the count, they cannot change the
 * low 5 bits the hardware actually looks at */
1463 while (is_Conv(count) &&
1464 get_irn_n_edges(count) == 1 &&
1465 mode_is_int(get_irn_mode(count))) {
1466 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1467 count = get_Conv_op(count);
1469 new_count = create_immediate_or_transform(count, 0);
1471 new_node = func(dbgi, new_block, new_high, new_low, new_count);
/**
 * Tries to recognize an Or node as one of the double-shift patterns produced
 * by 64bit lowering and turn it into a ShlD/ShrD. Returns the replacement
 * node on a match; the failure return is on elided lines.
 * NOTE(review): sampled view -- operand normalization (e.g. swapping op1/op2
 * so that op1 is the Shl) and the constructor arguments passed to
 * gen_64bit_shifts are partially elided.
 */
1475 static ir_node *match_64bit_shift(ir_node *node)
1477 ir_node *op1 = get_Or_left(node);
1478 ir_node *op2 = get_Or_right(node);
1486 /* match ShlD operation */
1487 if (is_Shl(op1) && is_Shr(op2)) {
1488 ir_node *shl_right = get_Shl_right(op1);
1489 ir_node *shl_left = get_Shl_left(op1);
1490 ir_node *shr_right = get_Shr_right(op2);
1491 ir_node *shr_left = get_Shr_left(op2);
1492 /* constant ShlD operation */
1493 if (is_complementary_shifts(shl_right, shr_right)) {
1494 dbg_info *dbgi = get_irn_dbg_info(node);
1495 ir_node *block = get_nodes_block(node);
1496 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1499 /* constant ShrD operation */
1500 if (is_complementary_shifts(shr_right, shl_right)) {
1501 dbg_info *dbgi = get_irn_dbg_info(node);
1502 ir_node *block = get_nodes_block(node);
1503 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1506 /* lower_dw produces the following for ShlD:
1507 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1508 if (is_Shr(shr_left) && is_Not(shr_right)
1509 && is_Const_1(get_Shr_right(shr_left))
1510 && get_Not_op(shr_right) == shl_right) {
1511 dbg_info *dbgi = get_irn_dbg_info(node);
1512 ir_node *block = get_nodes_block(node);
1513 ir_node *val_h = get_Shr_left(shr_left);
1514 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1517 /* lower_dw produces the following for ShrD:
1518 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1519 if (is_Shl(shl_left) && is_Not(shl_right)
1520 && is_Const_1(get_Shl_right(shl_left))
1521 && get_Not_op(shl_right) == shr_right) {
1522 dbg_info *dbgi = get_irn_dbg_info(node);
1523 ir_node *block = get_nodes_block(node);
1524 ir_node *val_h = get_Shl_left(shl_left);
1525 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1534 * Creates an ia32 Or.
1536 * @return The created ia32 Or node
/**
 * Creates an ia32 Or; first tries to recognize a double-shift pattern.
 * NOTE(review): sampled view -- the early return taken when
 * match_64bit_shift() succeeds (presumably "if (res != NULL) return res;")
 * is on elided lines.
 */
1538 static ir_node *gen_Or(ir_node *node)
1540 ir_node *op1 = get_Or_left(node);
1541 ir_node *op2 = get_Or_right(node);
1544 res = match_64bit_shift(node);
1548 assert (! mode_is_float(get_irn_mode(node)));
1549 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1550 | match_mode_neutral | match_am | match_immediate);
1556 * Creates an ia32 Eor.
1558 * @return The created ia32 Eor node
1560 static ir_node *gen_Eor(ir_node *node)
1562 ir_node *op1 = get_Eor_left(node);
1563 ir_node *op2 = get_Eor_right(node);
1565 assert(! mode_is_float(get_irn_mode(node)));
1566 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1567 | match_mode_neutral | match_am | match_immediate);
1572 * Creates an ia32 Sub.
1574 * @return The created ia32 Sub node
/**
 * Creates an ia32 Sub.
 * NOTE(review): sampled view -- the ir_fprintf argument list continuation
 * and several braces are elided.
 */
1576 static ir_node *gen_Sub(ir_node *node)
1578 ir_node *op1 = get_Sub_left(node);
1579 ir_node *op2 = get_Sub_right(node);
1580 ir_mode *mode = get_irn_mode(node);
/* float subtraction: SSE2 xSub if available, else x87 vfsub */
1582 if (mode_is_float(mode)) {
1583 if (ia32_cg_config.use_sse2)
1584 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1586 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with constant should have been normalized to Add of the negated
 * constant by the middle end -- warn if one slipped through */
1589 if (is_Const(op2)) {
1590 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1594 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1595 | match_am | match_immediate);
/**
 * Computes the memory input for a node consuming an address mode, merging
 * the source memory with the AM memory while avoiding memory self-loops
 * (the value Proj and the memory Proj stemming from the same Load).
 * NOTE(review): sampled view -- declarations of i/n/ins, the am_mem
 * fallthrough returns and several braces are elided.
 */
1598 static ir_node *transform_AM_mem(ir_node *const block,
1599                                  ir_node *const src_val,
1600                                  ir_node *const src_mem,
1601                                  ir_node *const am_mem)
/* no AM memory: just transform the original memory */
1603 if (is_NoMem(am_mem)) {
1604 return be_transform_node(src_mem);
1605 } else if (is_Proj(src_val) &&
1607 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1608 /* avoid memory loop */
1610 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* rebuild the Sync without the predecessor that would form a loop */
1611 ir_node *const ptr_pred = get_Proj_pred(src_val);
1612 int const arity = get_Sync_n_preds(src_mem);
1617 NEW_ARR_A(ir_node*, ins, arity + 1);
1619 /* NOTE: This sometimes produces dead-code because the old sync in
1620 * src_mem might not be used anymore, we should detect this case
1621 * and kill the sync... */
1622 for (i = arity - 1; i >= 0; --i) {
1623 ir_node *const pred = get_Sync_pred(src_mem, i);
1625 /* avoid memory loop */
1626 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1629 ins[n++] = be_transform_node(pred);
1632 if (n==1 && ins[0] == am_mem) {
1634 /* creating a new Sync and relying on CSE may fail,
1635 * if am_mem is a ProjM, which does not yet verify. */
1639 return new_r_Sync(block, n, ins);
/* default: Sync of the transformed source memory and the AM memory */
1643 ins[0] = be_transform_node(src_mem);
1645 return new_r_Sync(block, 2, ins);
1650 * Create a 32bit to 64bit signed extension.
1652 * @param dbgi debug info
1653 * @param block the block where node nodes should be placed
1654 * @param val the value to extend
1655 * @param orig the original node
/**
 * Create a 32bit to 64bit signed extension: either the short Cltd (cdq)
 * form or an explicit arithmetic shift by 31.
 * NOTE(review): sampled view -- the res declaration, else keyword/braces
 * and the final return are elided.
 */
1657 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1658                                  ir_node *val, const ir_node *orig)
1663 if (ia32_cg_config.use_short_sex_eax) {
/* ProduceVal gives the register allocator a value to tie cltd to */
1664 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1665 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* fallback: sar val, 31 replicates the sign bit into all 32 bits */
1667 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1668 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1670 SET_IA32_ORIG_NODE(res, orig);
1675 * Generates an ia32 Div with additional infrastructure for the
1676 * register allocator if needed.
/**
 * Generates an ia32 Div/IDiv for both firm Div and Mod nodes (both results
 * come out of the same machine instruction).
 * NOTE(review): sampled view -- declarations of op1/op2/mem/mode/new_mem/
 * new_node, case labels and the final return are on elided lines.
 */
1678 static ir_node *create_Div(ir_node *node)
1680 dbg_info *dbgi = get_irn_dbg_info(node);
1681 ir_node *block = get_nodes_block(node);
1682 ir_node *new_block = be_transform_node(block);
1683 int throws_exception = ir_throws_exception(node);
1690 ir_node *sign_extension;
1691 ia32_address_mode_t am;
1692 ia32_address_t *addr = &am.addr;
1694 /* the upper bits have random contents for smaller modes */
/* pick the operands depending on whether this is a Div or a Mod */
1695 switch (get_irn_opcode(node)) {
1697 op1 = get_Div_left(node);
1698 op2 = get_Div_right(node);
1699 mem = get_Div_mem(node);
1700 mode = get_Div_resmode(node);
1703 op1 = get_Mod_left(node);
1704 op2 = get_Mod_right(node);
1705 mem = get_Mod_mem(node);
1706 mode = get_Mod_resmode(node);
1709 panic("invalid divmod node %+F", node);
1712 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1714 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1715 is the memory of the consumed address. We can have only the second op as address
1716 in Div nodes, so check only op2. */
1717 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division needs edx:eax sign-extended, unsigned needs edx = 0 */
1719 if (mode_is_signed(mode)) {
1720 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1721 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1722 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1724 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1726 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1727 addr->index, new_mem, am.new_op2,
1728 am.new_op1, sign_extension);
1730 ir_set_throws_exception(new_node, throws_exception);
1732 set_irn_pinned(new_node, get_irn_pinned(node));
1734 set_am_attributes(new_node, &am);
1735 SET_IA32_ORIG_NODE(new_node, node);
1737 new_node = fix_mem_proj(new_node, &am);
1743 * Generates an ia32 Mod.
1745 static ir_node *gen_Mod(ir_node *node)
1747 return create_Div(node);
1751 * Generates an ia32 Div.
1753 static ir_node *gen_Div(ir_node *node)
1755 ir_mode *mode = get_Div_resmode(node);
1756 if (mode_is_float(mode)) {
1757 ir_node *op1 = get_Div_left(node);
1758 ir_node *op2 = get_Div_right(node);
1760 if (ia32_cg_config.use_sse2) {
1761 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1763 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1767 return create_Div(node);
1771 * Creates an ia32 Shl.
1773 * @return The created ia32 Shl node
1775 static ir_node *gen_Shl(ir_node *node)
1777 ir_node *left = get_Shl_left(node);
1778 ir_node *right = get_Shl_right(node);
1780 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1781 match_mode_neutral | match_immediate);
1785 * Creates an ia32 Shr.
1787 * @return The created ia32 Shr node
1789 static ir_node *gen_Shr(ir_node *node)
1791 ir_node *left = get_Shr_left(node);
1792 ir_node *right = get_Shr_right(node);
1794 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1800 * Creates an ia32 Sar.
1802 * @return The created ia32 Shrs node
/**
 * Creates an ia32 Sar, with two special patterns recognized first:
 * an arithmetic shift by a constant that amounts to a full sign extension,
 * and Shrs(Shl(x,C),C) which is an 8/16 bit sign extension.
 * NOTE(review): sampled view -- the condition guarding the sign-extension
 * branch (presumably val == 31) and several braces/returns are elided.
 */
1804 static ir_node *gen_Shrs(ir_node *node)
1806 ir_node *left = get_Shrs_left(node);
1807 ir_node *right = get_Shrs_right(node);
1809 if (is_Const(right)) {
1810 ir_tarval *tv = get_Const_tarval(right);
1811 long val = get_tarval_long(tv);
1813 /* this is a sign extension */
1814 dbg_info *dbgi = get_irn_dbg_info(node);
1815 ir_node *block = be_transform_node(get_nodes_block(node));
1816 ir_node *new_op = be_transform_node(left);
1818 return create_sex_32_64(dbgi, block, new_op, node);
1822 /* 8 or 16 bit sign extension? */
1823 if (is_Const(right) && is_Shl(left)) {
1824 ir_node *shl_left = get_Shl_left(left);
1825 ir_node *shl_right = get_Shl_right(left);
1826 if (is_Const(shl_right)) {
1827 ir_tarval *tv1 = get_Const_tarval(right);
1828 ir_tarval *tv2 = get_Const_tarval(shl_right);
/* equal shift amounts of 16 or 24 mean: keep the low 16/8 bits and
 * sign extend them */
1829 if (tv1 == tv2 && tarval_is_long(tv1)) {
1830 long val = get_tarval_long(tv1);
1831 if (val == 16 || val == 24) {
1832 dbg_info *dbgi = get_irn_dbg_info(node);
1833 ir_node *block = get_nodes_block(node);
1843 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* default: plain arithmetic shift */
1852 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1858 * Creates an ia32 Rol.
1860 * @param op1 The first operator
1861 * @param op2 The second operator
1862 * @return The created ia32 RotL node
1864 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1866 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1872 * Creates an ia32 Ror.
1873 * NOTE: There is no RotR with immediate because this would always be a RotL
1874 * "imm-mode_size_bits" which can be pre-calculated.
1876 * @param op1 The first operator
1877 * @param op2 The second operator
1878 * @return The created ia32 RotR node
1880 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1882 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1888 * Creates an ia32 RotR or RotL (depending on the found pattern).
1890 * @return The created ia32 RotL or RotR node
1892 static ir_node *gen_Rotl(ir_node *node)
1894 ir_node *op1 = get_Rotl_left(node);
1895 ir_node *op2 = get_Rotl_right(node);
1897 if (is_Minus(op2)) {
1898 return gen_Ror(node, op1, get_Minus_op(op2));
1901 return gen_Rol(node, op1, op2);
1907 * Transforms a Minus node.
1909 * @return The created ia32 Minus node
/**
 * Transforms a Minus node. Integer negation becomes Neg; float negation is
 * either an SSE xor with the sign-bit constant or the x87 fchs.
 * NOTE(review): sampled view -- declarations of new_node/size/ent, else
 * keywords/braces and the final return are elided.
 */
1911 static ir_node *gen_Minus(ir_node *node)
1913 ir_node *op = get_Minus_op(node);
1914 ir_node *block = be_transform_node(get_nodes_block(node));
1915 dbg_info *dbgi = get_irn_dbg_info(node);
1916 ir_mode *mode = get_irn_mode(node);
1921 if (mode_is_float(mode)) {
1922 ir_node *new_op = be_transform_node(op);
1923 if (ia32_cg_config.use_sse2) {
1924 /* TODO: non-optimal... if we have many xXors, then we should
1925 * rather create a load for the const and use that instead of
1926 * several AM nodes... */
/* xor with the sign-bit mask constant flips the sign */
1927 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1929 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1930 noreg_GP, nomem, new_op, noreg_xmm);
1932 size = get_mode_size_bits(mode);
1933 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1935 set_ia32_am_sc(new_node, ent);
1936 set_ia32_op_type(new_node, ia32_AddrModeS);
1937 set_ia32_ls_mode(new_node, mode);
/* x87 path: fchs */
1939 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer path */
1942 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1945 SET_IA32_ORIG_NODE(new_node, node);
1951 * Transforms a Not node.
1953 * @return The created ia32 Not node
1955 static ir_node *gen_Not(ir_node *node)
1957 ir_node *op = get_Not_op(node);
1959 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1960 assert (! mode_is_float(get_irn_mode(node)));
1962 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/**
 * Builds a float absolute value (optionally negated): SSE path masks the
 * sign bit away with an And against a known constant; x87 path uses fabs,
 * followed by fchs when @p negate is set.
 * NOTE(review): sampled view -- declarations of new_op/new_node/size/ent,
 * the integer fallback path (if any), the negate handling on the SSE side
 * and the final return are elided.
 */
1965 static ir_node *create_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1966                            bool negate, ir_node *node)
1968 ir_node *new_block = be_transform_node(block);
1969 ir_mode *mode = get_irn_mode(op);
1975 if (mode_is_float(mode)) {
1976 new_op = be_transform_node(op);
1978 if (ia32_cg_config.use_sse2) {
/* and with the abs mask constant clears the sign bit */
1979 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1980 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1981 noreg_GP, nomem, new_op, noreg_fp);
1983 size = get_mode_size_bits(mode);
1984 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1986 set_ia32_am_sc(new_node, ent);
1988 SET_IA32_ORIG_NODE(new_node, node);
1990 set_ia32_op_type(new_node, ia32_AddrModeS);
1991 set_ia32_ls_mode(new_node, mode);
1993 /* TODO, implement -Abs case */
/* x87 path: fabs, then fchs if a negated abs was requested */
1996 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1997 SET_IA32_ORIG_NODE(new_node, node);
1999 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
2000 SET_IA32_ORIG_NODE(new_node, node);
2009 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
2011 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2013 dbg_info *dbgi = get_irn_dbg_info(cmp);
2014 ir_node *block = get_nodes_block(cmp);
2015 ir_node *new_block = be_transform_node(block);
2016 ir_node *op1 = be_transform_node(x);
2017 ir_node *op2 = be_transform_node(n);
2019 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/**
 * Maps a firm ir_relation onto an ia32 condition code, taking the compared
 * mode into account: float relations use the parity-aware float codes,
 * signed integers use less/greater, unsigned integers use below/above.
 * NOTE(review): sampled view -- the switch keywords, default labels and
 * some panic scaffolding are on elided lines.
 */
2022 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
/* --- float comparisons: must distinguish ordered/unordered variants --- */
2025 if (mode_is_float(mode)) {
2027 case ir_relation_equal: return ia32_cc_float_equal;
2028 case ir_relation_less: return ia32_cc_float_below;
2029 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2030 case ir_relation_greater: return ia32_cc_float_above;
2031 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2032 case ir_relation_less_greater: return ia32_cc_not_equal;
2033 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2034 case ir_relation_unordered: return ia32_cc_parity;
2035 case ir_relation_unordered_equal: return ia32_cc_equal;
2036 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2037 case ir_relation_unordered_less_equal:
2038 return ia32_cc_float_unordered_below_equal;
2039 case ir_relation_unordered_greater:
2040 return ia32_cc_float_unordered_above;
2041 case ir_relation_unordered_greater_equal:
2042 return ia32_cc_float_unordered_above_equal;
2043 case ir_relation_unordered_less_greater:
2044 return ia32_cc_float_not_equal;
2045 case ir_relation_false:
2046 case ir_relation_true:
2047 /* should we introduce a jump always/jump never? */
2050 panic("Unexpected float pnc");
/* --- signed integer comparisons --- */
2051 } else if (mode_is_signed(mode)) {
2053 case ir_relation_unordered_equal:
2054 case ir_relation_equal: return ia32_cc_equal;
2055 case ir_relation_unordered_less:
2056 case ir_relation_less: return ia32_cc_less;
2057 case ir_relation_unordered_less_equal:
2058 case ir_relation_less_equal: return ia32_cc_less_equal;
2059 case ir_relation_unordered_greater:
2060 case ir_relation_greater: return ia32_cc_greater;
2061 case ir_relation_unordered_greater_equal:
2062 case ir_relation_greater_equal: return ia32_cc_greater_equal;
2063 case ir_relation_unordered_less_greater:
2064 case ir_relation_less_greater: return ia32_cc_not_equal;
2065 case ir_relation_less_equal_greater:
2066 case ir_relation_unordered:
2067 case ir_relation_false:
2068 case ir_relation_true:
2069 /* introduce jump always/jump never? */
2072 panic("Unexpected pnc");
/* --- unsigned integer comparisons: below/above instead of less/greater --- */
2075 case ir_relation_unordered_equal:
2076 case ir_relation_equal: return ia32_cc_equal;
2077 case ir_relation_unordered_less:
2078 case ir_relation_less: return ia32_cc_below;
2079 case ir_relation_unordered_less_equal:
2080 case ir_relation_less_equal: return ia32_cc_below_equal;
2081 case ir_relation_unordered_greater:
2082 case ir_relation_greater: return ia32_cc_above;
2083 case ir_relation_unordered_greater_equal:
2084 case ir_relation_greater_equal: return ia32_cc_above_equal;
2085 case ir_relation_unordered_less_greater:
2086 case ir_relation_less_greater: return ia32_cc_not_equal;
2087 case ir_relation_less_equal_greater:
2088 case ir_relation_unordered:
2089 case ir_relation_false:
2090 case ir_relation_true:
2091 /* introduce jump always/jump never? */
2094 panic("Unexpected pnc");
/**
 * Produces a flags value for a mode_b node by testing it against itself;
 * the matching condition code (not_equal) is written to *cc_out.
 * NOTE(review): sampled view -- the final "return flags;" is elided.
 */
2098 static ir_node *get_flags_mode_b(ir_node *node, ia32_condition_code_t *cc_out)
2100 /* a mode_b value, we have to compare it against 0 */
2101 dbg_info *dbgi = get_irn_dbg_info(node);
2102 ir_node *new_block = be_transform_node(get_nodes_block(node));
2103 ir_node *new_op = be_transform_node(node);
/* Test op,op sets ZF iff the value is zero */
2104 ir_node *flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op, new_op, false);
2105 *cc_out = ia32_cc_not_equal;
/**
 * Produces a flags value for a Cmp node, recognizing the bit-test pattern
 * Cmp(And(x, 1 << n), 0) and emitting Bt for it when profitable.
 * NOTE(review): sampled view -- the flags declaration, guards checking that
 * l is an And / la is a Shl, the early return after the Bt branch and the
 * final "return flags;" are elided.
 */
2109 static ir_node *get_flags_node_cmp(ir_node *cmp, ia32_condition_code_t *cc_out)
2111 /* must have a Cmp as input */
2112 ir_relation relation = get_Cmp_relation(cmp);
2113 ir_relation possible;
2114 ir_node *l = get_Cmp_left(cmp);
2115 ir_node *r = get_Cmp_right(cmp);
2116 ir_mode *mode = get_irn_mode(l);
2119 /* check for bit-test */
2120 if (ia32_cg_config.use_bt && (relation == ir_relation_equal
2121 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2122 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2124 ir_node *la = get_And_left(l);
2125 ir_node *ra = get_And_right(l);
2132 ir_node *c = get_Shl_left(la);
2133 if (is_Const_1(c) && is_Const_0(r)) {
2134 /* (1 << n) & ra) */
2135 ir_node *n = get_Shl_right(la);
2136 flags = gen_bt(cmp, ra, n);
2137 /* the bit is copied into the CF flag */
2138 if (relation & ir_relation_equal)
2139 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2141 *cc_out = ia32_cc_below; /* test for CF=1 */
2147 /* the middle-end tries to eliminate impossible relations, so a ptr != 0
2148 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2149 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2150 * a predecessor node). So add the < bit */
2151 possible = ir_get_possible_cmp_relations(l, r);
2152 if (((relation & ir_relation_less) && !(possible & ir_relation_greater))
2153 || ((relation & ir_relation_greater) && !(possible & ir_relation_less)))
2154 relation |= ir_relation_less_greater;
2156 /* just do a normal transformation of the Cmp */
2157 *cc_out = relation_to_condition_code(relation, mode);
2158 flags = be_transform_node(cmp);
2163 * Transform a node returning a "flag" result.
2165 * @param node the node to transform
2166 * @param cc_out the compare mode to use
/**
 * Transform a node returning a "flag" result: Cmp nodes are handled by
 * get_flags_node_cmp, anything else must be mode_b and is tested against 0.
 * NOTE(review): sampled view -- the "if (is_Cmp(node))" guard before the
 * first return is on an elided line.
 */
2168 static ir_node *get_flags_node(ir_node *node, ia32_condition_code_t *cc_out)
2171 return get_flags_node_cmp(node, cc_out);
2172 assert(get_irn_mode(node) == mode_b);
2173 return get_flags_mode_b(node, cc_out);
2177 * Transforms a Load.
2179 * @return the created ia32 Load node
/**
 * Transforms a Load: float loads become xLoad (SSE2) or vfld (x87); integer
 * loads smaller than 32bit are emitted as a Conv with source address mode
 * (load+extend), everything else as a plain Load.
 * NOTE(review): sampled view -- declarations of base/index/new_node, the
 * base extraction from addr, NULL->noreg fallbacks, braces and the final
 * return are elided.
 */
2181 static ir_node *gen_Load(ir_node *node)
2183 ir_node *old_block = get_nodes_block(node);
2184 ir_node *block = be_transform_node(old_block);
2185 ir_node *ptr = get_Load_ptr(node);
2186 ir_node *mem = get_Load_mem(node);
2187 ir_node *new_mem = be_transform_node(mem);
2188 dbg_info *dbgi = get_irn_dbg_info(node);
2189 ir_mode *mode = get_Load_mode(node);
2190 int throws_exception = ir_throws_exception(node);
2194 ia32_address_t addr;
2196 /* construct load address */
2197 memset(&addr, 0, sizeof(addr));
2198 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2205 base = be_transform_node(base);
2208 if (index == NULL) {
2211 index = be_transform_node(index);
2214 if (mode_is_float(mode)) {
2215 if (ia32_cg_config.use_sse2) {
2216 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2219 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2223 assert(mode != mode_b);
2225 /* create a conv node with address mode for smaller modes */
2226 if (get_mode_size_bits(mode) < 32) {
2227 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2228 new_mem, noreg_GP, mode);
2230 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2233 ir_set_throws_exception(new_node, throws_exception);
2235 set_irn_pinned(new_node, get_irn_pinned(node));
2236 set_ia32_op_type(new_node, ia32_AddrModeS);
2237 set_ia32_ls_mode(new_node, mode);
2238 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the spiller */
2240 if (get_irn_pinned(node) == op_pin_state_floats) {
2241 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2242 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2243 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2244 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2247 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Decides whether a loaded value may be consumed in destination address
 * mode: the load's result must have a single user, live in the same block,
 * use the same pointer as the store, and no other operand may depend on it.
 * NOTE(review): sampled view -- the is_Proj/is_Load guards, the early
 * "return 0;"s and the final "return 1;" are elided.
 */
2252 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2253                        ir_node *ptr, ir_node *other)
2260 /* we only use address mode if we're the only user of the load */
2261 if (get_irn_n_edges(node) > 1)
2264 load = get_Proj_pred(node);
2267 if (get_nodes_block(load) != block)
2270 /* store should have the same pointer as the load */
2271 if (get_Load_ptr(load) != ptr)
2274 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2275 if (other != NULL &&
2276 get_nodes_block(other) == block &&
2277 heights_reachable_in_block(ia32_heights, other, load)) {
2281 if (ia32_prevents_AM(block, load, mem))
2283 /* Store should be attached to the load via mem */
2284 assert(heights_reachable_in_block(ia32_heights, mem, load));
/**
 * Builds a binary operation in destination address mode (read-modify-write
 * on memory), trying the load behind op1 first and, for commutative ops,
 * op2 as the memory operand.
 * NOTE(review): sampled view -- local declarations (dbgi, block, new_mem,
 * new_op, new_node, mem_proj, commutative), the failure return when neither
 * operand qualifies, and the final return are elided.
 */
2289 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2290                               ir_node *mem, ir_node *ptr, ir_mode *mode,
2291                               construct_binop_dest_func *func,
2292                               construct_binop_dest_func *func8bit,
2293                               match_flags_t flags)
2295 ir_node *src_block = get_nodes_block(node);
2303 ia32_address_mode_t am;
2304 ia32_address_t *addr = &am.addr;
2305 memset(&am, 0, sizeof(am));
2307 assert(flags & match_immediate); /* there is no destam node without... */
2308 commutative = (flags & match_commutative) != 0;
/* pick which operand is the memory side */
2310 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2311 build_address(&am, op1, ia32_create_am_double_use);
2312 new_op = create_immediate_or_transform(op2, 0);
2313 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2314 build_address(&am, op2, ia32_create_am_double_use);
2315 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address parts with noreg */
2320 if (addr->base == NULL)
2321 addr->base = noreg_GP;
2322 if (addr->index == NULL)
2323 addr->index = noreg_GP;
2324 if (addr->mem == NULL)
2327 dbgi = get_irn_dbg_info(node);
2328 block = be_transform_node(src_block);
2329 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8bit operations need the dedicated 8bit constructor */
2331 if (get_mode_size_bits(mode) == 8) {
2332 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2334 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2336 set_address(new_node, addr);
2337 set_ia32_op_type(new_node, ia32_AddrModeD);
2338 set_ia32_ls_mode(new_node, mode);
2339 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory users to the new node */
2341 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2342 mem_proj = be_transform_node(am.mem_proj);
2343 be_set_transformed_node(am.mem_proj, new_node);
2344 be_set_transformed_node(mem_proj, new_node);
/**
 * Builds a unary operation in destination address mode (read-modify-write
 * on memory), analogous to dest_am_binop.
 * NOTE(review): sampled view -- local declarations (dbgi, block, new_mem,
 * new_node, mem_proj), the NULL return on failure and the final return are
 * elided.
 */
2349 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2350                              ir_node *ptr, ir_mode *mode,
2351                              construct_unop_dest_func *func)
2353 ir_node *src_block = get_nodes_block(node);
2359 ia32_address_mode_t am;
2360 ia32_address_t *addr = &am.addr;
2362 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2365 memset(&am, 0, sizeof(am));
2366 build_address(&am, op, ia32_create_am_double_use);
2368 dbgi = get_irn_dbg_info(node);
2369 block = be_transform_node(src_block);
2370 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2371 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2372 set_address(new_node, addr);
2373 set_ia32_op_type(new_node, ia32_AddrModeD);
2374 set_ia32_ls_mode(new_node, mode);
2375 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory users to the new node */
2377 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2378 mem_proj = be_transform_node(am.mem_proj);
2379 be_set_transformed_node(am.mem_proj, new_node);
2380 be_set_transformed_node(mem_proj, new_node);
/**
 * Tries to turn a store of Mux(sel, 1, 0) / Mux(sel, 0, 1) into a SetccMem
 * (setcc writing straight to memory). Returns the new node or NULL-like
 * failure via elided returns.
 * NOTE(review): sampled view -- local declarations (cond, flags, dbgi,
 * block, new_block, new_node, negated), the NULL returns, the branch
 * tracking which Mux arm is the 1, and the final return are elided.
 */
2385 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2387 ir_mode *mode = get_irn_mode(node);
2388 ir_node *mux_true = get_Mux_true(node);
2389 ir_node *mux_false = get_Mux_false(node);
2397 ia32_condition_code_t cc;
2398 ia32_address_t addr;
/* setcc only writes a single byte */
2400 if (get_mode_size_bits(mode) != 8)
2403 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2405 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2411 cond = get_Mux_sel(node);
2412 flags = get_flags_node(cond, &cc);
2413 /* we can't handle the float special cases with SetM */
2414 if (cc & ia32_cc_additional_float_cases)
/* NOTE(review): this negation is presumably applied only on the inverted
 * Mux(sel,0,1) path -- the guarding condition is on an elided line */
2417 cc = ia32_negate_condition_code(cc);
2419 build_address_ptr(&addr, ptr, mem);
2421 dbgi = get_irn_dbg_info(node);
2422 block = get_nodes_block(node);
2423 new_block = be_transform_node(block);
2424 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2425 addr.index, addr.mem, flags, cc);
2426 set_address(new_node, &addr);
2427 set_ia32_op_type(new_node, ia32_AddrModeD);
2428 set_ia32_ls_mode(new_node, mode);
2429 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to transform Store(op(Load(ptr), x)) patterns into a single ia32
 * read-modify-write instruction using destination address mode
 * (e.g. AddMem, SubMem, ShlMem, ...).
 *
 * @param node  the Store node
 * @return the created destination-AM node, or NULL if the pattern does
 *         not match (NOTE(review): return paths are elided in this excerpt)
 */
2434 static ir_node *try_create_dest_am(ir_node *node)
2436 ir_node *val = get_Store_value(node);
2437 ir_node *mem = get_Store_mem(node);
2438 ir_node *ptr = get_Store_ptr(node);
2439 ir_mode *mode = get_irn_mode(val);
2440 unsigned bits = get_mode_size_bits(mode);
2445 /* handle only GP modes for now... */
2446 if (!ia32_mode_needs_gp_reg(mode))
2450 /* store must be the only user of the val node */
2451 if (get_irn_n_edges(val) > 1)
2453 /* skip pointless convs */
2455 ir_node *conv_op = get_Conv_op(val);
2456 ir_mode *pred_mode = get_irn_mode(conv_op);
2457 if (!ia32_mode_needs_gp_reg(pred_mode))
2459 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2467 /* value must be in the same block */
2468 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the stored value's operation to pick the matching
 * memory-destination instruction */
2471 switch (get_irn_opcode(val)) {
/* Add: prefer inc/dec for +1/-1 when the target config allows it */
2473 op1 = get_Add_left(val);
2474 op2 = get_Add_right(val);
2475 if (ia32_cg_config.use_incdec) {
2476 if (is_Const_1(op2)) {
2477 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2479 } else if (is_Const_Minus_1(op2)) {
2480 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2484 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2485 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2486 match_commutative | match_immediate);
2489 op1 = get_Sub_left(val);
2490 op2 = get_Sub_right(val);
/* Sub-by-constant should have been normalized to Add earlier */
2491 if (is_Const(op2)) {
2492 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2494 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2495 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2499 op1 = get_And_left(val);
2500 op2 = get_And_right(val);
2501 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2502 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2503 match_commutative | match_immediate);
2506 op1 = get_Or_left(val);
2507 op2 = get_Or_right(val);
2508 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2509 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2510 match_commutative | match_immediate);
2513 op1 = get_Eor_left(val);
2514 op2 = get_Eor_right(val);
2515 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2516 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2517 match_commutative | match_immediate);
/* shifts/rotates have no 8-bit variant constructor: same func twice */
2520 op1 = get_Shl_left(val);
2521 op2 = get_Shl_right(val);
2522 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2523 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2527 op1 = get_Shr_left(val);
2528 op2 = get_Shr_right(val);
2529 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2530 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2534 op1 = get_Shrs_left(val);
2535 op2 = get_Shrs_right(val);
2536 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2537 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2541 op1 = get_Rotl_left(val);
2542 op2 = get_Rotl_right(val);
2543 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2544 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2547 /* TODO: match ROR patterns... */
/* boolean Mux stored to memory -> SetccMem */
2549 new_node = try_create_SetMem(val, ptr, mem);
2553 op1 = get_Minus_op(val);
2554 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2557 /* should be lowered already */
2558 assert(mode != mode_b);
2559 op1 = get_Not_op(val);
2560 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned Store must stay pinned after the transformation */
2566 if (new_node != NULL) {
2567 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2568 get_irn_pinned(node) == op_pin_state_pinned) {
2569 set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Return whether @p mode is an integer mode that an x87 fist/fisttp store
 * can produce directly: only signed 16- and 32-bit modes qualify here
 * (unsigned 32-bit is handled separately via a 64-bit store).
 */
2576 static bool possible_int_mode_for_fp(ir_mode *mode)
2580 if (!mode_is_signed(mode))
2582 size = get_mode_size_bits(mode);
2583 if (size != 16 && size != 32)
/**
 * Return whether @p node is a Conv from a float mode to an integer mode
 * that the x87 fist path can handle (see possible_int_mode_for_fp()).
 */
2588 static int is_float_to_int_conv(const ir_node *node)
2590 ir_mode *mode = get_irn_mode(node);
2594 if (!possible_int_mode_for_fp(mode))
2599 conv_op = get_Conv_op(node);
2600 conv_mode = get_irn_mode(conv_op);
2602 if (!mode_is_float(conv_mode))
/**
 * Transform a Store(floatConst) into a sequence of 32-bit integer
 * immediate stores of the constant's bit pattern (one per 4 bytes).
 *
 * @param node  the original Store node
 * @param cns   the float Const being stored
 * @return the created ia32 Store node (a Sync of the partial stores
 *         when more than one 32-bit store is needed)
 */
2614 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2616 ir_mode *mode = get_irn_mode(cns);
2617 unsigned size = get_mode_size_bytes(mode);
2618 ir_tarval *tv = get_Const_tarval(cns);
2619 ir_node *block = get_nodes_block(node);
2620 ir_node *new_block = be_transform_node(block);
2621 ir_node *ptr = get_Store_ptr(node);
2622 ir_node *mem = get_Store_mem(node);
2623 dbg_info *dbgi = get_irn_dbg_info(node);
2626 int throws_exception = ir_throws_exception(node);
2628 ia32_address_t addr;
/* the constant must decompose into whole 32-bit words */
2630 assert(size % 4 == 0);
2633 build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32-bit word of the constant's bit image */
2637 get_tarval_sub_bits(tv, ofs) |
2638 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2639 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2640 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2641 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2643 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2644 addr.index, addr.mem, imm);
2645 ir_node *mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2647 ir_set_throws_exception(new_node, throws_exception);
2648 set_irn_pinned(new_node, get_irn_pinned(node));
2649 set_ia32_op_type(new_node, ia32_AddrModeD);
2650 set_ia32_ls_mode(new_node, mode_Iu);
2651 set_address(new_node, &addr);
2652 SET_IA32_ORIG_NODE(new_node, node);
2660 } while (size != 0);
/* multiple partial stores: combine their memory outputs in a Sync;
 * a single store: return the store itself (the Proj's predecessor) */
2663 return new_rd_Sync(dbgi, new_block, i, ins);
2665 return get_Proj_pred(ins[0]);
/**
 * Generate a vfist or vfisttp instruction (x87 float -> integer store).
 * fisttp (SSE3) truncates implicitly; plain fist needs the FPU control
 * word switched to truncation mode first.
 */
2672 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2673 ir_node *index, ir_node *mem, ir_node *val)
2675 if (ia32_cg_config.use_fisttp) {
2676 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2677 if other users exists */
2678 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2679 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* keep the popped value alive so other users can still see it */
2680 be_new_Keep(block, 1, &value);
/* no fisttp available: set the FPU rounding mode to truncation */
2684 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2687 ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
/**
 * Transforms a general (no special case) Store.
 * First tries destination address mode; otherwise builds an explicit
 * address and emits the store variant matching the value's mode
 * (SSE xStore / x87 vfst / x87 fist for float->int / plain GP Store).
 *
 * @return the created ia32 Store node
 */
2697 static ir_node *gen_general_Store(ir_node *node)
2699 ir_node *val = get_Store_value(node);
2700 ir_mode *mode = get_irn_mode(val);
2701 ir_node *block = get_nodes_block(node);
2702 ir_node *new_block = be_transform_node(block);
2703 ir_node *ptr = get_Store_ptr(node);
2704 ir_node *mem = get_Store_mem(node);
2705 dbg_info *dbgi = get_irn_dbg_info(node);
2706 int throws_exception = ir_throws_exception(node);
2709 ia32_address_t addr;
2711 /* check for destination address mode */
2712 new_node = try_create_dest_am(node);
2713 if (new_node != NULL)
2716 /* construct store address */
2717 memset(&addr, 0, sizeof(addr));
2718 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* unused address parts are filled with the GP no-register */
2720 if (addr.base == NULL) {
2721 addr.base = noreg_GP;
2723 addr.base = be_transform_node(addr.base);
2726 if (addr.index == NULL) {
2727 addr.index = noreg_GP;
2729 addr.index = be_transform_node(addr.index);
2731 addr.mem = be_transform_node(mem);
2733 if (mode_is_float(mode)) {
2734 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2736 while (is_Conv(val) && mode == get_irn_mode(val)) {
2737 ir_node *op = get_Conv_op(val);
2738 if (!mode_is_float(get_irn_mode(op)))
2742 new_val = be_transform_node(val);
2743 if (ia32_cg_config.use_sse2) {
2744 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2745 addr.index, addr.mem, new_val);
2747 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2748 addr.index, addr.mem, new_val, mode);
/* Store(Conv(float)) on x87: store the float directly via fist */
2750 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2751 val = get_Conv_op(val);
2753 /* TODO: is this optimisation still necessary at all (middleend)? */
2754 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before
2756 while (is_Conv(val)) {
2757 ir_node *op = get_Conv_op(val);
2758 if (!mode_is_float(get_irn_mode(op)))
2760 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2764 new_val = be_transform_node(val);
2765 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
/* plain integer store; 8-bit stores need the special register class */
2767 new_val = create_immediate_or_transform(val, 0);
2768 assert(mode != mode_b);
2770 if (get_mode_size_bits(mode) == 8) {
2771 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2772 addr.index, addr.mem, new_val);
2774 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2775 addr.index, addr.mem, new_val);
2778 ir_set_throws_exception(new_node, throws_exception);
2780 set_irn_pinned(new_node, get_irn_pinned(node));
2781 set_ia32_op_type(new_node, ia32_AddrModeD);
2782 set_ia32_ls_mode(new_node, mode);
2784 set_address(new_node, &addr);
2785 SET_IA32_ORIG_NODE(new_node, node);
2791 * Transforms a Store.
2793 * @return the created ia32 Store node
2795 static ir_node *gen_Store(ir_node *node)
2797 ir_node *val = get_Store_value(node);
2798 ir_mode *mode = get_irn_mode(val);
2800 if (mode_is_float(mode) && is_Const(val)) {
2801 /* We can transform every floating const store
2802 into a sequence of integer stores.
2803 If the constant is already in a register,
2804 it would be better to use it, but we don't
2805 have this information here. */
2806 return gen_float_const_Store(node, val);
2808 return gen_general_Store(node);
/**
 * Transforms a Switch (Cond on a non-boolean selector) into an ia32
 * SwitchJmp. If the smallest case value is not 0, the selector is first
 * biased with a Lea so the jump table can start at index 0.
 *
 * @return the created ia32 SwitchJmp node
 */
2816 static ir_node *create_Switch(ir_node *node)
2818 dbg_info *dbgi = get_irn_dbg_info(node);
2819 ir_node *block = be_transform_node(get_nodes_block(node));
2820 ir_node *sel = get_Cond_selector(node);
2821 ir_node *new_sel = be_transform_node(sel);
2822 long switch_min = LONG_MAX;
2823 long switch_max = LONG_MIN;
2824 long default_pn = get_Cond_default_proj(node);
2826 const ir_edge_t *edge;
2828 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2830 /* determine the smallest switch case value */
2831 foreach_out_edge(node, edge) {
2832 ir_node *proj = get_edge_src_irn(edge);
2833 long pn = get_Proj_proj(proj);
/* the default Proj does not participate in the min/max range */
2834 if (pn == default_pn)
2837 if (pn < switch_min)
2839 if (pn > switch_max)
/* refuse absurdly sparse/large jump tables */
2843 if ((unsigned long) (switch_max - switch_min) > 128000) {
2844 panic("Size of switch %+F bigger than 128000", node);
2847 if (switch_min != 0) {
2848 /* if smallest switch case is not 0 we need an additional sub */
2849 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2850 add_ia32_am_offs_int(new_sel, -switch_min);
2851 set_ia32_op_type(new_sel, ia32_AddrModeS);
2853 SET_IA32_ORIG_NODE(new_sel, node);
2856 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2857 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform a Cond node: boolean selectors become a flags-consuming Jcc,
 * everything else is a switch and goes through create_Switch().
 */
2865 static ir_node *gen_Cond(ir_node *node)
2867 ir_node *block = get_nodes_block(node);
2868 ir_node *new_block = be_transform_node(block);
2869 dbg_info *dbgi = get_irn_dbg_info(node);
2870 ir_node *sel = get_Cond_selector(node);
2871 ir_mode *sel_mode = get_irn_mode(sel);
2872 ir_node *flags = NULL;
2874 ia32_condition_code_t cc;
2876 if (sel_mode != mode_b) {
2877 return create_Switch(node);
2880 /* we get flags from a Cmp */
2881 flags = get_flags_node(sel, &cc);
2883 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2884 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform a be_Copy. The copy is duplicated as-is; GP-register values
 * are normalized to mode_Iu so all integer copies share one mode.
 */
2892 static ir_node *gen_be_Copy(ir_node *node)
2894 ir_node *new_node = be_duplicate_node(node);
2895 ir_mode *mode = get_irn_mode(new_node);
2897 if (ia32_mode_needs_gp_reg(mode)) {
2898 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare. With P6+ fucomi the flags are produced
 * directly; otherwise fucom/ftst writes the FPU status word which must
 * be transferred to EFLAGS via fnstsw + sahf.
 */
2904 static ir_node *create_Fucom(ir_node *node)
2906 dbg_info *dbgi = get_irn_dbg_info(node);
2907 ir_node *block = get_nodes_block(node);
2908 ir_node *new_block = be_transform_node(block);
2909 ir_node *left = get_Cmp_left(node);
2910 ir_node *new_left = be_transform_node(left);
2911 ir_node *right = get_Cmp_right(node);
2915 if (ia32_cg_config.use_fucomi) {
2916 new_right = be_transform_node(right);
2917 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2919 set_ia32_commutative(new_node);
2920 SET_IA32_ORIG_NODE(new_node, node);
/* compare against 0 can use the shorter ftst form */
2922 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2923 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2925 new_right = be_transform_node(right);
2926 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2929 set_ia32_commutative(new_node);
2931 SET_IA32_ORIG_NODE(new_node, node);
/* move FPU status word (in AX after fnstsw) into EFLAGS */
2933 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2934 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an SSE Ucomi float compare (sets EFLAGS directly). Operands
 * are matched through the usual address-mode machinery so one side may
 * be folded into a memory operand.
 */
2940 static ir_node *create_Ucomi(ir_node *node)
2942 dbg_info *dbgi = get_irn_dbg_info(node);
2943 ir_node *src_block = get_nodes_block(node);
2944 ir_node *new_block = be_transform_node(src_block);
2945 ir_node *left = get_Cmp_left(node);
2946 ir_node *right = get_Cmp_right(node);
2948 ia32_address_mode_t am;
2949 ia32_address_t *addr = &am.addr;
2951 match_arguments(&am, src_block, left, right, NULL,
2952 match_commutative | match_am);
2954 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2955 addr->mem, am.new_op1, am.new_op2,
2957 set_am_attributes(new_node, &am);
2959 SET_IA32_ORIG_NODE(new_node, node);
/* reroute memory users if a load was folded into the compare */
2961 new_node = fix_mem_proj(new_node, &am);
/**
 * returns true if it is assured, that the upper bits of a node are "clean"
 * which means for a 16 or 8 bit value, that the upper bits in the register
 * are 0 for unsigned and a copy of the last significant bit for signed
 * modes. Used to decide whether a sub-32-bit compare may be widened to
 * the (shorter-encoded) 32-bit compare instruction.
 */
2972 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2974 assert(ia32_mode_needs_gp_reg(mode));
/* >= 32 bit: there are no "upper bits" to worry about */
2975 if (get_mode_size_bits(mode) >= 32)
2978 if (is_Proj(transformed_node))
2979 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2981 switch (get_ia32_irn_opcode(transformed_node)) {
2982 case iro_ia32_Conv_I2I:
2983 case iro_ia32_Conv_I2I8Bit: {
/* a sign/zero extension cleans the bits iff its signedness matches and
 * it does not extend from a mode wider than the one we care about */
2984 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2985 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2987 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2994 if (mode_is_signed(mode)) {
2995 return false; /* TODO handle signed modes */
/* unsigned Shr: a constant shift of >= (32 - bits) guarantees zeros */
2997 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2998 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2999 const ia32_immediate_attr_t *attr
3000 = get_ia32_immediate_attr_const(right);
3001 if (attr->symconst == 0 &&
3002 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
3006 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
3010 /* TODO too conservative if shift amount is constant */
3011 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* unsigned And: one clean operand suffices (AND can only clear bits) */
3014 if (!mode_is_signed(mode)) {
3016 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
3017 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
3019 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary op: both operands must be clean */
3024 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
3025 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
3027 case iro_ia32_Const:
3028 case iro_ia32_Immediate: {
3029 const ia32_immediate_attr_t *attr =
3030 get_ia32_immediate_attr_const(transformed_node);
/* constant: check that the value fits into the narrow mode, i.e. the
 * discarded upper bits are all sign bits (signed) or all zero (unsigned) */
3031 if (mode_is_signed(mode)) {
3032 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
3033 return shifted == 0 || shifted == -1;
3035 unsigned long shifted = (unsigned long)attr->offset;
3036 shifted >>= get_mode_size_bits(mode);
3037 return shifted == 0;
/**
 * Generate code for a Cmp. Float compares are delegated to SSE Ucomi or
 * x87 Fucom; integer compares become Test (for "(x & y) ==/!= 0") or
 * Cmp, widened to 32 bit when the operands' upper bits are known clean.
 */
3049 static ir_node *gen_Cmp(ir_node *node)
3051 dbg_info *dbgi = get_irn_dbg_info(node);
3052 ir_node *block = get_nodes_block(node);
3053 ir_node *new_block = be_transform_node(block);
3054 ir_node *left = get_Cmp_left(node);
3055 ir_node *right = get_Cmp_right(node);
3056 ir_mode *cmp_mode = get_irn_mode(left);
3058 ia32_address_mode_t am;
3059 ia32_address_t *addr = &am.addr;
3061 if (mode_is_float(cmp_mode)) {
3062 if (ia32_cg_config.use_sse2) {
3063 return create_Ucomi(node);
3065 return create_Fucom(node);
3069 assert(ia32_mode_needs_gp_reg(cmp_mode));
3071 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3072 if (is_Const_0(right) &&
3074 get_irn_n_edges(left) == 1) {
3075 /* Test(and_left, and_right) */
3076 ir_node *and_left = get_And_left(left);
3077 ir_node *and_right = get_And_right(left);
3079 /* matze: code here used mode instead of cmd_mode, I think it is always
3080 * the same as cmp_mode, but I leave this here to see if this is really
3083 assert(get_irn_mode(and_left) == cmp_mode);
3085 match_arguments(&am, block, and_left, and_right, NULL,
3087 match_am | match_8bit_am | match_16bit_am |
3088 match_am_and_immediates | match_immediate);
3090 /* use 32bit compare mode if possible since the opcode is smaller */
3091 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3092 upper_bits_clean(am.new_op2, cmp_mode)) {
3093 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3096 if (get_mode_size_bits(cmp_mode) == 8) {
3097 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3098 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3100 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
3101 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3104 /* Cmp(left, right) */
3105 match_arguments(&am, block, left, right, NULL,
3106 match_commutative | match_am | match_8bit_am |
3107 match_16bit_am | match_am_and_immediates |
3109 /* use 32bit compare mode if possible since the opcode is smaller */
3110 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3111 upper_bits_clean(am.new_op2, cmp_mode)) {
3112 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3115 if (get_mode_size_bits(cmp_mode) == 8) {
3116 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3117 addr->index, addr->mem, am.new_op1,
3118 am.new_op2, am.ins_permuted);
3120 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3121 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3124 set_am_attributes(new_node, &am);
3125 set_ia32_ls_mode(new_node, cmp_mode);
3127 SET_IA32_ORIG_NODE(new_node, node);
3129 new_node = fix_mem_proj(new_node, &am);
/**
 * Create an ia32 CMovcc for a Mux whose values are in GP registers.
 *
 * @param node      the original Mux node
 * @param flags     untransformed flags-producing node (for matching)
 * @param new_flags transformed flags input of the CMov
 * @param cc        condition code selecting val_true
 */
3134 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3135 ia32_condition_code_t cc)
3137 dbg_info *dbgi = get_irn_dbg_info(node);
3138 ir_node *block = get_nodes_block(node);
3139 ir_node *new_block = be_transform_node(block);
3140 ir_node *val_true = get_Mux_true(node);
3141 ir_node *val_false = get_Mux_false(node);
3143 ia32_address_mode_t am;
3144 ia32_address_t *addr;
3146 assert(ia32_cg_config.use_cmov);
3147 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3151 match_arguments(&am, block, val_false, val_true, flags,
3152 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* if the matcher swapped the operands, invert the condition to match */
3154 if (am.ins_permuted)
3155 cc = ia32_negate_condition_code(cc);
3157 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3158 addr->mem, am.new_op1, am.new_op2, new_flags,
3160 set_am_attributes(new_node, &am);
3162 SET_IA32_ORIG_NODE(new_node, node);
3164 new_node = fix_mem_proj(new_node, &am);
/**
 * Creates an ia32 Setcc instruction producing the condition as a 0/1
 * value, zero-extended to the width of @p orig_node's mode when that
 * is wider than the 8 bits SETcc natively produces.
 */
3172 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3173 ir_node *flags, ia32_condition_code_t cc,
3176 ir_mode *mode = get_irn_mode(orig_node);
3179 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3180 SET_IA32_ORIG_NODE(new_node, orig_node);
3182 /* we might need to conv the result up */
3183 if (get_mode_size_bits(mode) > 8) {
3184 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3185 nomem, new_node, mode_Bu);
3186 SET_IA32_ORIG_NODE(new_node, orig_node);
/**
 * Create instruction sequence for an unsigned Difference or Zero
 * (doz(a,b) = a > b ? a - b : 0), branch-free:
 * sub sets carry, sbb 0 yields an all-ones/all-zeros mask, which is
 * inverted and ANDed with the difference.
 */
3195 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3197 ir_mode *mode = get_irn_mode(psi);
3207 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3208 match_mode_neutral | match_am | match_immediate | match_two_users);
3210 block = get_nodes_block(new_node);
/* gen_binop may hand back a Proj; we need the Sub itself to also
 * project out its flags result */
3212 if (is_Proj(new_node)) {
3213 sub = get_Proj_pred(new_node);
3216 set_irn_mode(sub, mode_T);
3217 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3219 assert(is_ia32_Sub(sub));
3220 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3222 dbgi = get_irn_dbg_info(psi);
/* sbb0: 0 - carry -> 0 or -1; not: invert to the keep/zero mask */
3223 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3224 notn = new_bd_ia32_Not(dbgi, block, sbb);
3226 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3227 set_ia32_commutative(new_node);
/**
 * Create a constant global array entity holding two float constants
 * (used to materialize float Mux results via an indexed load).
 *
 * @param c0        the first constant
 * @param c1        the second constant
 * @param new_mode  IN/OUT for the mode of the constants; if NULL/unset the
 *                  smallest mode representing both values losslessly is used
 * @return the created (private, constant-linkage) entity
 */
3239 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3242 ir_mode *mode = *new_mode;
3244 ir_initializer_t *initializer;
3245 ir_tarval *tv0 = get_Const_tarval(c0);
3246 ir_tarval *tv1 = get_Const_tarval(c1);
3249 /* detect the best mode for the constants */
3250 mode = get_tarval_mode(tv0);
/* shrink to single precision when both values convert losslessly,
 * otherwise try double precision */
3252 if (mode != mode_F) {
3253 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3254 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3256 tv0 = tarval_convert_to(tv0, mode);
3257 tv1 = tarval_convert_to(tv1, mode);
3258 } else if (mode != mode_D) {
3259 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3260 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3262 tv0 = tarval_convert_to(tv0, mode);
3263 tv1 = tarval_convert_to(tv1, mode);
3270 tp = ia32_create_float_type(mode, 4);
3271 tp = ia32_create_float_array(tp);
3273 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3275 set_entity_ld_ident(ent, get_entity_ident(ent));
3276 set_entity_visibility(ent, ir_visibility_private);
3277 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3279 initializer = create_initializer_compound(2);
3281 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3282 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3284 set_entity_initializer(ent, initializer);
/**
 * Possible transformations for creating a Setcc result; a Mux with two
 * constant values is lowered to a Setcc followed by a short sequence of
 * these steps (add/lea/shl/neg/not/and), see find_const_transform().
 */
3293 enum setcc_transform_insn {
/* a transformation plan: condition code plus an ordered list of steps */
3306 typedef struct setcc_transform {
3308 ia32_condition_code_t cc;
3310 enum setcc_transform_insn transform;
3314 } setcc_transform_t;
/**
 * Setcc can only handle 0 and 1 result.
 * Find a transformation that creates the requested constant pair (t, f)
 * from that 0/1, and record the step sequence in @p res.
 * NOTE(review): excerpt is elided; several step-recording branches and
 * increments of `step` are not visible here.
 */
3321 static void find_const_transform(ia32_condition_code_t cc,
3322 ir_tarval *t, ir_tarval *f,
3323 setcc_transform_t *res)
/* normalize so that t is the non-zero / larger value; negate the
 * condition code whenever the two values are swapped */
3329 if (tarval_is_null(t)) {
3333 cc = ia32_negate_condition_code(cc);
3334 } else if (tarval_cmp(t, f) == ir_relation_less) {
3335 // now, t is the bigger one
3339 cc = ia32_negate_condition_code(cc);
/* non-zero f: build t' = t - f first, then add f back as a final step */
3343 if (! tarval_is_null(f)) {
3344 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3347 res->steps[step].transform = SETCC_TR_ADD;
3349 if (t == tarval_bad)
3350 panic("constant subtract failed");
3351 if (! tarval_is_long(f))
3352 panic("tarval is not long");
3354 res->steps[step].val = get_tarval_long(f);
3356 f = tarval_sub(f, f, NULL);
3357 assert(tarval_is_null(f));
/* (t, f) == (1, 0): a plain Setcc suffices */
3360 if (tarval_is_one(t)) {
3361 res->steps[step].transform = SETCC_TR_SET;
3362 res->num_steps = ++step;
/* (t, f) == (-1, 0): Setcc then negate */
3366 if (tarval_is_minus_one(t)) {
3367 res->steps[step].transform = SETCC_TR_NEG;
3369 res->steps[step].transform = SETCC_TR_SET;
3370 res->num_steps = ++step;
3373 if (tarval_is_long(t)) {
3374 long v = get_tarval_long(t);
3376 res->steps[step].val = 0;
/* small multipliers are synthesized via LEA scale+index tricks;
 * a preceding ADD step can be merged into the LEA's displacement */
3379 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3381 res->steps[step].transform = SETCC_TR_LEAxx;
3382 res->steps[step].scale = 3; /* (a << 3) + a */
3385 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3387 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3388 res->steps[step].scale = 3; /* (a << 3) */
3391 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3393 res->steps[step].transform = SETCC_TR_LEAxx;
3394 res->steps[step].scale = 2; /* (a << 2) + a */
3397 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3399 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3400 res->steps[step].scale = 2; /* (a << 2) */
3403 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3405 res->steps[step].transform = SETCC_TR_LEAxx;
3406 res->steps[step].scale = 1; /* (a << 1) + a */
3409 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3411 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3412 res->steps[step].scale = 1; /* (a << 1) */
3415 res->num_steps = step;
/* general constant: mask via AND, or shift a single-bit value into place */
3418 if (! tarval_is_single_bit(t)) {
3419 res->steps[step].transform = SETCC_TR_AND;
3420 res->steps[step].val = v;
3422 res->steps[step].transform = SETCC_TR_NEG;
3424 int v = get_tarval_lowest_bit(t);
3427 res->steps[step].transform = SETCC_TR_SHL;
3428 res->steps[step].scale = v;
3432 res->steps[step].transform = SETCC_TR_SET;
3433 res->num_steps = ++step;
3436 panic("tarval is not long");
/**
 * Transforms a Mux node into some code sequence. Handles, in order:
 * abs patterns, SSE min/max patterns, float const-pair loads from a
 * generated constant array, unsigned "difference or zero", two-constant
 * integer Muxes via Setcc + arithmetic steps, and finally CMov.
 *
 * @return The transformed node.
 */
3444 static ir_node *gen_Mux(ir_node *node)
3446 dbg_info *dbgi = get_irn_dbg_info(node);
3447 ir_node *block = get_nodes_block(node);
3448 ir_node *new_block = be_transform_node(block);
3449 ir_node *mux_true = get_Mux_true(node);
3450 ir_node *mux_false = get_Mux_false(node);
3451 ir_node *sel = get_Mux_sel(node);
3452 ir_mode *mode = get_irn_mode(node);
3456 ia32_condition_code_t cc;
3458 assert(get_irn_mode(sel) == mode_b);
/* abs(x) pattern: only float abs is implemented here */
3460 is_abs = ir_mux_is_abs(sel, mux_true, mux_false);
3462 if (ia32_mode_needs_gp_reg(mode)) {
3463 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3466 ir_node *op = ir_get_abs_op(sel, mux_true, mux_false);
3467 return create_abs(dbgi, block, op, is_abs < 0, node);
3471 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3472 if (mode_is_float(mode)) {
3473 ir_node *cmp_left = get_Cmp_left(sel);
3474 ir_node *cmp_right = get_Cmp_right(sel);
3475 ir_relation relation = get_Cmp_relation(sel);
3477 if (ia32_cg_config.use_sse2) {
3478 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3479 if (cmp_left == mux_true && cmp_right == mux_false) {
3480 /* Mux(a <= b, a, b) => MIN */
3481 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3482 match_commutative | match_am | match_two_users);
3483 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3484 /* Mux(a <= b, b, a) => MAX */
3485 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3486 match_commutative | match_am | match_two_users);
3488 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3489 if (cmp_left == mux_true && cmp_right == mux_false) {
3490 /* Mux(a >= b, a, b) => MAX */
3491 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3492 match_commutative | match_am | match_two_users);
3493 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3494 /* Mux(a >= b, b, a) => MIN */
3495 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3496 match_commutative | match_am | match_two_users);
/* two float constants: store them in a const array and load the
 * selected one using the Setcc result as the array index */
3501 if (is_Const(mux_true) && is_Const(mux_false)) {
3502 ia32_address_mode_t am;
3507 flags = get_flags_node(sel, &cc);
3508 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3510 if (ia32_cg_config.use_sse2) {
3511 /* cannot load from different mode on SSE */
3514 /* x87 can load any mode */
3518 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size; sizes without a matching
 * AM scale are built from Leas (8 = 4*2, 12 = 4*3, 16 = 4*2*2) */
3520 switch (get_mode_size_bytes(new_mode)) {
3530 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3531 set_ia32_am_scale(new_node, 2);
3536 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3537 set_ia32_am_scale(new_node, 1);
3540 /* arg, shift 16 NOT supported */
3542 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3545 panic("Unsupported constant size");
3548 am.ls_mode = new_mode;
3549 am.addr.base = get_symconst_base();
3550 am.addr.index = new_node;
3551 am.addr.mem = nomem;
3553 am.addr.scale = scale;
3554 am.addr.use_frame = 0;
3555 am.addr.tls_segment = false;
3556 am.addr.frame_entity = NULL;
3557 am.addr.symconst_sign = 0;
3558 am.mem_proj = am.addr.mem;
3559 am.op_type = ia32_AddrModeS;
3562 am.pinned = op_pin_state_floats;
3564 am.ins_permuted = false;
3566 if (ia32_cg_config.use_sse2)
3567 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3569 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3570 set_am_attributes(load, &am);
3572 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3574 panic("cannot transform floating point Mux");
3577 assert(ia32_mode_needs_gp_reg(mode));
3580 ir_node *cmp_left = get_Cmp_left(sel);
3581 ir_node *cmp_right = get_Cmp_right(sel);
3582 ir_relation relation = get_Cmp_relation(sel);
3583 ir_node *val_true = mux_true;
3584 ir_node *val_false = mux_false;
/* normalize so the 0-constant (if any) is on the false side */
3586 if (is_Const(val_true) && is_Const_null(val_true)) {
3587 ir_node *tmp = val_false;
3588 val_false = val_true;
3590 relation = get_negated_relation(relation);
/* Mux(a >u b, a - b, 0) and its mirrored form -> branch-free doz */
3592 if (is_Const_0(val_false) && is_Sub(val_true)) {
3593 if ((relation & ir_relation_greater)
3594 && get_Sub_left(val_true) == cmp_left
3595 && get_Sub_right(val_true) == cmp_right) {
3596 return create_doz(node, cmp_left, cmp_right);
3598 if ((relation & ir_relation_less)
3599 && get_Sub_left(val_true) == cmp_right
3600 && get_Sub_right(val_true) == cmp_left) {
3601 return create_doz(node, cmp_right, cmp_left);
3606 flags = get_flags_node(sel, &cc);
3608 if (is_Const(mux_true) && is_Const(mux_false)) {
3609 /* both are const, good */
3610 ir_tarval *tv_true = get_Const_tarval(mux_true);
3611 ir_tarval *tv_false = get_Const_tarval(mux_false);
3612 setcc_transform_t res;
3615 find_const_transform(cc, tv_true, tv_false, &res);
/* steps are recorded innermost-first; emit them in reverse */
3617 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3620 switch (res.steps[step].transform) {
3622 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3623 add_ia32_am_offs_int(new_node, res.steps[step].val);
3625 case SETCC_TR_ADDxx:
3626 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3629 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3630 set_ia32_am_scale(new_node, res.steps[step].scale);
3631 set_ia32_am_offs_int(new_node, res.steps[step].val);
3633 case SETCC_TR_LEAxx:
3634 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3635 set_ia32_am_scale(new_node, res.steps[step].scale);
3636 set_ia32_am_offs_int(new_node, res.steps[step].val);
3639 imm = ia32_immediate_from_long(res.steps[step].scale);
3640 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3643 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3646 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3649 imm = ia32_immediate_from_long(res.steps[step].val);
3650 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3653 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3656 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3659 panic("unknown setcc transform");
/* general case: conditional move */
3663 new_node = create_CMov(node, sel, flags, cc);
/**
 * Create a conversion from x87 state register to general purpose:
 * fist the value to a frame slot, then load it back as an integer.
 * Unsigned 32-bit values are stored as signed 64-bit and only the low
 * 32 bits are loaded back.
 */
3673 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3675 ir_node *block = be_transform_node(get_nodes_block(node));
3676 ir_node *op = get_Conv_op(node);
3677 ir_node *new_op = be_transform_node(op);
3678 ir_graph *irg = current_ir_graph;
3679 dbg_info *dbgi = get_irn_dbg_info(node);
3680 ir_mode *mode = get_irn_mode(node);
3681 ir_node *frame = get_irg_frame(irg);
3682 ir_node *fist, *load, *mem;
3684 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3685 set_irn_pinned(fist, op_pin_state_floats);
3686 set_ia32_use_frame(fist);
3687 set_ia32_op_type(fist, ia32_AddrModeD);
/* memory Proj number is the same for both fist variants */
3689 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3690 mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3692 assert(get_mode_size_bits(mode) <= 32);
3693 /* exception we can only store signed 32 bit integers, so for unsigned
3694 we store a 64bit (signed) integer and load the lower bits */
3695 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3696 set_ia32_ls_mode(fist, mode_Ls);
3698 set_ia32_ls_mode(fist, mode_Is);
3700 SET_IA32_ORIG_NODE(fist, node);
3703 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3705 set_irn_pinned(load, op_pin_state_floats);
3706 set_ia32_use_frame(load);
3707 set_ia32_op_type(load, ia32_AddrModeS);
3708 set_ia32_ls_mode(load, mode_Is);
/* request a frame slot big enough for what the fist actually wrote */
3709 if (get_ia32_ls_mode(fist) == mode_Ls) {
3710 ia32_attr_t *attr = get_ia32_attr(load);
3711 attr->data.need_64bit_stackent = 1;
3713 ia32_attr_t *attr = get_ia32_attr(load);
3714 attr->data.need_32bit_stackent = 1;
3716 SET_IA32_ORIG_NODE(load, node);
3718 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3722 * Creates a x87 strict Conv by placing a Store and a Load
/*
 * Implement a "strict" x87 Conv by forcing the value through memory: a vfst
 * in tgt_mode followed by a vfld, so the 80-bit x87 register value takes on
 * the target precision.  Result is the vfld result Proj (mode_E).
 * NOTE(review): the trailing "return new_node;" and closing brace are elided
 * in this numbered listing.
 */
3724 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3726 ir_node *block = get_nodes_block(node);
3727 ir_graph *irg = get_Block_irg(block);
3728 dbg_info *dbgi = get_irn_dbg_info(node);
3729 ir_node *frame = get_irg_frame(irg);
3731 ir_node *store, *load;
/* spill at target precision ... */
3734 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3735 set_ia32_use_frame(store);
3736 set_ia32_op_type(store, ia32_AddrModeD);
3737 SET_IA32_ORIG_NODE(store, node);
3739 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
/* ... and reload the now rounded value */
3741 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3742 set_ia32_use_frame(load);
3743 set_ia32_op_type(load, ia32_AddrModeS);
3744 SET_IA32_ORIG_NODE(load, node);
3746 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
3750 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3751 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3753 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3755 func = get_mode_size_bits(mode) == 8 ?
3756 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3757 return func(dbgi, block, base, index, mem, val, mode);
3761 * Create a conversion from general purpose to x87 register
/*
 * Convert a GP integer to an x87 floating point value.  Tries to feed the
 * fild directly from memory (source address mode); otherwise spills the
 * integer to the frame and filds it from there.  32-bit unsigned values get
 * a 64-bit spill (upper word zeroed) since fild only reads signed integers.
 * NOTE(review): else-branches, some declarations and closing braces are
 * elided in this numbered listing (see the gaps in the embedded numbers).
 */
3763 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3765 ir_node *src_block = get_nodes_block(node);
3766 ir_node *block = be_transform_node(src_block);
3767 ir_graph *irg = get_Block_irg(block);
3768 dbg_info *dbgi = get_irn_dbg_info(node);
3769 ir_node *op = get_Conv_op(node);
3770 ir_node *new_op = NULL;
3772 ir_mode *store_mode;
3778 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3779 if (possible_int_mode_for_fp(src_mode)) {
3780 ia32_address_mode_t am;
3782 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3783 if (am.op_type == ia32_AddrModeS) {
3784 ia32_address_t *addr = &am.addr;
/* load the integer straight from memory into the fpu */
3786 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3787 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3789 set_am_attributes(fild, &am);
3790 SET_IA32_ORIG_NODE(fild, node);
3792 fix_mem_proj(fild, &am);
/* fall-back path: operand ends up in a register and is spilled below */
3797 if (new_op == NULL) {
3798 new_op = be_transform_node(op);
3801 mode = get_irn_mode(op);
3803 /* first convert to 32 bit signed if necessary */
3804 if (get_mode_size_bits(src_mode) < 32) {
3805 if (!upper_bits_clean(new_op, src_mode)) {
3806 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3807 SET_IA32_ORIG_NODE(new_op, node);
3812 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can read it */
3815 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3817 set_ia32_use_frame(store);
3818 set_ia32_op_type(store, ia32_AddrModeD);
3819 set_ia32_ls_mode(store, mode_Iu);
3821 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3823 /* exception for 32bit unsigned, do a 64bit spill+load */
3824 if (!mode_is_signed(mode)) {
3827 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* zero the upper word so the 64-bit value is non-negative */
3829 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3830 noreg_GP, nomem, zero_const);
3831 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3833 set_ia32_use_frame(zero_store);
3834 set_ia32_op_type(zero_store, ia32_AddrModeD);
3835 add_ia32_am_offs_int(zero_store, 4);
3836 set_ia32_ls_mode(zero_store, mode_Iu);
/* both stores must be visible to the fild */
3838 in[0] = zero_store_mem;
3841 store_mem = new_rd_Sync(dbgi, block, 2, in);
3842 store_mode = mode_Ls;
3844 store_mode = mode_Is;
/* load the spilled integer into the fpu */
3848 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3850 set_ia32_use_frame(fild);
3851 set_ia32_op_type(fild, ia32_AddrModeS);
3852 set_ia32_ls_mode(fild, store_mode);
3854 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3860 * Create a conversion from one integer mode into another one
/*
 * Create an integer-to-integer conversion (sign/zero extension or truncation
 * view) using the smaller of the two modes.  When the operand's upper bits
 * are already clean the conversion is unnecessary; otherwise a Conv_I2I with
 * optional source address mode is built.
 * NOTE(review): several lines (the #ifdef body, else-branches, returns and
 * closing braces) are elided in this numbered listing.
 */
3862 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3863 dbg_info *dbgi, ir_node *block, ir_node *op,
3866 ir_node *new_block = be_transform_node(block);
3868 ir_mode *smaller_mode;
3869 ia32_address_mode_t am;
3870 ia32_address_t *addr = &am.addr;
/* the conversion only has to honour the smaller of the two mode sizes */
3873 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3874 smaller_mode = src_mode;
3876 smaller_mode = tgt_mode;
3879 #ifdef DEBUG_libfirm
3881 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3886 match_arguments(&am, block, NULL, op, NULL,
3887 match_am | match_8bit_am | match_16bit_am);
3889 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3890 /* unnecessary conv. in theory it shouldn't have been AM */
3891 assert(is_ia32_NoReg_GP(addr->base));
3892 assert(is_ia32_NoReg_GP(addr->index));
3893 assert(is_NoMem(addr->mem));
3894 assert(am.addr.offset == 0);
3895 assert(am.addr.symconst_ent == NULL);
3899 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3900 addr->mem, am.new_op2, smaller_mode);
3901 set_am_attributes(new_node, &am);
3902 /* match_arguments assume that out-mode = in-mode, this isn't true here
3904 set_ia32_ls_mode(new_node, smaller_mode);
3905 SET_IA32_ORIG_NODE(new_node, node);
3906 new_node = fix_mem_proj(new_node, &am);
3911 * Transforms a Conv node.
3913 * @return The created ia32 Conv node
/*
 * Transform a firm Conv node.  Dispatches on (src, tgt) mode classes:
 * float->float, float->int, int->float and int->int, with separate code
 * paths for SSE2 and x87.  No-op conversions are eliminated.
 * NOTE(review): else-branches, some arguments split across lines, returns
 * and closing braces are elided in this numbered listing.
 */
3915 static ir_node *gen_Conv(ir_node *node)
3917 ir_node *block = get_nodes_block(node);
3918 ir_node *new_block = be_transform_node(block);
3919 ir_node *op = get_Conv_op(node);
3920 ir_node *new_op = NULL;
3921 dbg_info *dbgi = get_irn_dbg_info(node);
3922 ir_mode *src_mode = get_irn_mode(op);
3923 ir_mode *tgt_mode = get_irn_mode(node);
3924 int src_bits = get_mode_size_bits(src_mode);
3925 int tgt_bits = get_mode_size_bits(tgt_mode);
3926 ir_node *res = NULL;
/* 64-bit integers must have been lowered before this phase */
3928 assert(!mode_is_int(src_mode) || src_bits <= 32);
3929 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3931 /* modeB -> X should already be lowered by the lower_mode_b pass */
3932 if (src_mode == mode_b) {
3933 panic("ConvB not lowered %+F", node);
/* same-mode Convs are no-ops (strict ones only matter for x87) */
3936 if (src_mode == tgt_mode) {
3937 if (get_Conv_strict(node)) {
3938 if (ia32_cg_config.use_sse2) {
3939 /* when we are in SSE mode, we can kill all strict no-op conversion */
3940 return be_transform_node(op);
3943 /* this should be optimized already, but who knows... */
3944 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3945 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3946 return be_transform_node(op);
3950 if (mode_is_float(src_mode)) {
3951 new_op = be_transform_node(op);
3952 /* we convert from float ... */
3953 if (mode_is_float(tgt_mode)) {
3955 if (ia32_cg_config.use_sse2) {
3956 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3957 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3959 set_ia32_ls_mode(res, tgt_mode);
/* x87: only strict downconvs need an explicit rounding step */
3961 if (get_Conv_strict(node)) {
3962 /* if fp_no_float_fold is not set then we assume that we
3963 * don't have any float operations in a non
3964 * mode_float_arithmetic mode and can skip strict upconvs */
3965 if (src_bits < tgt_bits) {
3966 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3969 res = gen_x87_strict_conv(tgt_mode, new_op);
3970 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3974 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* float -> int */
3979 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3980 if (ia32_cg_config.use_sse2) {
3981 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3983 set_ia32_ls_mode(res, src_mode);
3985 return gen_x87_fp_to_gp(node);
3989 /* we convert from int ... */
3990 if (mode_is_float(tgt_mode)) {
3992 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3993 if (ia32_cg_config.use_sse2) {
3994 new_op = be_transform_node(op);
3995 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3997 set_ia32_ls_mode(res, tgt_mode);
/* x87: compare mantissa widths to decide whether rounding is needed */
3999 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
4000 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
4001 res = gen_x87_gp_to_fp(node, src_mode);
4003 /* we need a strict-Conv, if the int mode has more bits than the
4005 if (float_mantissa < int_mantissa) {
4006 res = gen_x87_strict_conv(tgt_mode, res);
4007 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
4011 } else if (tgt_mode == mode_b) {
4012 /* mode_b lowering already took care that we only have 0/1 values */
4013 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
4014 src_mode, tgt_mode));
4015 return be_transform_node(op);
/* int -> int: same width is a no-op, otherwise build a Conv_I2I */
4018 if (src_bits == tgt_bits) {
4019 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
4020 src_mode, tgt_mode));
4021 return be_transform_node(op);
4024 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
4032 static ir_node *create_immediate_or_transform(ir_node *node,
4033 char immediate_constraint_type)
4035 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
4036 if (new_node == NULL) {
4037 new_node = be_transform_node(node);
4043 * Transforms a FrameAddr into an ia32 Add.
4045 static ir_node *gen_be_FrameAddr(ir_node *node)
4047 ir_node *block = be_transform_node(get_nodes_block(node));
4048 ir_node *op = be_get_FrameAddr_frame(node);
4049 ir_node *new_op = be_transform_node(op);
4050 dbg_info *dbgi = get_irn_dbg_info(node);
4053 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
4054 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
4055 set_ia32_use_frame(new_node);
4057 SET_IA32_ORIG_NODE(new_node, node);
4063 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/*
 * Transform a be_Return.  In the SSE2 case with a float result the value is
 * copied from the XMM register to the x87 top-of-stack via a frame store +
 * vfld (the ia32 ABI returns floats on the x87 stack); otherwise the Return
 * is simply duplicated.
 * NOTE(review): several declarations, blank lines, in[]-assignments and the
 * final return are elided in this numbered listing.
 */
4065 static ir_node *gen_be_Return(ir_node *node)
4067 ir_graph *irg = current_ir_graph;
4068 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
4069 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
4070 ir_node *new_ret_val = be_transform_node(ret_val);
4071 ir_node *new_ret_mem = be_transform_node(ret_mem);
4072 ir_entity *ent = get_irg_entity(irg);
4073 ir_type *tp = get_entity_type(ent);
4074 dbg_info *dbgi = get_irn_dbg_info(node);
4075 ir_node *block = be_transform_node(get_nodes_block(node));
/* only the SSE2 + primitive-float-result case needs special handling */
4089 assert(ret_val != NULL);
4090 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4091 return be_duplicate_node(node);
4094 res_type = get_method_res_type(tp, 0);
4096 if (! is_Primitive_type(res_type)) {
4097 return be_duplicate_node(node);
4100 mode = get_type_mode(res_type);
4101 if (! mode_is_float(mode)) {
4102 return be_duplicate_node(node);
4105 assert(get_method_n_ress(tp) == 1);
4107 frame = get_irg_frame(irg);
4109 /* store xmm0 onto stack */
4110 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4111 new_ret_mem, new_ret_val);
4112 set_ia32_ls_mode(sse_store, mode);
4113 set_ia32_op_type(sse_store, ia32_AddrModeD);
4114 set_ia32_use_frame(sse_store);
4115 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4117 /* load into x87 register */
4118 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4119 set_ia32_op_type(fld, ia32_AddrModeS);
4120 set_ia32_use_frame(fld);
4122 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4123 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4125 /* create a new return */
4126 arity = get_irn_arity(node);
4127 in = ALLOCAN(ir_node*, arity);
4128 pop = be_Return_get_pop(node);
/* replace the value/mem inputs with the x87 value and the vfld memory */
4129 for (i = 0; i < arity; ++i) {
4130 ir_node *op = get_irn_n(node, i);
4131 if (op == ret_val) {
4133 } else if (op == ret_mem) {
4136 in[i] = be_transform_node(op);
4139 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4140 copy_node_attr(irg, node, new_node);
4146 * Transform a be_AddSP into an ia32_SubSP.
4148 static ir_node *gen_be_AddSP(ir_node *node)
4150 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4151 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4153 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4154 match_am | match_immediate);
4155 assert(is_ia32_SubSP(new_node));
4156 arch_irn_set_register(new_node, pn_ia32_SubSP_stack,
4157 &ia32_registers[REG_ESP]);
4162 * Transform a be_SubSP into an ia32_AddSP
4164 static ir_node *gen_be_SubSP(ir_node *node)
4166 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4167 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4169 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4170 match_am | match_immediate);
4171 assert(is_ia32_AddSP(new_node));
4172 arch_irn_set_register(new_node, pn_ia32_AddSP_stack,
4173 &ia32_registers[REG_ESP]);
4178 * Change some phi modes
/*
 * Transform a Phi: keep the node but attach the ia32 register requirement
 * matching its mode (gp / xmm / vfp / none).  The Phi keeps its old
 * (untransformed) arguments for now because Phis may close loops; they are
 * fixed up later, and the predecessors are enqueued for transformation.
 * NOTE(review): else-lines, some declarations and the final return are
 * elided in this numbered listing.
 */
4180 static ir_node *gen_Phi(ir_node *node)
4182 const arch_register_req_t *req;
4183 ir_node *block = be_transform_node(get_nodes_block(node));
4184 ir_graph *irg = current_ir_graph;
4185 dbg_info *dbgi = get_irn_dbg_info(node);
4186 ir_mode *mode = get_irn_mode(node);
/* pick the register class requirement from the mode */
4189 if (ia32_mode_needs_gp_reg(mode)) {
4190 /* we shouldn't have any 64bit stuff around anymore */
4191 assert(get_mode_size_bits(mode) <= 32);
4192 /* all integer operations are on 32bit registers now */
4194 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4195 } else if (mode_is_float(mode)) {
4196 if (ia32_cg_config.use_sse2) {
4198 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4201 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4204 req = arch_no_register_req;
4207 /* phi nodes allow loops, so we use the old arguments for now
4208 * and fix this later */
4209 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4210 get_irn_in(node) + 1);
4211 copy_node_attr(irg, node, phi);
4212 be_duplicate_deps(node, phi);
4214 arch_set_out_register_req(phi, 0, req);
/* make sure the predecessors get transformed as well */
4216 be_enqueue_preds(node);
4221 static ir_node *gen_Jmp(ir_node *node)
4223 ir_node *block = get_nodes_block(node);
4224 ir_node *new_block = be_transform_node(block);
4225 dbg_info *dbgi = get_irn_dbg_info(node);
4228 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4229 SET_IA32_ORIG_NODE(new_node, node);
4237 static ir_node *gen_IJmp(ir_node *node)
4239 ir_node *block = get_nodes_block(node);
4240 ir_node *new_block = be_transform_node(block);
4241 dbg_info *dbgi = get_irn_dbg_info(node);
4242 ir_node *op = get_IJmp_target(node);
4244 ia32_address_mode_t am;
4245 ia32_address_t *addr = &am.addr;
4247 assert(get_irn_mode(op) == mode_P);
4249 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4251 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4252 addr->mem, am.new_op2);
4253 set_am_attributes(new_node, &am);
4254 SET_IA32_ORIG_NODE(new_node, node);
4256 new_node = fix_mem_proj(new_node, &am);
4261 static ir_node *gen_ia32_l_Add(ir_node *node)
4263 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4264 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4265 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4266 match_commutative | match_am | match_immediate |
4267 match_mode_neutral);
4269 if (is_Proj(lowered)) {
4270 lowered = get_Proj_pred(lowered);
4272 assert(is_ia32_Add(lowered));
4273 set_irn_mode(lowered, mode_T);
4279 static ir_node *gen_ia32_l_Adc(ir_node *node)
4281 return gen_binop_flags(node, new_bd_ia32_Adc,
4282 match_commutative | match_am | match_immediate |
4283 match_mode_neutral);
4287 * Transforms a l_MulS into a "real" MulS node.
4289 * @return the created ia32 Mul node
4291 static ir_node *gen_ia32_l_Mul(ir_node *node)
4293 ir_node *left = get_binop_left(node);
4294 ir_node *right = get_binop_right(node);
4296 return gen_binop(node, left, right, new_bd_ia32_Mul,
4297 match_commutative | match_am | match_mode_neutral);
4301 * Transforms a l_IMulS into a "real" IMul1OPS node.
4303 * @return the created ia32 IMul1OP node
4305 static ir_node *gen_ia32_l_IMul(ir_node *node)
4307 ir_node *left = get_binop_left(node);
4308 ir_node *right = get_binop_right(node);
4310 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4311 match_commutative | match_am | match_mode_neutral);
4314 static ir_node *gen_ia32_l_Sub(ir_node *node)
4316 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4317 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4318 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4319 match_am | match_immediate | match_mode_neutral);
4321 if (is_Proj(lowered)) {
4322 lowered = get_Proj_pred(lowered);
4324 assert(is_ia32_Sub(lowered));
4325 set_irn_mode(lowered, mode_T);
4331 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4333 return gen_binop_flags(node, new_bd_ia32_Sbb,
4334 match_am | match_immediate | match_mode_neutral);
/*
 * Transform an ia32_l_LLtoFloat (64-bit integer -> float): store the two
 * 32-bit halves to a frame slot, fild the 64-bit value, and for unsigned
 * inputs add a bias constant when the sign bit was set.
 * NOTE(review): declarations, in[]-setup lines, else-branches and the final
 * return are elided in this numbered listing.
 */
4337 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4339 ir_node *src_block = get_nodes_block(node);
4340 ir_node *block = be_transform_node(src_block);
4341 ir_graph *irg = current_ir_graph;
4342 dbg_info *dbgi = get_irn_dbg_info(node);
4343 ir_node *frame = get_irg_frame(irg);
4344 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4345 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4346 ir_node *new_val_low = be_transform_node(val_low);
4347 ir_node *new_val_high = be_transform_node(val_high);
4349 ir_node *sync, *fild, *res;
4351 ir_node *store_high;
4355 if (ia32_cg_config.use_sse2) {
4356 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both halves into one 64-bit frame slot (high word at offset 4) */
4360 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4362 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4364 SET_IA32_ORIG_NODE(store_low, node);
4365 SET_IA32_ORIG_NODE(store_high, node);
4367 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4368 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4370 set_ia32_use_frame(store_low);
4371 set_ia32_use_frame(store_high);
4372 set_ia32_op_type(store_low, ia32_AddrModeD);
4373 set_ia32_op_type(store_high, ia32_AddrModeD);
4374 set_ia32_ls_mode(store_low, mode_Iu);
4375 set_ia32_ls_mode(store_high, mode_Is);
4376 add_ia32_am_offs_int(store_high, 4);
/* the fild must see both stores */
4380 sync = new_rd_Sync(dbgi, block, 2, in);
4383 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4385 set_ia32_use_frame(fild);
4386 set_ia32_op_type(fild, ia32_AddrModeS);
4387 set_ia32_ls_mode(fild, mode_Ls);
4389 SET_IA32_ORIG_NODE(fild, node);
4391 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned input: fild interpreted the value as signed, so when the sign
 * bit was set, correct the result by adding the ULL bias constant.  The
 * (high >> 31) value selects between the bias and 0.0 from a const table. */
4393 if (! mode_is_signed(get_irn_mode(val_high))) {
4394 ia32_address_mode_t am;
4396 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4399 am.addr.base = get_symconst_base();
4400 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4401 am.addr.mem = nomem;
4404 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4405 am.addr.tls_segment = false;
4406 am.addr.use_frame = 0;
4407 am.addr.frame_entity = NULL;
4408 am.addr.symconst_sign = 0;
4409 am.ls_mode = mode_F;
4410 am.mem_proj = nomem;
4411 am.op_type = ia32_AddrModeS;
4413 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4414 am.pinned = op_pin_state_floats;
4416 am.ins_permuted = false;
4418 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4419 am.new_op1, am.new_op2, get_fpcw());
4420 set_am_attributes(fadd, &am);
4422 set_irn_mode(fadd, mode_T);
4423 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
4428 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4430 ir_node *src_block = get_nodes_block(node);
4431 ir_node *block = be_transform_node(src_block);
4432 ir_graph *irg = get_Block_irg(block);
4433 dbg_info *dbgi = get_irn_dbg_info(node);
4434 ir_node *frame = get_irg_frame(irg);
4435 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4436 ir_node *new_val = be_transform_node(val);
4439 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4440 SET_IA32_ORIG_NODE(fist, node);
4441 set_ia32_use_frame(fist);
4442 set_ia32_op_type(fist, ia32_AddrModeD);
4443 set_ia32_ls_mode(fist, mode_Ls);
4445 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4446 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/*
 * Transform a Proj of an ia32_l_FloattoLL: load one 32-bit half of the
 * 64-bit integer the fist wrote to the frame (high half at offset 4).
 * NOTE(review): declarations, else-lines and the final return of proj are
 * elided in this numbered listing.
 */
4449 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4451 ir_node *block = be_transform_node(get_nodes_block(node));
4452 ir_graph *irg = get_Block_irg(block);
4453 ir_node *pred = get_Proj_pred(node);
4454 ir_node *new_pred = be_transform_node(pred);
4455 ir_node *frame = get_irg_frame(irg);
4456 dbg_info *dbgi = get_irn_dbg_info(node);
4457 long pn = get_Proj_proj(node);
4462 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4463 SET_IA32_ORIG_NODE(load, node);
4464 set_ia32_use_frame(load);
4465 set_ia32_op_type(load, ia32_AddrModeS);
4466 set_ia32_ls_mode(load, mode_Iu);
4467 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4468 * 32 bit from it with this particular load */
4469 attr = get_ia32_attr(load);
4470 attr->data.need_64bit_stackent = 1;
/* the high half lives 4 bytes above the low half */
4472 if (pn == pn_ia32_l_FloattoLL_res_high) {
4473 add_ia32_am_offs_int(load, 4);
4475 assert(pn == pn_ia32_l_FloattoLL_res_low);
4478 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4484 * Transform the Projs of an AddSP.
4486 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4488 ir_node *pred = get_Proj_pred(node);
4489 ir_node *new_pred = be_transform_node(pred);
4490 dbg_info *dbgi = get_irn_dbg_info(node);
4491 long proj = get_Proj_proj(node);
4493 if (proj == pn_be_AddSP_sp) {
4494 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4495 pn_ia32_SubSP_stack);
4496 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4498 } else if (proj == pn_be_AddSP_res) {
4499 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4500 pn_ia32_SubSP_addr);
4501 } else if (proj == pn_be_AddSP_M) {
4502 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4505 panic("No idea how to transform proj->AddSP");
4509 * Transform the Projs of a SubSP.
4511 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4513 ir_node *pred = get_Proj_pred(node);
4514 ir_node *new_pred = be_transform_node(pred);
4515 dbg_info *dbgi = get_irn_dbg_info(node);
4516 long proj = get_Proj_proj(node);
4518 if (proj == pn_be_SubSP_sp) {
4519 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4520 pn_ia32_AddSP_stack);
4521 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4523 } else if (proj == pn_be_SubSP_M) {
4524 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4527 panic("No idea how to transform proj->SubSP");
4531 * Transform and renumber the Projs from a Load.
/*
 * Transform and renumber the Projs of a Load, dispatching on what the Load
 * was transformed into (ia32 Load, a Conv that absorbed it via source AM,
 * an SSE xLoad, or an x87 vfld).
 * NOTE(review): case labels, blank lines, closing braces and some early
 * declarations are elided in this numbered listing.
 */
4533 static ir_node *gen_Proj_Load(ir_node *node)
4536 ir_node *pred = get_Proj_pred(node);
4537 dbg_info *dbgi = get_irn_dbg_info(node);
4538 long proj = get_Proj_proj(node);
4540 /* loads might be part of source address mode matches, so we don't
4541 * transform the ProjMs yet (with the exception of loads whose result is
/* a multi-user Load cannot be folded into AM, so its ProjM is deferred */
4544 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4547 /* this is needed, because sometimes we have loops that are only
4548 reachable through the ProjM */
4549 be_enqueue_preds(node);
4550 /* do it in 2 steps, to silence firm verifier */
4551 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4552 set_Proj_proj(res, pn_ia32_mem);
4556 /* renumber the proj */
4557 new_pred = be_transform_node(pred);
4558 if (is_ia32_Load(new_pred)) {
4559 switch ((pn_Load)proj) {
4561 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4563 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4564 case pn_Load_X_except:
4565 /* This Load might raise an exception. Mark it. */
4566 set_ia32_exc_label(new_pred, 1);
4567 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4568 case pn_Load_X_regular:
4569 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* the Load was folded into a Conv via source address mode */
4571 } else if (is_ia32_Conv_I2I(new_pred) ||
4572 is_ia32_Conv_I2I8Bit(new_pred)) {
4573 set_irn_mode(new_pred, mode_T);
4574 switch ((pn_Load)proj) {
4576 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4578 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4579 case pn_Load_X_except:
4580 /* This Load might raise an exception. Mark it. */
4581 set_ia32_exc_label(new_pred, 1);
4582 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4583 case pn_Load_X_regular:
4584 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
/* SSE load */
4586 } else if (is_ia32_xLoad(new_pred)) {
4587 switch ((pn_Load)proj) {
4589 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4591 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4592 case pn_Load_X_except:
4593 /* This Load might raise an exception. Mark it. */
4594 set_ia32_exc_label(new_pred, 1);
4595 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4596 case pn_Load_X_regular:
4597 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
/* x87 load */
4599 } else if (is_ia32_vfld(new_pred)) {
4600 switch ((pn_Load)proj) {
4602 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4604 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4605 case pn_Load_X_except:
4606 /* This Load might raise an exception. Mark it. */
4607 set_ia32_exc_label(new_pred, 1);
4608 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4609 case pn_Load_X_regular:
4610 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4613 /* can happen for ProJMs when source address mode happened for the
4616 /* however it should not be the result proj, as that would mean the
4617 load had multiple users and should not have been used for
4619 if (proj != pn_Load_M) {
4620 panic("internal error: transformed node not a Load");
4622 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4625 panic("No idea how to transform Proj(Load) %+F", node);
/*
 * Transform and renumber the Projs of a Store, dispatching on the ia32
 * node the Store was transformed into (Store/Store8Bit, vfist, vfisttp,
 * vfst, xStore, a Sync from gen_float_const_Store, or a destination-AM
 * node that absorbed the Store).
 * NOTE(review): case labels for pn_Store_M and some return/brace lines are
 * elided in this numbered listing.
 */
4628 static ir_node *gen_Proj_Store(ir_node *node)
4630 ir_node *pred = get_Proj_pred(node);
4631 ir_node *new_pred = be_transform_node(pred);
4632 dbg_info *dbgi = get_irn_dbg_info(node);
4633 long pn = get_Proj_proj(node);
4635 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4636 switch ((pn_Store)pn) {
4638 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4639 case pn_Store_X_except:
4640 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4641 case pn_Store_X_regular:
4642 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4644 } else if (is_ia32_vfist(new_pred)) {
4645 switch ((pn_Store)pn) {
4647 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4648 case pn_Store_X_except:
4649 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4650 case pn_Store_X_regular:
4651 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4653 } else if (is_ia32_vfisttp(new_pred)) {
4654 switch ((pn_Store)pn) {
4656 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4657 case pn_Store_X_except:
4658 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4659 case pn_Store_X_regular:
4660 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4662 } else if (is_ia32_vfst(new_pred)) {
4663 switch ((pn_Store)pn) {
4665 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4666 case pn_Store_X_except:
4667 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4668 case pn_Store_X_regular:
4669 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4671 } else if (is_ia32_xStore(new_pred)) {
4672 switch ((pn_Store)pn) {
4674 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4675 case pn_Store_X_except:
4676 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4677 case pn_Store_X_regular:
4678 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4680 } else if (is_Sync(new_pred)) {
4681 /* hack for the case that gen_float_const_Store produced a Sync */
4682 if (pn == pn_Store_M) {
4685 panic("exception control flow for gen_float_const_Store not implemented yet");
4686 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4687 /* destination address mode */
4688 if (pn == pn_Store_M) {
4691 panic("exception control flow for destination AM not implemented yet");
4694 panic("No idea how to transform Proj(Store) %+F", node);
4698 * Transform and renumber the Projs from a Div or Mod instruction.
/*
 * Transform and renumber the Projs of a Div, dispatching on the ia32 node
 * it became (integer Div/IDiv, SSE xDiv, or x87 vfdiv).
 * NOTE(review): case labels for pn_Div_M / pn_Div_res and some brace lines
 * are elided in this numbered listing.
 */
4700 static ir_node *gen_Proj_Div(ir_node *node)
4702 ir_node *pred = get_Proj_pred(node);
4703 ir_node *new_pred = be_transform_node(pred);
4704 dbg_info *dbgi = get_irn_dbg_info(node);
4705 long proj = get_Proj_proj(node);
/* Div and IDiv share the same proj numbering */
4707 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4708 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4710 switch ((pn_Div)proj) {
4712 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4713 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4714 } else if (is_ia32_xDiv(new_pred)) {
4715 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4716 } else if (is_ia32_vfdiv(new_pred)) {
4717 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4719 panic("Div transformed to unexpected thing %+F", new_pred);
4722 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4723 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4724 } else if (is_ia32_xDiv(new_pred)) {
4725 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4726 } else if (is_ia32_vfdiv(new_pred)) {
4727 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4729 panic("Div transformed to unexpected thing %+F", new_pred);
4731 case pn_Div_X_except:
/* the Div might raise an exception: mark it */
4732 set_ia32_exc_label(new_pred, 1);
4733 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4734 case pn_Div_X_regular:
4735 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4738 panic("No idea how to transform proj->Div");
4742 * Transform and renumber the Projs from a Div or Mod instruction.
/*
 * Transform and renumber the Projs of a Mod.  A Mod is always lowered to
 * an integer Div/IDiv; the remainder is read from its mod_res output.
 * NOTE(review): case labels for pn_Mod_M / pn_Mod_res and brace lines are
 * elided in this numbered listing.
 */
4744 static ir_node *gen_Proj_Mod(ir_node *node)
4746 ir_node *pred = get_Proj_pred(node);
4747 ir_node *new_pred = be_transform_node(pred);
4748 dbg_info *dbgi = get_irn_dbg_info(node);
4749 long proj = get_Proj_proj(node);
/* Div and IDiv share the same proj numbering */
4751 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4752 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4753 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4755 switch ((pn_Mod)proj) {
4757 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4759 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4760 case pn_Mod_X_except:
/* the Mod might raise an exception: mark it */
4761 set_ia32_exc_label(new_pred, 1);
4762 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4763 case pn_Mod_X_regular:
4764 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4766 panic("No idea how to transform proj->Mod");
4770 * Transform and renumber the Projs from a CopyB.
/*
 * Transform and renumber the Projs of a CopyB, dispatching on whether it
 * became the immediate-size variant (CopyB_i) or the generic CopyB.
 * NOTE(review): the pn_CopyB_M case label, fall-through/break lines and
 * braces are elided in this numbered listing.
 */
4772 static ir_node *gen_Proj_CopyB(ir_node *node)
4774 ir_node *pred = get_Proj_pred(node);
4775 ir_node *new_pred = be_transform_node(pred);
4776 dbg_info *dbgi = get_irn_dbg_info(node);
4777 long proj = get_Proj_proj(node);
4779 switch ((pn_CopyB)proj) {
4781 if (is_ia32_CopyB_i(new_pred)) {
4782 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4783 } else if (is_ia32_CopyB(new_pred)) {
4784 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4787 case pn_CopyB_X_regular:
4788 if (is_ia32_CopyB_i(new_pred)) {
4789 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4790 } else if (is_ia32_CopyB(new_pred)) {
4791 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4794 case pn_CopyB_X_except:
4795 if (is_ia32_CopyB_i(new_pred)) {
4796 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4797 } else if (is_ia32_CopyB(new_pred)) {
4798 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4803 panic("No idea how to transform proj->CopyB");
/**
 * Transform a be_Call into an ia32_Call.
 * Register parameters (limited to eax/ecx/edx) are wired to dedicated inputs,
 * the fpcw is passed as the last argument, and address-mode matching may fold
 * the call target into a memory operand.
 */
4806 static ir_node *gen_be_Call(ir_node *node)
4808 dbg_info *const dbgi = get_irn_dbg_info(node);
4809 ir_node *const src_block = get_nodes_block(node);
4810 ir_node *const block = be_transform_node(src_block);
4811 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4812 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4813 ir_node *const sp = be_transform_node(src_sp);
4814 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4815 ia32_address_mode_t am;
4816 ia32_address_t *const addr = &am.addr;
/* register parameters default to "no register used" */
4821 ir_node * eax = noreg_GP;
4822 ir_node * ecx = noreg_GP;
4823 ir_node * edx = noreg_GP;
4824 unsigned const pop = be_Call_get_pop(node);
4825 ir_type *const call_tp = be_Call_get_type(node);
4826 int old_no_pic_adjust;
4827 int throws_exception = ir_throws_exception(node);
4829 /* Run the x87 simulator if the call returns a float value */
4830 if (get_method_n_ress(call_tp) > 0) {
4831 ir_type *const res_type = get_method_res_type(call_tp, 0);
4832 ir_mode *const res_mode = get_type_mode(res_type);
4834 if (res_mode != NULL && mode_is_float(res_mode)) {
4835 ir_graph *irg = current_ir_graph;
4836 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4837 irg_data->do_x87_sim = 1;
4841 /* We do not want be_Call direct calls */
4842 assert(be_Call_get_entity(node) == NULL);
4844 /* special case for PIC trampoline calls */
4845 old_no_pic_adjust = ia32_no_pic_adjust;
4846 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4848 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4849 match_am | match_immediate);
/* restore the flag saved above */
4851 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw, the ones before it (down to first_arg) are
 * the register parameters */
4853 i = get_irn_arity(node) - 1;
4854 fpcw = be_transform_node(get_irn_n(node, i--));
4855 for (; i >= n_be_Call_first_arg; --i) {
4856 arch_register_req_t const *const req = arch_get_register_req(node, i);
4857 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4859 assert(req->type == arch_register_req_type_limited);
4860 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* each of eax/ecx/edx may carry at most one parameter */
4862 switch (*req->limited) {
4863 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4864 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4865 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4866 default: panic("Invalid GP register for register parameter");
4870 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4871 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4872 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4873 ir_set_throws_exception(call, throws_exception);
4874 set_am_attributes(call, &am);
4875 call = fix_mem_proj(call, &am);
4877 if (get_irn_pinned(node) == op_pin_state_pinned)
4878 set_irn_pinned(call, op_pin_state_pinned);
4880 SET_IA32_ORIG_NODE(call, node);
4882 if (ia32_cg_config.use_sse2) {
4883 /* remember this call for post-processing */
/* postprocess_fp_call_results() later moves x87 float results into XMM
 * registers for these calls */
4884 ARR_APP1(ir_node *, call_list, call);
4885 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4892 * Transform Builtin trap
4894 static ir_node *gen_trap(ir_node *node)
4896 dbg_info *dbgi = get_irn_dbg_info(node);
4897 ir_node *block = be_transform_node(get_nodes_block(node));
4898 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4900 return new_bd_ia32_UD2(dbgi, block, mem);
4904 * Transform Builtin debugbreak
4906 static ir_node *gen_debugbreak(ir_node *node)
4908 dbg_info *dbgi = get_irn_dbg_info(node);
4909 ir_node *block = be_transform_node(get_nodes_block(node));
4910 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4912 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4916 * Transform Builtin return_address
/* Walks up 'value' frames (via ClimbFrame when value != 0 -- TODO confirm the
 * dropped condition) and loads the return address slot of that frame. */
4918 static ir_node *gen_return_address(ir_node *node)
4920 ir_node *param = get_Builtin_param(node, 0);
4921 ir_node *frame = get_Builtin_param(node, 1);
4922 dbg_info *dbgi = get_irn_dbg_info(node);
/* parameter 0 must be a Const: the number of frames to walk up */
4923 ir_tarval *tv = get_Const_tarval(param);
4924 ir_graph *irg = get_irn_irg(node);
4925 unsigned long value = get_tarval_long(tv);
4927 ir_node *block = be_transform_node(get_nodes_block(node));
4928 ir_node *ptr = be_transform_node(frame);
/* climb up the frame chain 'value' times */
4932 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4933 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4934 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4937 /* load the return address from this frame */
4938 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4940 set_irn_pinned(load, get_irn_pinned(node));
4941 set_ia32_op_type(load, ia32_AddrModeS);
4942 set_ia32_ls_mode(load, mode_Iu);
/* address the frame-local return address entity */
4944 set_ia32_am_offs_int(load, 0);
4945 set_ia32_use_frame(load);
4946 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4948 if (get_irn_pinned(node) == op_pin_state_floats) {
/* a floating load may be rematerialized; all load variants must share the
 * same result proj number for this to be safe */
4949 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4950 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4951 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4952 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4955 SET_IA32_ORIG_NODE(load, node);
4956 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4960 * Transform Builtin frame_address
/* Same structure as gen_return_address, but loads the saved frame pointer
 * entity instead of the return address slot. */
4962 static ir_node *gen_frame_address(ir_node *node)
4964 ir_node *param = get_Builtin_param(node, 0);
4965 ir_node *frame = get_Builtin_param(node, 1);
4966 dbg_info *dbgi = get_irn_dbg_info(node);
/* parameter 0 must be a Const: the number of frames to walk up */
4967 ir_tarval *tv = get_Const_tarval(param);
4968 ir_graph *irg = get_irn_irg(node);
4969 unsigned long value = get_tarval_long(tv);
4971 ir_node *block = be_transform_node(get_nodes_block(node));
4972 ir_node *ptr = be_transform_node(frame);
/* climb up the frame chain 'value' times */
4977 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4978 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4979 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4982 /* load the frame address from this frame */
4983 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4985 set_irn_pinned(load, get_irn_pinned(node));
4986 set_ia32_op_type(load, ia32_AddrModeS);
4987 set_ia32_ls_mode(load, mode_Iu);
/* the entity may be NULL if the backend has no frame address entity
 * -- NOTE(review): the NULL branch is not visible here, confirm */
4989 ent = ia32_get_frame_address_entity(irg);
4991 set_ia32_am_offs_int(load, 0);
4992 set_ia32_use_frame(load);
4993 set_ia32_frame_ent(load, ent);
4995 /* will fail anyway, but gcc does this: */
4996 set_ia32_am_offs_int(load, 0);
4999 if (get_irn_pinned(node) == op_pin_state_floats) {
/* rematerialization is only safe because all load variants use the same
 * result proj number */
5000 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
5001 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
5002 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
5003 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
5006 SET_IA32_ORIG_NODE(load, node);
5007 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
5011 * Transform Builtin prefetch (header previously said frame_address by
5011 * mistake). Selects between SSE prefetch{NTA,0,1,2}, 3DNow! prefetch(w),
5011 * or no code at all depending on CPU features.
5013 static ir_node *gen_prefetch(ir_node *node)
5016 ir_node *ptr, *block, *mem, *base, *index;
5017 ir_node *param, *new_node;
5020 ia32_address_t addr;
5022 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
5023 /* no prefetch at all, route memory */
5024 return be_transform_node(get_Builtin_mem(node));
/* parameter 1 is the read/write flag and must be a Const */
5027 param = get_Builtin_param(node, 1);
5028 tv = get_Const_tarval(param);
5029 rw = get_tarval_long(tv);
5031 /* construct load address */
5032 memset(&addr, 0, sizeof(addr));
5033 ptr = get_Builtin_param(node, 0);
5034 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* base/index fall back to noreg when absent (branches partly elided here) */
5041 base = be_transform_node(base);
5044 if (index == NULL) {
5047 index = be_transform_node(index);
5050 dbgi = get_irn_dbg_info(node);
5051 block = be_transform_node(get_nodes_block(node));
5052 mem = be_transform_node(get_Builtin_mem(node));
5054 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
5055 /* we have 3DNow!, this was already checked above */
5056 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
5057 } else if (ia32_cg_config.use_sse_prefetch) {
5058 /* note: rw == 1 is IGNORED in that case */
5059 param = get_Builtin_param(node, 2);
5060 tv = get_Const_tarval(param);
5061 locality = get_tarval_long(tv);
5063 /* SSE style prefetch: locality 0..3 maps to NTA/2/1/0 */
5066 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
5069 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
5072 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
5075 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
5079 assert(ia32_cg_config.use_3dnow_prefetch);
5080 /* 3DNow! style prefetch */
5081 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
5084 set_irn_pinned(new_node, get_irn_pinned(node));
5085 set_ia32_op_type(new_node, ia32_AddrModeS);
5086 set_ia32_ls_mode(new_node, mode_Bu);
5087 set_address(new_node, &addr);
5089 SET_IA32_ORIG_NODE(new_node, node);
5091 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5095 * Transform bsf like node
/* Generic helper for unary builtins (bsf/bsr/popcnt): matches the single
 * operand with address-mode folding and builds the node via 'func'. */
5097 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5099 ir_node *param = get_Builtin_param(node, 0);
5100 dbg_info *dbgi = get_irn_dbg_info(node);
5102 ir_node *block = get_nodes_block(node);
5103 ir_node *new_block = be_transform_node(block);
5105 ia32_address_mode_t am;
5106 ia32_address_t *addr = &am.addr;
5109 match_arguments(&am, block, NULL, param, NULL, match_am);
5111 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5112 set_am_attributes(cnt, &am);
/* operand size determines the operation size */
5113 set_ia32_ls_mode(cnt, get_irn_mode(param));
5115 SET_IA32_ORIG_NODE(cnt, node);
5116 return fix_mem_proj(cnt, &am);
5120 * Transform builtin ffs.
/* ffs(x) = bsf(x) + 1, with ffs(0) = 0. bsf leaves its result undefined for
 * a zero input, so the zero flag is materialized (Setcc), sign-extended via
 * Neg to an all-ones mask, or-ed into the result, then 1 is added; for x == 0
 * this yields (-1) + 1 = 0. */
5122 static ir_node *gen_ffs(ir_node *node)
5124 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5125 ir_node *real = skip_Proj(bsf);
5126 dbg_info *dbgi = get_irn_dbg_info(real);
5127 ir_node *block = get_nodes_block(real);
5128 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* ensure the Bsf is in mode_T so both result and flags can be projected */
5131 if (get_irn_mode(real) != mode_T) {
5132 set_irn_mode(real, mode_T);
5133 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5136 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (input was zero) */
5139 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5140 SET_IA32_ORIG_NODE(set, node);
/* widen the 8-bit Setcc result to 32 bit */
5143 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5144 SET_IA32_ORIG_NODE(conv, node);
/* neg = 0 or 0xFFFFFFFF mask */
5147 neg = new_bd_ia32_Neg(dbgi, block, conv);
5150 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5151 set_ia32_commutative(orn);
/* final +1 done with a Lea to avoid clobbering flags */
5154 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5155 add_ia32_am_offs_int(add, 1);
5160 * Transform builtin clz.
5162 static ir_node *gen_clz(ir_node *node)
5164 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5165 ir_node *real = skip_Proj(bsr);
5166 dbg_info *dbgi = get_irn_dbg_info(real);
5167 ir_node *block = get_nodes_block(real);
5168 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5170 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5174 * Transform builtin ctz.
5176 static ir_node *gen_ctz(ir_node *node)
5178 return gen_unop_AM(node, new_bd_ia32_Bsf);
5182 * Transform builtin parity.
/* Computes parity of the full 32-bit value: folds the upper half into the
 * lower via shr/xor, then high/low bytes via XorHighLow, and finally reads
 * the hardware parity flag. */
5184 static ir_node *gen_parity(ir_node *node)
5186 dbg_info *dbgi = get_irn_dbg_info(node);
5187 ir_node *block = get_nodes_block(node);
5188 ir_node *new_block = be_transform_node(block);
5189 ir_node *param = get_Builtin_param(node, 0);
5190 ir_node *new_param = be_transform_node(param);
5193 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5194 * so we have to do complicated xoring first.
5195 * (we should also better lower this before the backend so we still have a
5196 * chance for CSE, constant folding and other goodies for some of these
/* fold upper 16 bits into the lower 16: xor = param ^ (param >> 16) */
5199 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5200 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5201 ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5203 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
/* project the flags of the final xor; its parity flag now covers all bits */
5206 set_irn_mode(xor2, mode_T);
5207 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
5210 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5211 SET_IA32_ORIG_NODE(new_node, node);
/* widen the 8-bit Setcc result to a full register */
5214 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5215 nomem, new_node, mode_Bu);
5216 SET_IA32_ORIG_NODE(new_node, node);
5221 * Transform builtin popcount
/* Uses the hardware popcnt instruction when available; otherwise emits the
 * classic parallel bit-count from Hacker's Delight (pairs, nibbles, bytes,
 * halfwords), using Lea for the flag-free additions. */
5223 static ir_node *gen_popcount(ir_node *node)
5225 ir_node *param = get_Builtin_param(node, 0);
5226 dbg_info *dbgi = get_irn_dbg_info(node);
5228 ir_node *block = get_nodes_block(node);
5229 ir_node *new_block = be_transform_node(block);
5232 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5234 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5235 if (ia32_cg_config.use_popcnt) {
5236 ia32_address_mode_t am;
5237 ia32_address_t *addr = &am.addr;
5240 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5242 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5243 set_am_attributes(cnt, &am);
5244 set_ia32_ls_mode(cnt, get_irn_mode(param));
5246 SET_IA32_ORIG_NODE(cnt, node);
5247 return fix_mem_proj(cnt, &am);
5250 new_param = be_transform_node(param);
5252 /* do the standard popcount algo */
5253 /* TODO: This is stupid, we should transform this before the backend,
5254 * to get CSE, localopts, etc. for the operations
5255 * TODO: This is also not the optimal algorithm (it is just the starting
5256 * example in hackers delight, they optimize it more on the following page)
5257 * But I'm too lazy to fix this now, as the code should get lowered before
5258 * the backend anyway.
5261 /* m1 = x & 0x55555555 */
5262 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5263 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5266 simm = ia32_create_Immediate(NULL, 0, 1);
5267 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5269 /* m2 = s1 & 0x55555555 */
5270 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (Lea = add that does not clobber flags) */
5273 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5275 /* m4 = m3 & 0x33333333 */
5276 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5277 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5280 simm = ia32_create_Immediate(NULL, 0, 2);
5281 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5283 /* m5 = s2 & 0x33333333 */
5284 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 */
5287 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5289 /* m7 = m6 & 0x0F0F0F0F */
5290 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5291 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5294 simm = ia32_create_Immediate(NULL, 0, 4);
5295 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5297 /* m8 = s3 & 0x0F0F0F0F */
5298 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 */
5301 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5303 /* m10 = m9 & 0x00FF00FF */
5304 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5305 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5308 simm = ia32_create_Immediate(NULL, 0, 8);
5309 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5311 /* m11 = s4 & 0x00FF00FF */
5312 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5314 /* m12 = m10 + m11 */
5315 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5317 /* m13 = m12 & 0x0000FFFF */
5318 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5319 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5321 /* s5 = m12 >> 16 */
5322 simm = ia32_create_Immediate(NULL, 0, 16);
5323 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5325 /* res = m13 + s5 */
5326 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5330 * Transform builtin byte swap.
/* 32-bit swap uses the bswap instruction on i486+; on older CPUs it is
 * composed from shifts, masks and Leas. 16-bit swap always maps to the
 * xchg/rol based Bswap16. (The switch-on-size labels are elided here.) */
5332 static ir_node *gen_bswap(ir_node *node)
5334 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5335 dbg_info *dbgi = get_irn_dbg_info(node);
5337 ir_node *block = get_nodes_block(node);
5338 ir_node *new_block = be_transform_node(block);
5339 ir_mode *mode = get_irn_mode(param);
5340 unsigned size = get_mode_size_bits(mode);
5341 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5345 if (ia32_cg_config.use_i486) {
5346 /* swap available */
5347 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* pre-i486 fallback: result = (x<<24) | ((x<<8)&0xFF0000)
 *                           | ((x>>8)&0xFF00... masks below) | (x>>24) */
5349 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5350 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5352 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5353 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5355 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5357 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5358 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5360 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5361 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5364 /* swap16 always available */
5365 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5368 panic("Invalid bswap size (%d)", size);
5373 * Transform builtin outport.
/* Emits an "out" instruction; the port may be folded as an immediate, the
 * value width comes from the mode of the value operand. */
5375 static ir_node *gen_outport(ir_node *node)
5377 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5378 ir_node *oldv = get_Builtin_param(node, 1);
5379 ir_mode *mode = get_irn_mode(oldv);
5380 ir_node *value = be_transform_node(oldv);
5381 ir_node *block = be_transform_node(get_nodes_block(node));
5382 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5383 dbg_info *dbgi = get_irn_dbg_info(node);
5385 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* the mode of the written value determines the access size */
5386 set_ia32_ls_mode(res, mode);
5391 * Transform builtin inport.
/* Emits an "in" instruction; the access size is taken from the declared
 * result type of the builtin. */
5393 static ir_node *gen_inport(ir_node *node)
5395 ir_type *tp = get_Builtin_type(node);
5396 ir_type *rstp = get_method_res_type(tp, 0);
5397 ir_mode *mode = get_type_mode(rstp);
5398 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5399 ir_node *block = be_transform_node(get_nodes_block(node));
5400 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5401 dbg_info *dbgi = get_irn_dbg_info(node);
5403 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5404 set_ia32_ls_mode(res, mode);
5406 /* check for missing Result Proj */
5411 * Transform a builtin inner trampoline
/* Writes executable trampoline code to memory at 'ptr':
 *   B9 <env>   mov ecx, <static chain>
 *   E9 <rel>   jmp rel32 to <callee>
 * The -10 offset accounts for the 10 trampoline bytes already emitted,
 * since jmp rel32 is relative to the end of the instruction. */
5413 static ir_node *gen_inner_trampoline(ir_node *node)
5415 ir_node *ptr = get_Builtin_param(node, 0);
5416 ir_node *callee = get_Builtin_param(node, 1);
5417 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5418 ir_node *mem = get_Builtin_mem(node);
5419 ir_node *block = get_nodes_block(node);
5420 ir_node *new_block = be_transform_node(block);
5424 ir_node *trampoline;
5426 dbg_info *dbgi = get_irn_dbg_info(node);
5427 ia32_address_t addr;
5429 /* construct store address */
5430 memset(&addr, 0, sizeof(addr));
5431 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5433 if (addr.base == NULL) {
5434 addr.base = noreg_GP;
5436 addr.base = be_transform_node(addr.base);
5439 if (addr.index == NULL) {
5440 addr.index = noreg_GP;
5442 addr.index = be_transform_node(addr.index);
5444 addr.mem = be_transform_node(mem);
5446 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5447 val = ia32_create_Immediate(NULL, 0, 0xB9);
5448 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5449 addr.index, addr.mem, val);
5450 set_irn_pinned(store, get_irn_pinned(node));
5451 set_ia32_op_type(store, ia32_AddrModeD);
5452 set_ia32_ls_mode(store, mode_Bu);
5453 set_address(store, &addr);
/* store the 32-bit environment pointer as the mov's immediate */
5457 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5458 addr.index, addr.mem, env);
5459 set_irn_pinned(store, get_irn_pinned(node));
5460 set_ia32_op_type(store, ia32_AddrModeD);
5461 set_ia32_ls_mode(store, mode_Iu);
5462 set_address(store, &addr);
5466 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5467 val = ia32_create_Immediate(NULL, 0, 0xE9);
5468 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5469 addr.index, addr.mem, val);
5470 set_irn_pinned(store, get_irn_pinned(node));
5471 set_ia32_op_type(store, ia32_AddrModeD);
5472 set_ia32_ls_mode(store, mode_Bu);
5473 set_address(store, &addr);
5477 trampoline = be_transform_node(ptr);
5479 /* the callee is typically an immediate */
5480 if (is_SymConst(callee)) {
5481 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5483 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5484 add_ia32_am_offs_int(rel, -10);
/* rel = (callee - 10) - trampoline: the jmp displacement */
5486 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5488 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5489 addr.index, addr.mem, rel);
5490 set_irn_pinned(store, get_irn_pinned(node));
5491 set_ia32_op_type(store, ia32_AddrModeD);
5492 set_ia32_ls_mode(store, mode_Iu);
5493 set_address(store, &addr);
/* result tuple: [0] = memory, [1] = trampoline address */
5498 return new_r_Tuple(new_block, 2, in);
5502 * Transform Builtin node.
/* Dispatches on the builtin kind to the specialised gen_* functions above.
 * (Several case labels are elided in this listing.) */
5504 static ir_node *gen_Builtin(ir_node *node)
5506 ir_builtin_kind kind = get_Builtin_kind(node);
5510 return gen_trap(node);
5511 case ir_bk_debugbreak:
5512 return gen_debugbreak(node);
5513 case ir_bk_return_address:
5514 return gen_return_address(node);
5515 case ir_bk_frame_address:
5516 return gen_frame_address(node);
5517 case ir_bk_prefetch:
5518 return gen_prefetch(node);
5520 return gen_ffs(node);
5522 return gen_clz(node);
5524 return gen_ctz(node);
5526 return gen_parity(node);
5527 case ir_bk_popcount:
5528 return gen_popcount(node);
5530 return gen_bswap(node);
5532 return gen_outport(node);
5534 return gen_inport(node);
5535 case ir_bk_inner_trampoline:
5536 return gen_inner_trampoline(node);
5538 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5542 * Transform Proj(Builtin) node.
/* Maps the frontend's pn_Builtin_* proj numbers to the projs (or Tuple
 * entries) of the already-transformed builtin. */
5544 static ir_node *gen_Proj_Builtin(ir_node *proj)
5546 ir_node *node = get_Proj_pred(proj);
5547 ir_node *new_node = be_transform_node(node);
5548 ir_builtin_kind kind = get_Builtin_kind(node);
/* value-producing builtins: the transformed node IS the result */
5551 case ir_bk_return_address:
5552 case ir_bk_frame_address:
5557 case ir_bk_popcount:
5559 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* memory-only builtins: the transformed node IS the memory */
5562 case ir_bk_debugbreak:
5563 case ir_bk_prefetch:
5565 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport produces both a result and a memory proj */
5568 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5569 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5571 assert(get_Proj_proj(proj) == pn_Builtin_M);
5572 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
/* inner_trampoline was transformed into a Tuple [mem, result] */
5574 case ir_bk_inner_trampoline:
5575 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5576 return get_Tuple_pred(new_node, 1);
5578 assert(get_Proj_proj(proj) == pn_Builtin_M);
5579 return get_Tuple_pred(new_node, 0);
5582 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* IncSP is kept as-is, but since it is emitted as an add/sub on esp it
 * clobbers the flags register, which must be recorded. */
5585 static ir_node *gen_be_IncSP(ir_node *node)
5587 ir_node *res = be_duplicate_node(node);
5588 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5594 * Transform the Projs from a be_Call.
/* Renumbers be_Call projs to ia32_Call projs. Result projs are matched to
 * the ia32_Call output whose limited register requirement equals the one of
 * the original proj. */
5596 static ir_node *gen_Proj_be_Call(ir_node *node)
5598 ir_node *call = get_Proj_pred(node);
5599 ir_node *new_call = be_transform_node(call);
5600 dbg_info *dbgi = get_irn_dbg_info(node);
5601 long proj = get_Proj_proj(node);
5602 ir_mode *mode = get_irn_mode(node);
5605 if (proj == pn_be_Call_M) {
5606 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5608 /* transform call modes */
5609 if (mode_is_data(mode)) {
5610 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5614 /* Map from be_Call to ia32_Call proj number */
5615 if (proj == pn_be_Call_sp) {
5616 proj = pn_ia32_Call_stack;
5617 } else if (proj == pn_be_Call_M) {
5618 proj = pn_ia32_Call_M;
5619 } else if (proj == pn_be_Call_X_except) {
5620 proj = pn_ia32_Call_X_except;
5621 } else if (proj == pn_be_Call_X_regular) {
5622 proj = pn_ia32_Call_X_regular;
5624 arch_register_req_t const *const req = arch_get_register_req_out(node);
5625 int const n_outs = arch_irn_get_n_outs(new_call);
5628 assert(proj >= pn_be_Call_first_res);
5629 assert(req->type & arch_register_req_type_limited);
/* find the ia32_Call output with the same limited register requirement */
5631 for (i = 0; i < n_outs; ++i) {
5632 arch_register_req_t const *const new_req
5633 = arch_get_out_register_req(new_call, i);
5635 if (!(new_req->type & arch_register_req_type_limited) ||
5636 new_req->cls != req->cls ||
5637 *new_req->limited != *req->limited)
5646 res = new_rd_Proj(dbgi, new_call, mode, proj);
5648 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5650 case pn_ia32_Call_stack:
5651 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5654 case pn_ia32_Call_fpcw:
5655 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5663 * Transform the Projs from a Cmp.
/* Cmp results must have been lowered to control flow / Mux earlier; reaching
 * this function indicates a lowering phase was skipped. */
5665 static ir_node *gen_Proj_Cmp(ir_node *node)
5667 /* this probably means not all mode_b nodes were lowered... */
5668 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/**
 * Transform a Proj from an ASM node: the memory proj is renumbered to the
 * last output of the transformed ASM; value projs keep/derive their position
 * (the int/float branches are elided in this listing).
 */
5672 static ir_node *gen_Proj_ASM(ir_node *node)
5674 ir_mode *mode = get_irn_mode(node);
5675 ir_node *pred = get_Proj_pred(node);
5676 ir_node *new_pred = be_transform_node(pred);
5677 long pos = get_Proj_proj(node);
/* memory is always the last output of the ia32 ASM node */
5679 if (mode == mode_M) {
5680 pos = arch_irn_get_n_outs(new_pred)-1;
5681 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5683 } else if (mode_is_float(mode)) {
5686 panic("unexpected proj mode at ASM");
5689 return new_r_Proj(new_pred, mode, pos);
5693 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: delegates to the specialised gen_Proj_* handlers
 * based on the predecessor's opcode. (Several case labels elided.) */
5695 static ir_node *gen_Proj(ir_node *node)
5697 ir_node *pred = get_Proj_pred(node);
5700 switch (get_irn_opcode(pred)) {
5702 return gen_Proj_Load(node);
5704 return gen_Proj_Store(node);
5706 return gen_Proj_ASM(node);
5708 return gen_Proj_Builtin(node);
5710 return gen_Proj_Div(node);
5712 return gen_Proj_Mod(node);
5714 return gen_Proj_CopyB(node);
5716 return gen_Proj_be_SubSP(node);
5718 return gen_Proj_be_AddSP(node);
5720 return gen_Proj_be_Call(node);
5722 return gen_Proj_Cmp(node);
5724 proj = get_Proj_proj(node);
5726 case pn_Start_X_initial_exec: {
5727 ir_node *block = get_nodes_block(pred);
5728 ir_node *new_block = be_transform_node(block);
5729 dbg_info *dbgi = get_irn_dbg_info(node);
5730 /* we exchange the ProjX with a jump */
5731 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5739 if (is_ia32_l_FloattoLL(pred)) {
5740 return gen_Proj_l_FloattoLL(node);
5742 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5746 ir_mode *mode = get_irn_mode(node);
/* GP-register values are normalized to mode_Iu in the backend */
5747 if (ia32_mode_needs_gp_reg(mode)) {
5748 ir_node *new_pred = be_transform_node(pred);
5749 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5750 get_Proj_proj(node));
/* keep the node number for debugging purposes */
5751 new_proj->node_nr = node->node_nr;
5756 return be_duplicate_node(node);
5760 * Enters all transform functions into the generic pointer
/* Registration table: one be_set_transform_function call per opcode; ops not
 * listed fall back to the behavior set up by be_start_transform_setup(). */
5762 static void register_transformers(void)
5764 /* first clear the generic function pointer for all ops */
5765 be_start_transform_setup();
5767 be_set_transform_function(op_Add, gen_Add);
5768 be_set_transform_function(op_And, gen_And);
5769 be_set_transform_function(op_ASM, ia32_gen_ASM);
5770 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5771 be_set_transform_function(op_be_Call, gen_be_Call);
5772 be_set_transform_function(op_be_Copy, gen_be_Copy);
5773 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5774 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5775 be_set_transform_function(op_be_Return, gen_be_Return);
5776 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5777 be_set_transform_function(op_Builtin, gen_Builtin);
5778 be_set_transform_function(op_Cmp, gen_Cmp);
5779 be_set_transform_function(op_Cond, gen_Cond);
5780 be_set_transform_function(op_Const, gen_Const);
5781 be_set_transform_function(op_Conv, gen_Conv);
5782 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5783 be_set_transform_function(op_Div, gen_Div);
5784 be_set_transform_function(op_Eor, gen_Eor);
/* the ia32_l_* ops are lowered 64-bit helper nodes created earlier */
5785 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5786 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5787 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5788 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5789 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5790 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5791 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5792 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5793 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5794 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5795 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5796 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5797 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5798 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5799 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5800 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5801 be_set_transform_function(op_IJmp, gen_IJmp);
5802 be_set_transform_function(op_Jmp, gen_Jmp);
5803 be_set_transform_function(op_Load, gen_Load);
5804 be_set_transform_function(op_Minus, gen_Minus);
5805 be_set_transform_function(op_Mod, gen_Mod);
5806 be_set_transform_function(op_Mul, gen_Mul);
5807 be_set_transform_function(op_Mulh, gen_Mulh);
5808 be_set_transform_function(op_Mux, gen_Mux);
5809 be_set_transform_function(op_Not, gen_Not);
5810 be_set_transform_function(op_Or, gen_Or);
5811 be_set_transform_function(op_Phi, gen_Phi);
5812 be_set_transform_function(op_Proj, gen_Proj);
5813 be_set_transform_function(op_Rotl, gen_Rotl);
5814 be_set_transform_function(op_Shl, gen_Shl);
5815 be_set_transform_function(op_Shr, gen_Shr);
5816 be_set_transform_function(op_Shrs, gen_Shrs);
5817 be_set_transform_function(op_Store, gen_Store);
5818 be_set_transform_function(op_Sub, gen_Sub);
5819 be_set_transform_function(op_SymConst, gen_SymConst);
5820 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5824 * Pre-transform all unknown and noreg nodes.
/* These nodes are referenced before the regular transform walk reaches them,
 * so they are transformed eagerly; also caches nomem/noreg_GP used throughout
 * this file. */
5826 static void ia32_pretransform_node(void)
5828 ir_graph *irg = current_ir_graph;
5829 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5831 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5832 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5833 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5834 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5835 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
/* cache frequently used nodes in file-level variables */
5837 nomem = get_irg_no_mem(irg);
5838 noreg_GP = ia32_new_NoReg_gp(irg);
5842 * Post-process all calls if we are in SSE mode.
5843 * The ABI requires that the results are in st0, copy them
5844 * to a xmm register.
/* For every call recorded in call_list/call_types by gen_be_Call: each float
 * result either patches an xStore user into a direct x87 vfst, or is spilled
 * to the frame with vfst and reloaded into an XMM register with xLoad. */
5846 static void postprocess_fp_call_results(void)
5850 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5851 ir_node *call = call_list[i];
5852 ir_type *mtp = call_types[i];
5855 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5856 ir_type *res_tp = get_method_res_type(mtp, j);
5857 ir_node *res, *new_res;
5858 const ir_edge_t *edge, *next;
5861 if (! is_atomic_type(res_tp)) {
5862 /* no floating point return */
5865 mode = get_type_mode(res_tp);
5866 if (! mode_is_float(mode)) {
5867 /* no floating point return */
/* the float result lives in the virtual x87 registers (vf0 + index) */
5871 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5874 /* now patch the users */
5875 foreach_out_edge_safe(res, edge, next) {
5876 ir_node *succ = get_edge_src_irn(edge);
/* Keeps stay on the x87 value */
5879 if (be_is_Keep(succ))
5882 if (is_ia32_xStore(succ)) {
5883 /* an xStore can be patched into an vfst */
5884 dbg_info *db = get_irn_dbg_info(succ);
5885 ir_node *block = get_nodes_block(succ);
5886 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5887 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5888 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5889 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5890 ir_mode *mode = get_ia32_ls_mode(succ);
5892 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5893 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5894 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5895 if (is_ia32_use_frame(succ))
5896 set_ia32_use_frame(st);
5897 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5898 set_irn_pinned(st, get_irn_pinned(succ));
5899 set_ia32_op_type(st, ia32_AddrModeD);
/* proj numbers must match so the xStore's projs keep working on vfst */
5901 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5902 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5903 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
5906 } else if (new_res == NULL) {
/* lazily build the st(0) -> frame -> XMM transfer once per result */
5907 dbg_info *db = get_irn_dbg_info(call);
5908 ir_node *block = get_nodes_block(call);
5909 ir_node *frame = get_irg_frame(current_ir_graph);
5910 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5911 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5912 ir_node *vfst, *xld, *new_mem;
5915 /* store st(0) on stack */
5916 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5918 set_ia32_op_type(vfst, ia32_AddrModeD);
5919 set_ia32_use_frame(vfst);
5921 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5923 /* load into SSE register */
5924 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5926 set_ia32_op_type(xld, ia32_AddrModeS);
5927 set_ia32_use_frame(xld);
5929 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5930 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* all former users of the call's memory now see the load's memory */
5932 if (old_mem != NULL) {
5933 edges_reroute(old_mem, new_mem);
5937 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5943 /* do the transformation */
/* Entry point of the firm -> ia32 transformation for one graph.
 * Sets up transformer registration and per-run state, runs the generic
 * backend transform driver with our pre-transform hook, applies the SSE
 * fp-call-result fixup when SSE2 is enabled, and tears everything down
 * again (restoring the caller's CSE setting). */
5944 void ia32_transform_graph(ir_graph *irg)
5948 register_transformers();
5949 initial_fpcw = NULL;
5950 ia32_no_pic_adjust = 0;
/* Remember the incoming FPU control word value for this graph. */
5952 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* Height information is used by the address-mode matcher; time it. */
5954 be_timer_push(T_HEIGHTS);
5955 ia32_heights = heights_new(irg);
5956 be_timer_pop(T_HEIGHTS);
5957 ia32_calculate_non_address_mode_nodes(irg);
5959 /* the transform phase is not safe for CSE (yet) because several nodes get
5960 * attributes set after their creation */
/* Save the CSE setting so it can be restored below (CSE is presumably
 * switched off here for the duration of the transform — TODO confirm,
 * the corresponding set_opt_cse call is not visible in this view). */
5961 cse_last = get_opt_cse();
/* Collect calls with fp results during the walk for post-processing. */
5964 call_list = NEW_ARR_F(ir_node *, 0);
5965 call_types = NEW_ARR_F(ir_type *, 0);
5966 be_transform_graph(irg, ia32_pretransform_node);
5968 if (ia32_cg_config.use_sse2)
5969 postprocess_fp_call_results();
5970 DEL_ARR_F(call_types);
5971 DEL_ARR_F(call_list);
/* Restore the caller's CSE setting and free per-run analysis data. */
5973 set_opt_cse(cse_last);
5975 ia32_free_non_address_mode_nodes();
5976 heights_free(ia32_heights);
5977 ia32_heights = NULL;
5980 void ia32_init_transform(void)
5982 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");