2 * Copyright (C) 1995-2011 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
71 /* define this to construct SSE constants instead of load them */
72 #undef CONSTRUCT_SSE_CONST
/* Bit patterns (as strings) for sign/abs masks of float/double, plus the
 * INT64 max and the 2^64 bias used for unsigned long long -> float conversion. */
75 #define SFP_SIGN "0x80000000"
76 #define DFP_SIGN "0x8000000000000000"
77 #define SFP_ABS "0x7FFFFFFF"
78 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
79 #define DFP_INTMAX "9223372036854775807"
80 #define ULL_BIAS "18446744073709551616"
/* Linker-visible names for the constant entities created from the values above. */
82 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
83 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
84 #define ENT_SFP_ABS "C_ia32_sfp_abs"
85 #define ENT_DFP_ABS "C_ia32_dfp_abs"
86 #define ENT_ULL_BIAS "C_ia32_ull_bias"
/* Shorthands for the modes of the x87 (vfp) and SSE (xmm) register classes. */
88 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
89 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
91 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* fpu control word: the pre-transform node and its transformed counterpart
 * (see get_fpcw() which fills initial_fpcw lazily) */
93 static ir_node *old_initial_fpcw = NULL;
94 static ir_node *initial_fpcw = NULL;
95 int ia32_no_pic_adjust;
/* Constructor-function typedefs used to parameterize the generic
 * gen_binop/gen_unop/gen_shift_binop helpers below. */
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* forward declarations for helpers defined later in this file */
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* it's enough to have those once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero is a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero is a node represents the -1 constant. */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
155 static bool is_simple_x87_Const(ir_node *node)
157 ir_tarval *tv = get_Const_tarval(node);
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
/*
 * NOTE(review): this listing appears line-sampled; braces/returns between the
 * numbered lines are missing here — verify against the upstream file.
 */
166 * returns true if constant can be created with a simple float command
168 static bool is_simple_sse_Const(ir_node *node)
170 ir_tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
/* 0.0 is always cheap (xorps), more cases only with CONSTRUCT_SSE_CONST */
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
/* assemble the low 32 bits of the tarval byte-wise (little endian) */
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
201 static ir_node *get_symconst_base(void)
203 ir_graph *irg = current_ir_graph;
205 if (be_get_irg_options(irg)->pic) {
206 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
207 return arch_env->impl->get_pic_base(irg);
/*
 * NOTE(review): this listing appears line-sampled; several lines (braces,
 * declarations, returns) are missing between the numbered lines — verify
 * against the upstream file before relying on exact control flow.
 */
214 * Transforms a Const.
216 static ir_node *gen_Const(ir_node *node)
218 ir_node *old_block = get_nodes_block(node);
219 ir_node *block = be_transform_node(old_block);
220 dbg_info *dbgi = get_irn_dbg_info(node);
221 ir_mode *mode = get_irn_mode(node);
223 assert(is_Const(node));
225 if (mode_is_float(mode)) {
/* --- SSE2 path: try cheap constructions before falling back to a load --- */
231 if (ia32_cg_config.use_sse2) {
232 ir_tarval *tv = get_Const_tarval(node);
233 if (tarval_is_null(tv)) {
/* 0.0: materialize with xorps (xZero), no memory access */
234 load = new_bd_ia32_xZero(dbgi, block);
235 set_ia32_ls_mode(load, mode);
237 #ifdef CONSTRUCT_SSE_CONST
238 } else if (tarval_is_one(tv)) {
/* 1.0: build from all-ones via shift left then shift right
 * (26/55 = mantissa-bit shift counts for float/double respectively
 *  — presumably; TODO confirm) */
239 int cnst = mode == mode_F ? 26 : 55;
240 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
241 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
242 ir_node *pslld, *psrld;
244 load = new_bd_ia32_xAllOnes(dbgi, block);
245 set_ia32_ls_mode(load, mode);
246 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
247 set_ia32_ls_mode(pslld, mode);
248 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
249 set_ia32_ls_mode(psrld, mode);
251 #endif /* CONSTRUCT_SSE_CONST */
252 } else if (mode == mode_F) {
253 /* we can place any 32bit constant by using a movd gp, sse */
254 unsigned val = get_tarval_sub_bits(tv, 0) |
255 (get_tarval_sub_bits(tv, 1) << 8) |
256 (get_tarval_sub_bits(tv, 2) << 16) |
257 (get_tarval_sub_bits(tv, 3) << 24);
258 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
259 load = new_bd_ia32_xMovd(dbgi, block, cnst);
260 set_ia32_ls_mode(load, mode);
263 #ifdef CONSTRUCT_SSE_CONST
264 if (mode == mode_D) {
265 unsigned val = get_tarval_sub_bits(tv, 0) |
266 (get_tarval_sub_bits(tv, 1) << 8) |
267 (get_tarval_sub_bits(tv, 2) << 16) |
268 (get_tarval_sub_bits(tv, 3) << 24);
270 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
271 ir_node *cnst, *psllq;
273 /* fine, lower 32bit are zero, produce 32bit value */
274 val = get_tarval_sub_bits(tv, 4) |
275 (get_tarval_sub_bits(tv, 5) << 8) |
276 (get_tarval_sub_bits(tv, 6) << 16) |
277 (get_tarval_sub_bits(tv, 7) << 24);
278 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
279 load = new_bd_ia32_xMovd(dbgi, block, cnst);
280 set_ia32_ls_mode(load, mode);
/* shift the upper half into place to reconstruct the double */
281 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
282 set_ia32_ls_mode(psllq, mode);
287 #endif /* CONSTRUCT_SSE_CONST */
/* fallback: load the constant from a global entity */
288 floatent = ia32_create_float_const_entity(node);
290 base = get_symconst_base();
291 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
293 set_ia32_op_type(load, ia32_AddrModeS);
294 set_ia32_am_sc(load, floatent);
/* the load of a constant can always be repeated if needed */
295 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
296 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* --- x87 path: fldz/fld1 for 0.0/1.0, otherwise load from entity --- */
299 if (is_Const_null(node)) {
300 load = new_bd_ia32_vfldz(dbgi, block);
302 set_ia32_ls_mode(load, mode);
303 } else if (is_Const_one(node)) {
304 load = new_bd_ia32_vfld1(dbgi, block);
306 set_ia32_ls_mode(load, mode);
311 floatent = ia32_create_float_const_entity(node);
312 /* create_float_const_ent is smart and sometimes creates
314 ls_mode = get_type_mode(get_entity_type(floatent));
315 base = get_symconst_base();
316 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
318 set_ia32_op_type(load, ia32_AddrModeS);
319 set_ia32_am_sc(load, floatent);
320 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
321 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
324 #ifdef CONSTRUCT_SSE_CONST
326 #endif /* CONSTRUCT_SSE_CONST */
327 SET_IA32_ORIG_NODE(load, node);
329 } else { /* non-float mode */
331 ir_tarval *tv = get_Const_tarval(node);
/* normalize to 32-bit unsigned so it fits an ia32 immediate */
334 tv = tarval_convert_to(tv, mode_Iu);
336 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
338 panic("couldn't convert constant tarval (%+F)", node);
340 val = get_tarval_long(tv);
342 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
343 SET_IA32_ORIG_NODE(cnst, node);
350 * Transforms a SymConst.
352 static ir_node *gen_SymConst(ir_node *node)
354 ir_node *old_block = get_nodes_block(node);
355 ir_node *block = be_transform_node(old_block);
356 dbg_info *dbgi = get_irn_dbg_info(node);
357 ir_mode *mode = get_irn_mode(node);
/* float symconst: load through the frame; NOTE(review): mode_E here looks
 * suspicious for the SSE2 case (SSE has no 80-bit loads) — verify upstream */
360 if (mode_is_float(mode)) {
361 if (ia32_cg_config.use_sse2)
362 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 set_ia32_am_sc(cnst, get_SymConst_entity(node));
366 set_ia32_use_frame(cnst);
/* only address-of-entity symconsts are supported by this backend */
370 if (get_SymConst_kind(node) != symconst_addr_ent) {
371 panic("backend only support symconst_addr_ent (at %+F)", node);
373 entity = get_SymConst_entity(node);
/* thread-local entities need the TLS base register plus an Lea */
374 if (get_entity_owner(entity) == get_tls_type()) {
375 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
376 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
377 set_ia32_am_sc(lea, entity);
/* ordinary entity: its address is a plain immediate Const */
380 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
384 SET_IA32_ORIG_NODE(cnst, node);
390 * Create a float type for the given mode and cache it.
392 * @param mode the mode for the float type (might be integer mode for SSE2 types)
393 * @param align alignment
395 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
401 if (mode == mode_Iu) {
402 static ir_type *int_Iu[16] = {NULL, };
404 if (int_Iu[align] == NULL) {
405 int_Iu[align] = tp = new_type_primitive(mode);
406 /* set the specified alignment */
407 set_type_alignment_bytes(tp, align);
409 return int_Iu[align];
410 } else if (mode == mode_Lu) {
411 static ir_type *int_Lu[16] = {NULL, };
413 if (int_Lu[align] == NULL) {
414 int_Lu[align] = tp = new_type_primitive(mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return int_Lu[align];
419 } else if (mode == mode_F) {
420 static ir_type *float_F[16] = {NULL, };
422 if (float_F[align] == NULL) {
423 float_F[align] = tp = new_type_primitive(mode);
424 /* set the specified alignment */
425 set_type_alignment_bytes(tp, align);
427 return float_F[align];
428 } else if (mode == mode_D) {
429 static ir_type *float_D[16] = {NULL, };
431 if (float_D[align] == NULL) {
432 float_D[align] = tp = new_type_primitive(mode);
433 /* set the specified alignment */
434 set_type_alignment_bytes(tp, align);
436 return float_D[align];
438 static ir_type *float_E[16] = {NULL, };
440 if (float_E[align] == NULL) {
441 float_E[align] = tp = new_type_primitive(mode);
442 /* set the specified alignment */
443 set_type_alignment_bytes(tp, align);
445 return float_E[align];
450 * Create a float[2] array type for the given atomic type.
452 * @param tp the atomic type
454 static ir_type *ia32_create_float_array(ir_type *tp)
456 ir_mode *mode = get_type_mode(tp);
457 unsigned align = get_type_alignment_bytes(tp);
462 if (mode == mode_F) {
463 static ir_type *float_F[16] = {NULL, };
465 if (float_F[align] != NULL)
466 return float_F[align];
467 arr = float_F[align] = new_type_array(1, tp);
468 } else if (mode == mode_D) {
469 static ir_type *float_D[16] = {NULL, };
471 if (float_D[align] != NULL)
472 return float_D[align];
473 arr = float_D[align] = new_type_array(1, tp);
475 static ir_type *float_E[16] = {NULL, };
477 if (float_E[align] != NULL)
478 return float_E[align];
479 arr = float_E[align] = new_type_array(1, tp);
481 set_type_alignment_bytes(arr, align);
482 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
483 set_type_state(arr, layout_fixed);
487 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
488 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
490 static const struct {
491 const char *ent_name;
492 const char *cnst_str;
/* (fields for the mode selector and alignment are missing from this
 * sampled listing; the initializers below supply mode-id and align) */
495 } names [ia32_known_const_max] = {
/* NOTE(review): align 16 here is used to index the 16-entry caches inside
 * ia32_create_float_type — looks out of bounds; verify against upstream */
496 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
497 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
498 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
499 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
500 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
/* one cached entity per known constant; created lazily below */
502 static ir_entity *ent_cache[ia32_known_const_max];
504 const char *ent_name, *cnst_str;
510 ent_name = names[kct].ent_name;
511 if (! ent_cache[kct]) {
512 cnst_str = names[kct].cnst_str;
/* mode selector: 0 = 32-bit int, 1 = 64-bit int, otherwise float */
514 switch (names[kct].mode) {
515 case 0: mode = mode_Iu; break;
516 case 1: mode = mode_Lu; break;
517 default: mode = mode_F; break;
519 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
520 tp = ia32_create_float_type(mode, names[kct].align);
/* the ULL bias is a two-element array {0, 2^64} */
522 if (kct == ia32_ULLBIAS)
523 tp = ia32_create_float_array(tp);
524 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
526 set_entity_ld_ident(ent, get_entity_ident(ent));
527 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
528 set_entity_visibility(ent, ir_visibility_private);
530 if (kct == ia32_ULLBIAS) {
531 ir_initializer_t *initializer = create_initializer_compound(2);
533 set_initializer_compound_value(initializer, 0,
534 create_initializer_tarval(get_mode_null(mode)));
535 set_initializer_compound_value(initializer, 1,
536 create_initializer_tarval(tv));
538 set_entity_initializer(ent, initializer);
540 set_entity_initializer(ent, create_initializer_tarval(tv));
543 /* cache the entry */
544 ent_cache[kct] = ent;
547 return ent_cache[kct];
551 * return true if the node is a Proj(Load) and could be used in source address
552 * mode for another node. Will return only true if the @p other node is not
553 * dependent on the memory of the Load (for binary operations use the other
554 * input here, for unary operations use NULL).
556 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
557 ir_node *other, ir_node *other2, match_flags_t flags)
562 /* float constants are always available */
563 if (is_Const(node)) {
564 ir_mode *mode = get_irn_mode(node);
565 if (mode_is_float(mode)) {
/* only "simple" constants qualify — they are cheap to rematerialize */
566 if (ia32_cg_config.use_sse2) {
567 if (is_simple_sse_Const(node))
570 if (is_simple_x87_Const(node))
/* constants with more than one user are not folded into AM */
573 if (get_irn_n_edges(node) > 1)
/* otherwise the node must be the result Proj of a Load in this block */
581 load = get_Proj_pred(node);
582 pn = get_Proj_proj(node);
583 if (!is_Load(load) || pn != pn_Load_res)
585 if (get_nodes_block(load) != block)
587 /* we only use address mode if we're the only user of the load */
588 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
590 /* in some edge cases with address mode we might reach the load normally
591 * and through some AM sequence, if it is already materialized then we
592 * can't create an AM node from it */
593 if (be_is_transformed(node))
596 /* don't do AM if other node inputs depend on the load (via mem-proj) */
597 if (other != NULL && ia32_prevents_AM(block, load, other))
600 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Aggregated result of match_arguments(): the chosen address mode, the
 * transformed operands, and bookkeeping flags. (Several fields are missing
 * from this sampled listing — verify against the upstream file.) */
606 typedef struct ia32_address_mode_t ia32_address_mode_t;
607 struct ia32_address_mode_t {
612 ia32_op_type_t op_type;
/* operands may be swapped (set when AM was built for op1 of a
 * commutative operation) */
616 unsigned commutative : 1;
617 unsigned ins_permuted : 1;
620 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
622 /* construct load address */
623 memset(addr, 0, sizeof(addr[0]));
624 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
626 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
627 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
628 addr->mem = be_transform_node(mem);
/* Fill am->addr (and the related am fields) for a node that will be used in
 * source address mode. node is either a float Const (loaded from its constant
 * entity) or a Proj(Load). */
631 static void build_address(ia32_address_mode_t *am, ir_node *node,
632 ia32_create_am_flags_t flags)
634 ia32_address_t *addr = &am->addr;
640 /* floating point immediates */
641 if (is_Const(node)) {
/* address the constant's global entity via the symconst base */
642 ir_entity *entity = ia32_create_float_const_entity(node);
643 addr->base = get_symconst_base();
644 addr->index = noreg_GP;
646 addr->symconst_ent = entity;
647 addr->tls_segment = false;
/* the entity may use a different (smaller) mode than the node */
649 am->ls_mode = get_type_mode(get_entity_type(entity));
650 am->pinned = op_pin_state_floats;
/* otherwise: node is a Proj(Load); reuse the Load's address and memory */
654 load = get_Proj_pred(node);
655 ptr = get_Load_ptr(load);
656 mem = get_Load_mem(load);
657 new_mem = be_transform_node(mem);
658 am->pinned = get_irn_pinned(load);
659 am->ls_mode = get_Load_mode(load);
660 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
663 /* construct load address */
664 ia32_create_address_mode(addr, ptr, flags);
666 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
667 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy the fields of an ia32_address_t into the attributes of an ia32 node. */
671 static void set_address(ir_node *node, const ia32_address_t *addr)
673 set_ia32_am_scale(node, addr->scale);
674 set_ia32_am_sc(node, addr->symconst_ent);
675 set_ia32_am_offs_int(node, addr->offset);
676 set_ia32_am_tls_segment(node, addr->tls_segment);
677 if (addr->symconst_sign)
678 set_ia32_am_sc_sign(node);
/* (frame-entity handling; the guarding condition is missing from this
 * sampled listing — presumably `if (addr->use_frame)`) */
680 set_ia32_use_frame(node);
681 set_ia32_frame_ent(node, addr->frame_entity);
685 * Apply attributes of a given address mode to a node.
687 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
689 set_address(node, &am->addr);
691 set_ia32_op_type(node, am->op_type);
692 set_ia32_ls_mode(node, am->ls_mode);
693 if (am->pinned == op_pin_state_pinned) {
694 /* beware: some nodes are already pinned and did not allow to change the state */
695 if (get_irn_pinned(node) != op_pin_state_pinned)
696 set_irn_pinned(node, op_pin_state_pinned);
/* (guard for am->commutative is missing from this sampled listing) */
699 set_ia32_commutative(node);
703 * Check if a given node is a Down-Conv, i.e. an integer Conv
704 * from a mode with more bits to a mode with fewer (or equal) bits.
705 * Moreover, we return only true if the node has not more than 1 user.
707 * @param node the node
708 * @return non-zero if node is a Down-Conv
710 static int is_downconv(const ir_node *node)
718 /* we only want to skip the conv when we're the only user
719 * (because this test is used in the context of address-mode selection
720 * and we don't want to use address mode for multiple users) */
721 if (get_irn_n_edges(node) > 1)
724 src_mode = get_irn_mode(get_Conv_op(node));
725 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the destination must not widen */
727 ia32_mode_needs_gp_reg(src_mode) &&
728 ia32_mode_needs_gp_reg(dest_mode) &&
729 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
732 /** Skip all Down-Conv's on a given node and return the resulting node. */
733 ir_node *ia32_skip_downconv(ir_node *node)
735 while (is_downconv(node))
736 node = get_Conv_op(node);
/* Return true for a Conv that only changes signedness, i.e. source and
 * destination are GP-register integer modes of the same bit size, and the
 * Conv has a single user. */
741 static bool is_sameconv(ir_node *node)
749 /* we only want to skip the conv when we're the only user
750 * (because this test is used in the context of address-mode selection
751 * and we don't want to use address mode for multiple users) */
752 if (get_irn_n_edges(node) > 1)
755 src_mode = get_irn_mode(get_Conv_op(node));
756 dest_mode = get_irn_mode(node);
758 ia32_mode_needs_gp_reg(src_mode) &&
759 ia32_mode_needs_gp_reg(dest_mode) &&
760 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
763 /** Skip all signedness convs */
764 static ir_node *ia32_skip_sameconv(ir_node *node)
766 while (is_sameconv(node))
767 node = get_Conv_op(node);
/* Widen node to a 32-bit GP mode via an I2I Conv (signed or unsigned,
 * depending on node's mode). The computation of tgt_mode is missing from
 * this sampled listing — presumably mode_Is for signed, mode_Iu otherwise. */
772 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
774 ir_mode *mode = get_irn_mode(node);
779 if (mode_is_signed(mode)) {
784 block = get_nodes_block(node);
785 dbgi = get_irn_dbg_info(node);
787 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/*
 * NOTE(review): this listing appears line-sampled; braces, else-branches and
 * some statements are missing between the numbered lines — verify against
 * the upstream file before relying on exact control flow.
 */
791 * matches operands of a node into ia32 addressing/operand modes. This covers
792 * usage of source address mode, immediates, operations with non 32-bit modes,
794 * The resulting data is filled into the @p am struct. block is the block
795 * of the node whose arguments are matched. op1, op2 are the first and second
796 * input that are matched (op1 may be NULL). other_op is another unrelated
797 * input that is not matched! but which is needed sometimes to check if AM
798 * for op1/op2 is legal.
799 * @p flags describes the supported modes of the operation in detail.
801 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
802 ir_node *op1, ir_node *op2, ir_node *other_op,
805 ia32_address_t *addr = &am->addr;
806 ir_mode *mode = get_irn_mode(op2);
807 int mode_bits = get_mode_size_bits(mode);
808 ir_node *new_op1, *new_op2;
810 unsigned commutative;
811 int use_am_and_immediates;
814 memset(am, 0, sizeof(am[0]));
/* decode the match flags once */
816 commutative = (flags & match_commutative) != 0;
817 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
818 use_am = (flags & match_am) != 0;
819 use_immediate = (flags & match_immediate) != 0;
820 assert(!use_am_and_immediates || use_immediate);
823 assert(!commutative || op1 != NULL);
824 assert(use_am || !(flags & match_8bit_am));
825 assert(use_am || !(flags & match_16bit_am));
/* narrow modes disable AM unless explicitly supported */
827 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
828 (mode_bits == 16 && !(flags & match_16bit_am))) {
832 /* we can simply skip downconvs for mode neutral nodes: the upper bits
833 * can be random for these operations */
834 if (flags & match_mode_neutral) {
835 op2 = ia32_skip_downconv(op2);
837 op1 = ia32_skip_downconv(op1);
/* signedness-only convs never change the bits — always skippable */
840 op2 = ia32_skip_sameconv(op2);
842 op1 = ia32_skip_sameconv(op1);
846 /* match immediates. firm nodes are normalized: constants are always on the
849 if (!(flags & match_try_am) && use_immediate) {
850 new_op2 = ia32_try_create_Immediate(op2, 0);
/* try source AM for op2 first (the normalized constant side) */
853 if (new_op2 == NULL &&
854 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
855 build_address(am, op2, ia32_create_am_normal);
856 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
857 if (mode_is_float(mode)) {
858 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
862 am->op_type = ia32_AddrModeS;
/* for commutative ops also try AM for op1, swapping the operands */
863 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
865 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
867 build_address(am, op1, ia32_create_am_normal);
869 if (mode_is_float(mode)) {
870 noreg = ia32_new_NoReg_vfp(current_ir_graph);
875 if (new_op2 != NULL) {
878 new_op1 = be_transform_node(op2);
/* record the swap so consumers can undo/compensate it */
880 am->ins_permuted = true;
882 am->op_type = ia32_AddrModeS;
/* no AM matched: plain register/immediate operands */
885 am->op_type = ia32_Normal;
887 if (flags & match_try_am) {
893 mode = get_irn_mode(op2);
/* widen sub-32bit operands when the operation requires 32 bit */
894 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
895 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
897 new_op2 = create_upconv(op2, NULL);
898 am->ls_mode = mode_Iu;
900 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
902 new_op2 = be_transform_node(op2);
903 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill missing address parts with no-register / no-memory defaults */
906 if (addr->base == NULL)
907 addr->base = noreg_GP;
908 if (addr->index == NULL)
909 addr->index = noreg_GP;
910 if (addr->mem == NULL)
913 am->new_op1 = new_op1;
914 am->new_op2 = new_op2;
915 am->commutative = commutative;
919 * "Fixes" a node that uses address mode by turning it into mode_T
920 * and returning a pn_ia32_res Proj.
922 * @param node the node
923 * @param am its address mode
925 * @return a Proj(pn_ia32_res) if a memory address mode is used,
928 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do when no memory Proj depends on the consumed Load */
933 if (am->mem_proj == NULL)
936 /* we have to create a mode_T so the old MemProj can attach to us */
937 mode = get_irn_mode(node);
938 load = get_Proj_pred(am->mem_proj);
/* redirect users of the old Load to this AM node */
940 be_set_transformed_node(load, node);
942 if (mode != mode_T) {
943 set_irn_mode(node, mode_T);
944 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
951 * Construct a standard binary operation, set AM and immediate if required.
953 * @param node The original node for which the binop is created
954 * @param op1 The first operand
955 * @param op2 The second operand
956 * @param func The node constructor function
957 * @return The constructed ia32 node.
959 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
960 construct_binop_func *func, match_flags_t flags)
963 ir_node *block, *new_block, *new_node;
964 ia32_address_mode_t am;
965 ia32_address_t *addr = &am.addr;
/* match operands into address mode / immediates first */
967 block = get_nodes_block(node);
968 match_arguments(&am, block, op1, op2, NULL, flags);
970 dbgi = get_irn_dbg_info(node);
971 new_block = be_transform_node(block);
972 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
973 am.new_op1, am.new_op2);
974 set_am_attributes(new_node, &am);
975 /* we can't use source address mode anymore when using immediates */
976 if (!(flags & match_am_and_immediates) &&
977 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
978 set_ia32_am_support(new_node, ia32_am_none);
979 SET_IA32_ORIG_NODE(new_node, node);
/* attach the old memory Proj if source AM consumed a Load */
981 new_node = fix_mem_proj(new_node, &am);
987 * Generic names for the inputs of an ia32 binary op.
990 n_ia32_l_binop_left, /**< ia32 left input */
991 n_ia32_l_binop_right, /**< ia32 right input */
992 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* ensure the generic input numbering matches the generated Adc/Sbb nodes so
 * gen_binop_flags can use the generic names for both */
994 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
995 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
996 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
997 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
998 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
999 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
1002 * Construct a binary operation which also consumes the eflags.
1004 * @param node The node to transform
1005 * @param func The node constructor function
1006 * @param flags The match flags
1007 * @return The constructor ia32 node
1009 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1010 match_flags_t flags)
1012 ir_node *src_block = get_nodes_block(node);
/* fetch operands via the generic binop input names (see asserts above) */
1013 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1014 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1015 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1017 ir_node *block, *new_node, *new_eflags;
1018 ia32_address_mode_t am;
1019 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a consumed Load */
1021 match_arguments(&am, src_block, op1, op2, eflags, flags);
1023 dbgi = get_irn_dbg_info(node);
1024 block = be_transform_node(src_block);
1025 new_eflags = be_transform_node(eflags);
1026 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1027 am.new_op1, am.new_op2, new_eflags);
1028 set_am_attributes(new_node, &am);
1029 /* we can't use source address mode anymore when using immediates */
1030 if (!(flags & match_am_and_immediates) &&
1031 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1032 set_ia32_am_support(new_node, ia32_am_none);
1033 SET_IA32_ORIG_NODE(new_node, node);
1035 new_node = fix_mem_proj(new_node, &am);
1040 static ir_node *get_fpcw(void)
1042 if (initial_fpcw != NULL)
1043 return initial_fpcw;
1045 initial_fpcw = be_transform_node(old_initial_fpcw);
1046 return initial_fpcw;
1050 * Construct a standard binary operation, set AM and immediate if required.
1052 * @param op1 The first operand
1053 * @param op2 The second operand
1054 * @param func The node constructor function
1055 * @return The constructed ia32 node.
1057 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1058 construct_binop_float_func *func)
1060 ir_mode *mode = get_irn_mode(node);
1062 ir_node *block, *new_block, *new_node;
1063 ia32_address_mode_t am;
1064 ia32_address_t *addr = &am.addr;
1065 ia32_x87_attr_t *attr;
1066 /* All operations are considered commutative, because there are reverse
1068 match_flags_t flags = match_commutative;
1070 /* happens for div nodes... */
1071 if (mode == mode_T) {
1073 mode = get_Div_resmode(node);
1075 panic("can't determine mode");
1078 /* cannot use address mode with long double on x87 */
1079 if (get_mode_size_bits(mode) <= 64)
1082 block = get_nodes_block(node);
1083 match_arguments(&am, block, op1, op2, NULL, flags);
1085 dbgi = get_irn_dbg_info(node);
1086 new_block = be_transform_node(block);
/* x87 binops additionally take the fpu control word as input */
1087 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1088 am.new_op1, am.new_op2, get_fpcw());
1089 set_am_attributes(new_node, &am);
/* propagate a possible operand swap into the x87 attribute so the
 * emitter can select the reverse instruction variant */
1091 attr = get_ia32_x87_attr(new_node);
1092 attr->attr.data.ins_permuted = am.ins_permuted;
1094 SET_IA32_ORIG_NODE(new_node, node);
1096 new_node = fix_mem_proj(new_node, &am);
1102 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1104 * @param op1 The first operand
1105 * @param op2 The second operand
1106 * @param func The node constructor function
1107 * @return The constructed ia32 node.
1109 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1110 construct_shift_func *func,
1111 match_flags_t flags)
1114 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1116 assert(! mode_is_float(get_irn_mode(node)));
1117 assert(flags & match_immediate);
1118 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* the shifted value: skip downconvs for mode-neutral ops, widen otherwise */
1120 if (flags & match_mode_neutral) {
1121 op1 = ia32_skip_downconv(op1);
1122 new_op1 = be_transform_node(op1);
1123 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1124 new_op1 = create_upconv(op1, node);
1126 new_op1 = be_transform_node(op1);
1129 /* the shift amount can be any mode that is bigger than 5 bits, since all
1130 * other bits are ignored anyway */
1131 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1132 ir_node *const op = get_Conv_op(op2);
1133 if (mode_is_float(get_irn_mode(op)))
1136 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
/* the shift amount may become an immediate */
1138 new_op2 = create_immediate_or_transform(op2, 0);
1140 dbgi = get_irn_dbg_info(node);
1141 block = get_nodes_block(node);
1142 new_block = be_transform_node(block);
1143 new_node = func(dbgi, new_block, new_op1, new_op2);
1144 SET_IA32_ORIG_NODE(new_node, node);
1146 /* lowered shift instruction may have a dependency operand, handle it here */
1147 if (get_irn_arity(node) == 3) {
1148 /* we have a dependency */
1149 ir_node* dep = get_irn_n(node, 2);
1150 if (get_irn_n_edges(dep) > 1) {
1151 /* ... which has at least one user other than 'node' */
1152 ir_node *new_dep = be_transform_node(dep);
1153 add_irn_dep(new_node, new_dep);
1162 * Construct a standard unary operation, set AM and immediate if required.
1164 * @param op The operand
1165 * @param func The node constructor function
1166 * @return The constructed ia32 node.
1168 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1169 match_flags_t flags)
1172 ir_node *block, *new_block, *new_op, *new_node;
1174 assert(flags == 0 || flags == match_mode_neutral);
/* upper bits don't matter for mode-neutral ops, so drop narrowing convs */
1175 if (flags & match_mode_neutral) {
1176 op = ia32_skip_downconv(op);
1179 new_op = be_transform_node(op);
1180 dbgi = get_irn_dbg_info(node);
1181 block = get_nodes_block(node);
1182 new_block = be_transform_node(block);
1183 new_node = func(dbgi, new_block, new_op);
1185 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from a filled ia32_address_t, transforming base and
 * index (or substituting noreg, in lines missing from this sampled listing). */
1190 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1191 ia32_address_t *addr)
1193 ir_node *base, *index, *res;
1199 base = be_transform_node(base);
1202 index = addr->index;
1203 if (index == NULL) {
1206 index = be_transform_node(index);
1209 /* segment overrides are ineffective for Leas :-( so we have to patch
1211 if (addr->tls_segment) {
1212 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1213 assert(addr->symconst_ent != NULL);
1214 if (base == noreg_GP)
/* fold the TLS base into the base register via an extra Lea */
1217 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1218 addr->tls_segment = false;
1221 res = new_bd_ia32_Lea(dbgi, block, base, index);
1222 set_address(res, addr);
1228 * Returns non-zero if a given address mode has a symbolic or
1229 * numerical offset != 0.
1231 static int am_has_immediates(const ia32_address_t *addr)
/* also true for frame-entity based addresses, which become offsets later */
1233 return addr->offset != 0 || addr->symconst_ent != NULL
1234 || addr->frame_entity || addr->use_frame;
1238 * Creates an ia32 Add.
1240 * @return the created ia32 Add node
1242 static ir_node *gen_Add(ir_node *node)
1244 ir_mode *mode = get_irn_mode(node);
1245 ir_node *op1 = get_Add_left(node);
1246 ir_node *op2 = get_Add_right(node);
1248 ir_node *block, *new_block, *new_node, *add_immediate_op;
1249 ia32_address_t addr;
1250 ia32_address_mode_t am;
/* float adds go to SSE (xAdd) or the x87 stack, no Lea tricks apply */
1252 if (mode_is_float(mode)) {
1253 if (ia32_cg_config.use_sse2)
1254 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1255 match_commutative | match_am);
1257 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1260 ia32_mark_non_am(node);
/* integer add is mode-neutral: strip downconvs of both operands */
1262 op2 = ia32_skip_downconv(op2);
1263 op1 = ia32_skip_downconv(op1);
1267 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1268 * 1. Add with immediate -> Lea
1269 * 2. Add with possible source address mode -> Add
1270 * 3. Otherwise -> Lea
/* force address-mode analysis of the whole Add tree */
1272 memset(&addr, 0, sizeof(addr));
1273 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1274 add_immediate_op = NULL;
1276 dbgi = get_irn_dbg_info(node);
1277 block = get_nodes_block(node);
1278 new_block = be_transform_node(block);
/* case 0: everything folded into immediates -> a single Const */
1281 if (addr.base == NULL && addr.index == NULL) {
1282 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1283 addr.symconst_sign, 0, addr.offset);
1284 SET_IA32_ORIG_NODE(new_node, node);
1287 /* add with immediate? */
1288 if (addr.index == NULL) {
1289 add_immediate_op = addr.base;
1290 } else if (addr.base == NULL && addr.scale == 0) {
1291 add_immediate_op = addr.index;
1294 if (add_immediate_op != NULL) {
/* an "add x, 0" should have been folded by the middle-end already */
1295 if (!am_has_immediates(&addr)) {
1296 #ifdef DEBUG_libfirm
1297 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1300 return be_transform_node(add_immediate_op);
/* case 1: reg + immediate is cheapest as a Lea (no flags clobber) */
1303 new_node = create_lea_from_address(dbgi, new_block, &addr);
1304 SET_IA32_ORIG_NODE(new_node, node);
1308 /* test if we can use source address mode */
1309 match_arguments(&am, block, op1, op2, NULL, match_commutative
1310 | match_mode_neutral | match_am | match_immediate | match_try_am);
1312 /* construct an Add with source address mode */
1313 if (am.op_type == ia32_AddrModeS) {
1314 ia32_address_t *am_addr = &am.addr;
1315 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1316 am_addr->index, am_addr->mem, am.new_op1,
1318 set_am_attributes(new_node, &am);
1319 SET_IA32_ORIG_NODE(new_node, node);
/* re-route the consumed Load's memory Proj to the new node */
1321 new_node = fix_mem_proj(new_node, &am);
1326 /* otherwise construct a lea */
1327 new_node = create_lea_from_address(dbgi, new_block, &addr);
1328 SET_IA32_ORIG_NODE(new_node, node);
1333 * Creates an ia32 Mul.
1335 * @return the created ia32 Mul node
1337 static ir_node *gen_Mul(ir_node *node)
1339 ir_node *op1 = get_Mul_left(node);
1340 ir_node *op2 = get_Mul_right(node);
1341 ir_mode *mode = get_irn_mode(node);
/* floats: SSE xMul or x87 vfmul depending on configuration */
1343 if (mode_is_float(mode)) {
1344 if (ia32_cg_config.use_sse2)
1345 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1346 match_commutative | match_am);
1348 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer: IMul supports source AM, immediates and is mode-neutral */
1350 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1351 match_commutative | match_am | match_mode_neutral |
1352 match_immediate | match_am_and_immediates);
1356 * Creates an ia32 Mulh.
1357 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1358 * this result while Mul returns the lower 32 bit.
1360 * @return the created ia32 Mulh node
1362 static ir_node *gen_Mulh(ir_node *node)
1364 dbg_info *dbgi = get_irn_dbg_info(node);
1365 ir_node *op1 = get_Mulh_left(node);
1366 ir_node *op2 = get_Mulh_right(node);
1367 ir_mode *mode = get_irn_mode(node);
1369 ir_node *proj_res_high;
/* only the 32x32->64 widening multiply of the hardware is supported */
1371 if (get_mode_size_bits(mode) != 32) {
1372 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand IMUL, unsigned -> MUL; both write edx:eax */
1375 if (mode_is_signed(mode)) {
1376 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1377 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1379 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1380 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
/* return only the high half of the 64 bit product */
1382 return proj_res_high;
1386 * Creates an ia32 And.
1388 * @return The created ia32 And node
1390 static ir_node *gen_And(ir_node *node)
1392 ir_node *op1 = get_And_left(node);
1393 ir_node *op2 = get_And_right(node);
1394 assert(! mode_is_float(get_irn_mode(node)));
1396 /* is it a zero extension? */
1397 if (is_Const(op2)) {
1398 ir_tarval *tv = get_Const_tarval(op2);
1399 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF == zero-extension from 8/16 bit -> use a Conv
 * (movzx) instead of an And instruction */
1401 if (v == 0xFF || v == 0xFFFF) {
1402 dbg_info *dbgi = get_irn_dbg_info(node);
1403 ir_node *block = get_nodes_block(node);
1410 assert(v == 0xFFFF);
1413 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And with AM/immediate support */
1418 return gen_binop(node, op1, op2, new_bd_ia32_And,
1419 match_commutative | match_mode_neutral | match_am | match_immediate);
1423 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
1426 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1428 if (is_Const(value1) && is_Const(value2)) {
1429 ir_tarval *tv1 = get_Const_tarval(value1);
1430 ir_tarval *tv2 = get_Const_tarval(value2);
1431 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1432 long v1 = get_tarval_long(tv1);
1433 long v2 = get_tarval_long(tv2);
/* true iff the constants form the (x, 32-x) pair used by ShlD/ShrD */
1434 return v1 <= v2 && v2 == 32-v1;
/* constructor signature shared by ShlD and ShrD node builders */
1440 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1441 ir_node *high, ir_node *low,
1445 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1446 * op1 - target to be shifted
1447 * op2 - contains bits to be shifted into target
1449 * Only op3 can be an immediate.
1451 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1452 ir_node *high, ir_node *low, ir_node *count,
1453 new_shiftd_func func)
1455 ir_node *new_block = be_transform_node(block);
1456 ir_node *new_high = be_transform_node(high);
1457 ir_node *new_low = be_transform_node(low);
1461 /* the shift amount can be any mode that is bigger than 5 bits, since all
1462 * other bits are ignored anyway */
1463 while (is_Conv(count) &&
1464 get_irn_n_edges(count) == 1 &&
1465 mode_is_int(get_irn_mode(count))) {
1466 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1467 count = get_Conv_op(count);
/* count may become an x86 immediate if it is constant */
1469 new_count = create_immediate_or_transform(count, 0);
1471 new_node = func(dbgi, new_block, new_high, new_low, new_count);
/**
 * Try to recognize an Or node as one of the 64-bit double-shift patterns
 * produced by the 64-bit lowering phase (lower_dw) and emit a ShlD/ShrD.
 * Returns the generated node, NOTE(review): presumably NULL when no
 * pattern matched (the fall-through return is outside this excerpt).
 */
1475 static ir_node *match_64bit_shift(ir_node *node)
1477 ir_node *op1 = get_Or_left(node);
1478 ir_node *op2 = get_Or_right(node);
1486 /* match ShlD operation */
1487 if (is_Shl(op1) && is_Shr(op2)) {
1488 ir_node *shl_right = get_Shl_right(op1);
1489 ir_node *shl_left = get_Shl_left(op1);
1490 ir_node *shr_right = get_Shr_right(op2);
1491 ir_node *shr_left = get_Shr_left(op2);
1492 /* constant ShlD operation */
1493 if (is_complementary_shifts(shl_right, shr_right)) {
1494 dbg_info *dbgi = get_irn_dbg_info(node);
1495 ir_node *block = get_nodes_block(node);
1496 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1499 /* constant ShrD operation */
1500 if (is_complementary_shifts(shr_right, shl_right)) {
1501 dbg_info *dbgi = get_irn_dbg_info(node);
1502 ir_node *block = get_nodes_block(node);
1503 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1506 /* lower_dw produces the following for ShlD:
1507 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1508 if (is_Shr(shr_left) && is_Not(shr_right)
1509 && is_Const_1(get_Shr_right(shr_left))
1510 && get_Not_op(shr_right) == shl_right) {
1511 dbg_info *dbgi = get_irn_dbg_info(node);
1512 ir_node *block = get_nodes_block(node);
1513 ir_node *val_h = get_Shr_left(shr_left);
1514 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1517 /* lower_dw produces the following for ShrD:
1518 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1519 if (is_Shl(shl_left) && is_Not(shl_right)
1520 && is_Const_1(get_Shl_right(shl_left))
1521 && get_Not_op(shl_right) == shr_right) {
1522 dbg_info *dbgi = get_irn_dbg_info(node);
1523 ir_node *block = get_nodes_block(node);
1524 ir_node *val_h = get_Shl_left(shl_left);
1525 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1534 * Creates an ia32 Or.
1536 * @return The created ia32 Or node
1538 static ir_node *gen_Or(ir_node *node)
1540 ir_node *op1 = get_Or_left(node);
1541 ir_node *op2 = get_Or_right(node);
/* first try the 64-bit double-shift patterns (ShlD/ShrD) */
1544 res = match_64bit_shift(node);
1548 assert (! mode_is_float(get_irn_mode(node)));
1549 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1550 | match_mode_neutral | match_am | match_immediate);
1556 * Creates an ia32 Eor.
1558 * @return The created ia32 Eor node
1560 static ir_node *gen_Eor(ir_node *node)
1562 ir_node *op1 = get_Eor_left(node);
1563 ir_node *op2 = get_Eor_right(node);
/* floats are handled by dedicated SSE/x87 paths elsewhere */
1565 assert(! mode_is_float(get_irn_mode(node)));
1566 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1567 | match_mode_neutral | match_am | match_immediate);
1572 * Creates an ia32 Sub.
1574 * @return The created ia32 Sub node
1576 static ir_node *gen_Sub(ir_node *node)
1578 ir_node *op1 = get_Sub_left(node);
1579 ir_node *op2 = get_Sub_right(node);
1580 ir_mode *mode = get_irn_mode(node);
1582 if (mode_is_float(mode)) {
1583 if (ia32_cg_config.use_sse2)
1584 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1586 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub(x, Const) should have been normalized to Add(x, -Const) before */
1589 if (is_Const(op2)) {
1590 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: Sub is not commutative, so no match_commutative here */
1594 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1595 | match_am | match_immediate);
/**
 * Build the memory input for a node that consumed a Load via address mode.
 * Combines the node's original memory predecessor with the consumed
 * Load's memory, while avoiding self-referencing memory loops.
 */
1598 static ir_node *transform_AM_mem(ir_node *const block,
1599 ir_node *const src_val,
1600 ir_node *const src_mem,
1601 ir_node *const am_mem)
1603 if (is_NoMem(am_mem)) {
1604 return be_transform_node(src_mem);
1605 } else if (is_Proj(src_val) &&
1607 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1608 /* avoid memory loop */
1610 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1611 ir_node *const ptr_pred = get_Proj_pred(src_val);
1612 int const arity = get_Sync_n_preds(src_mem);
/* collect the Sync predecessors, dropping the one feeding the Load */
1617 NEW_ARR_A(ir_node*, ins, arity + 1);
1619 /* NOTE: This sometimes produces dead-code because the old sync in
1620 * src_mem might not be used anymore, we should detect this case
1621 * and kill the sync... */
1622 for (i = arity - 1; i >= 0; --i) {
1623 ir_node *const pred = get_Sync_pred(src_mem, i);
1625 /* avoid memory loop */
1626 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1629 ins[n++] = be_transform_node(pred);
1632 if (n==1 && ins[0] == am_mem) {
1634 /* creating a new Sync and relying on CSE may fail,
1635 * if am_mem is a ProjM, which does not yet verify. */
1639 return new_r_Sync(block, n, ins);
/* fallback: Sync of the transformed old memory and the AM memory */
1643 ins[0] = be_transform_node(src_mem);
1645 return new_r_Sync(block, 2, ins);
1650 * Create a 32bit to 64bit signed extension.
1652 * @param dbgi debug info
1653 * @param block the block where node nodes should be placed
1654 * @param val the value to extend
1655 * @param orig the original node
1657 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1658 ir_node *val, const ir_node *orig)
/* either the short cltd/cdq form (needs val in eax) or a generic
 * arithmetic right shift by 31 to replicate the sign bit */
1663 if (ia32_cg_config.use_short_sex_eax) {
1664 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1665 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1667 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1668 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1670 SET_IA32_ORIG_NODE(res, orig);
1675 * Generates an ia32 Div with additional infrastructure for the
1676 * register allocator if needed.
/* shared implementation for firm Div and Mod nodes (idiv/div produce both
 * quotient and remainder; callers project out the result they need) */
1678 static ir_node *create_Div(ir_node *node)
1680 dbg_info *dbgi = get_irn_dbg_info(node);
1681 ir_node *block = get_nodes_block(node);
1682 ir_node *new_block = be_transform_node(block);
1683 int throws_exception = ir_throws_exception(node);
1690 ir_node *sign_extension;
1691 ia32_address_mode_t am;
1692 ia32_address_t *addr = &am.addr;
1694 /* the upper bits have random contents for smaller modes */
1695 switch (get_irn_opcode(node)) {
1697 op1 = get_Div_left(node);
1698 op2 = get_Div_right(node);
1699 mem = get_Div_mem(node);
1700 mode = get_Div_resmode(node);
1703 op1 = get_Mod_left(node);
1704 op2 = get_Mod_right(node);
1705 mem = get_Mod_mem(node);
1706 mode = get_Mod_resmode(node);
1709 panic("invalid divmod node %+F", node);
/* operands must be full 32 bit (match_upconv_32) for the hw divide */
1712 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1714 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1715 is the memory of the consumed address. We can have only the second op as address
1716 in Div nodes, so check only op2. */
1717 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* idiv needs edx:eax sign-extended, div needs edx zeroed */
1719 if (mode_is_signed(mode)) {
1720 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1721 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1722 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1724 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1726 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1727 addr->index, new_mem, am.new_op2,
1728 am.new_op1, sign_extension);
/* division may trap (divide by zero), keep the exception attribute */
1730 ir_set_throws_exception(new_node, throws_exception);
1732 set_irn_pinned(new_node, get_irn_pinned(node));
1734 set_am_attributes(new_node, &am);
1735 SET_IA32_ORIG_NODE(new_node, node);
1737 new_node = fix_mem_proj(new_node, &am);
1743 * Generates an ia32 Mod.
/* Mod shares the divide instruction; the remainder is projected later */
1745 static ir_node *gen_Mod(ir_node *node)
1747 return create_Div(node);
1751 * Generates an ia32 Div.
1753 static ir_node *gen_Div(ir_node *node)
1755 ir_mode *mode = get_Div_resmode(node);
/* float division: SSE xDiv or x87 vfdiv; integer falls through to
 * the shared div/idiv construction */
1756 if (mode_is_float(mode)) {
1757 ir_node *op1 = get_Div_left(node);
1758 ir_node *op2 = get_Div_right(node);
1760 if (ia32_cg_config.use_sse2) {
1761 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1763 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1767 return create_Div(node);
1771 * Creates an ia32 Shl.
1773 * @return The created ia32 Shl node
1775 static ir_node *gen_Shl(ir_node *node)
1777 ir_node *left = get_Shl_left(node);
1778 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: lower bits don't depend on upper bits */
1780 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1781 match_mode_neutral | match_immediate);
1785 * Creates an ia32 Shr.
1787 * @return The created ia32 Shr node
1789 static ir_node *gen_Shr(ir_node *node)
1791 ir_node *left = get_Shr_left(node);
1792 ir_node *right = get_Shr_right(node);
/* not mode-neutral: the zero-filled upper bits shift into the result */
1794 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1800 * Creates an ia32 Sar.
1802 * @return The created ia32 Shrs node
1804 static ir_node *gen_Shrs(ir_node *node)
1806 ir_node *left = get_Shrs_left(node);
1807 ir_node *right = get_Shrs_right(node);
/* Shrs by 31 replicates the sign bit -> use the sign-extension helper */
1809 if (is_Const(right)) {
1810 ir_tarval *tv = get_Const_tarval(right);
1811 long val = get_tarval_long(tv);
1813 /* this is a sign extension */
1814 dbg_info *dbgi = get_irn_dbg_info(node);
1815 ir_node *block = be_transform_node(get_nodes_block(node));
1816 ir_node *new_op = be_transform_node(left);
1818 return create_sex_32_64(dbgi, block, new_op, node);
1822 /* 8 or 16 bit sign extension? */
1823 if (is_Const(right) && is_Shl(left)) {
1824 ir_node *shl_left = get_Shl_left(left);
1825 ir_node *shl_right = get_Shl_right(left);
1826 if (is_Const(shl_right)) {
1827 ir_tarval *tv1 = get_Const_tarval(right);
1828 ir_tarval *tv2 = get_Const_tarval(shl_right);
/* Shrs(Shl(x, c), c) with c == 16/24 == sign-extend from 16/8 bit,
 * which maps to a single movsx via an I2I Conv */
1829 if (tv1 == tv2 && tarval_is_long(tv1)) {
1830 long val = get_tarval_long(tv1);
1831 if (val == 16 || val == 24) {
1832 dbg_info *dbgi = get_irn_dbg_info(node);
1833 ir_node *block = get_nodes_block(node);
1843 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: arithmetic shift right (sar) */
1852 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1858 * Creates an ia32 Rol.
1860 * @param op1 The first operator
1861 * @param op2 The second operator
1862 * @return The created ia32 RotL node
1864 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1866 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1872 * Creates an ia32 Ror.
1873 * NOTE: There is no RotR with immediate because this would always be a RotL
1874 * "imm-mode_size_bits" which can be pre-calculated.
1876 * @param op1 The first operator
1877 * @param op2 The second operator
1878 * @return The created ia32 RotR node
1880 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1882 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1888 * Creates an ia32 RotR or RotL (depending on the found pattern).
1890 * @return The created ia32 RotL or RotR node
1892 static ir_node *gen_Rotl(ir_node *node)
1894 ir_node *op1 = get_Rotl_left(node);
1895 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -y) is equivalent to Ror(x, y); use ror to save the negate */
1897 if (is_Minus(op2)) {
1898 return gen_Ror(node, op1, get_Minus_op(op2));
1901 return gen_Rol(node, op1, op2);
1907 * Transforms a Minus node.
1909 * @return The created ia32 Minus node
1911 static ir_node *gen_Minus(ir_node *node)
1913 ir_node *op = get_Minus_op(node);
1914 ir_node *block = be_transform_node(get_nodes_block(node));
1915 dbg_info *dbgi = get_irn_dbg_info(node);
1916 ir_mode *mode = get_irn_mode(node);
/* float negate: SSE flips the sign bit via xor with a sign-mask constant
 * loaded from memory; x87 has a dedicated fchs instruction */
1921 if (mode_is_float(mode)) {
1922 ir_node *new_op = be_transform_node(op);
1923 if (ia32_cg_config.use_sse2) {
1924 /* TODO: non-optimal... if we have many xXors, then we should
1925 * rather create a load for the const and use that instead of
1926 * several AM nodes... */
1927 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1929 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1930 noreg_GP, nomem, new_op, noreg_xmm);
/* pick the 32 or 64 bit sign-bit mask entity */
1932 size = get_mode_size_bits(mode);
1933 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1935 set_ia32_am_sc(new_node, ent);
1936 set_ia32_op_type(new_node, ia32_AddrModeS);
1937 set_ia32_ls_mode(new_node, mode);
1939 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1942 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1945 SET_IA32_ORIG_NODE(new_node, node);
1951 * Transforms a Not node.
1953 * @return The created ia32 Not node
1955 static ir_node *gen_Not(ir_node *node)
1957 ir_node *op = get_Not_op(node);
1959 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1960 assert (! mode_is_float(get_irn_mode(node)));
1962 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/**
 * Create an absolute-value (and optionally negated absolute-value)
 * computation. Floats: SSE clears the sign bit by And-ing with an
 * abs-mask constant; x87 uses fabs (and fchs for the negate case).
 * NOTE(review): the integer path is outside this excerpt.
 */
1965 static ir_node *create_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1966 bool negate, ir_node *node)
1968 ir_node *new_block = be_transform_node(block);
1969 ir_mode *mode = get_irn_mode(op);
1975 if (mode_is_float(mode)) {
1976 new_op = be_transform_node(op);
1978 if (ia32_cg_config.use_sse2) {
1979 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1980 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1981 noreg_GP, nomem, new_op, noreg_fp);
/* pick the 32 or 64 bit abs mask (all bits except the sign bit) */
1983 size = get_mode_size_bits(mode);
1984 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1986 set_ia32_am_sc(new_node, ent);
1988 SET_IA32_ORIG_NODE(new_node, node);
1990 set_ia32_op_type(new_node, ia32_AddrModeS);
1991 set_ia32_ls_mode(new_node, mode);
1993 /* TODO, implement -Abs case */
1996 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1997 SET_IA32_ORIG_NODE(new_node, node);
/* -Abs(x): append an x87 sign change after fabs */
1999 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
2000 SET_IA32_ORIG_NODE(new_node, node);
2009 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
2011 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2013 dbg_info *dbgi = get_irn_dbg_info(cmp);
2014 ir_node *block = get_nodes_block(cmp);
2015 ir_node *new_block = be_transform_node(block);
2016 ir_node *op1 = be_transform_node(x);
2017 ir_node *op2 = be_transform_node(n);
/* bt copies bit op2 of op1 into the carry flag */
2019 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/**
 * Map a firm ir_relation to the ia32 condition code used by Setcc/Jcc,
 * taking the operand mode into account: float compares use the parity
 * aware codes, signed ints use less/greater, unsigned ints below/above.
 */
2022 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2025 if (mode_is_float(mode)) {
2027 case ir_relation_equal: return ia32_cc_float_equal;
2028 case ir_relation_less: return ia32_cc_float_below;
2029 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2030 case ir_relation_greater: return ia32_cc_float_above;
2031 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2032 case ir_relation_less_greater: return ia32_cc_not_equal;
2033 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2034 case ir_relation_unordered: return ia32_cc_parity;
2035 case ir_relation_unordered_equal: return ia32_cc_equal;
2036 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2037 case ir_relation_unordered_less_equal:
2038 return ia32_cc_float_unordered_below_equal;
2039 case ir_relation_unordered_greater:
2040 return ia32_cc_float_unordered_above;
2041 case ir_relation_unordered_greater_equal:
2042 return ia32_cc_float_unordered_above_equal;
2043 case ir_relation_unordered_less_greater:
2044 return ia32_cc_float_not_equal;
2045 case ir_relation_false:
2046 case ir_relation_true:
2047 /* should we introduce a jump always/jump never? */
2050 panic("Unexpected float pnc");
/* signed integer compare: the "unordered" variants collapse onto the
 * ordered ones since integers are always ordered */
2051 } else if (mode_is_signed(mode)) {
2053 case ir_relation_unordered_equal:
2054 case ir_relation_equal: return ia32_cc_equal;
2055 case ir_relation_unordered_less:
2056 case ir_relation_less: return ia32_cc_less;
2057 case ir_relation_unordered_less_equal:
2058 case ir_relation_less_equal: return ia32_cc_less_equal;
2059 case ir_relation_unordered_greater:
2060 case ir_relation_greater: return ia32_cc_greater;
2061 case ir_relation_unordered_greater_equal:
2062 case ir_relation_greater_equal: return ia32_cc_greater_equal;
2063 case ir_relation_unordered_less_greater:
2064 case ir_relation_less_greater: return ia32_cc_not_equal;
2065 case ir_relation_less_equal_greater:
2066 case ir_relation_unordered:
2067 case ir_relation_false:
2068 case ir_relation_true:
2069 /* introduce jump always/jump never? */
2072 panic("Unexpected pnc");
/* unsigned integer compare: use below/above condition codes */
2075 case ir_relation_unordered_equal:
2076 case ir_relation_equal: return ia32_cc_equal;
2077 case ir_relation_unordered_less:
2078 case ir_relation_less: return ia32_cc_below;
2079 case ir_relation_unordered_less_equal:
2080 case ir_relation_less_equal: return ia32_cc_below_equal;
2081 case ir_relation_unordered_greater:
2082 case ir_relation_greater: return ia32_cc_above;
2083 case ir_relation_unordered_greater_equal:
2084 case ir_relation_greater_equal: return ia32_cc_above_equal;
2085 case ir_relation_unordered_less_greater:
2086 case ir_relation_less_greater: return ia32_cc_not_equal;
2087 case ir_relation_less_equal_greater:
2088 case ir_relation_unordered:
2089 case ir_relation_false:
2090 case ir_relation_true:
2091 /* introduce jump always/jump never? */
2094 panic("Unexpected pnc");
/**
 * Produce a flags value for a mode_b (boolean) node by testing it
 * against zero; the matching condition code is "not equal".
 */
2098 static ir_node *get_flags_mode_b(ir_node *node, ia32_condition_code_t *cc_out)
2100 /* a mode_b value, we have to compare it against 0 */
2101 dbg_info *dbgi = get_irn_dbg_info(node);
2102 ir_node *new_block = be_transform_node(get_nodes_block(node));
2103 ir_node *new_op = be_transform_node(node);
/* Test(x, x) sets ZF iff x == 0 */
2104 ir_node *flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op, new_op, false);
2105 *cc_out = ia32_cc_not_equal;
/**
 * Transform a Cmp node into a flags-producing ia32 node and report the
 * condition code the consumer must test. Recognizes the bit-test
 * pattern ((1 << n) & x) cmp 0 and emits a Bt instead of a full Cmp.
 */
2109 static ir_node *get_flags_node_cmp(ir_node *cmp, ia32_condition_code_t *cc_out)
2111 /* must have a Cmp as input */
2112 ir_relation relation = get_Cmp_relation(cmp);
2113 ir_relation possible;
2114 ir_node *l = get_Cmp_left(cmp);
2115 ir_node *r = get_Cmp_right(cmp);
2116 ir_mode *mode = get_irn_mode(l);
2119 /* check for bit-test */
2120 if (ia32_cg_config.use_bt && (relation == ir_relation_equal
2121 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2122 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2124 ir_node *la = get_And_left(l);
2125 ir_node *ra = get_And_right(l);
2132 ir_node *c = get_Shl_left(la);
2133 if (is_Const_1(c) && is_Const_0(r)) {
2134 /* (1 << n) & ra) */
2135 ir_node *n = get_Shl_right(la);
2136 flags = gen_bt(cmp, ra, n);
2137 /* the bit is copied into the CF flag */
2138 if (relation & ir_relation_equal)
2139 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2141 *cc_out = ia32_cc_below; /* test for CF=1 */
2147 /* the middle-end tries to eliminate impossible relations, so a ptr != 0
2148 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2149 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2150 * a predecessor node). So add the < bit */
2151 possible = ir_get_possible_cmp_relations(l, r);
2152 if (((relation & ir_relation_less) && !(possible & ir_relation_greater))
2153 || ((relation & ir_relation_greater) && !(possible & ir_relation_less)))
2154 relation |= ir_relation_less_greater;
2156 /* just do a normal transformation of the Cmp */
2157 *cc_out = relation_to_condition_code(relation, mode);
2158 flags = be_transform_node(cmp);
2163 * Transform a node returning a "flag" result.
2165 * @param node the node to transform
2166 * @param cc_out the compare mode to use
/* dispatch: Cmp nodes get the full Cmp path, everything else must be a
 * mode_b value that is tested against zero */
2168 static ir_node *get_flags_node(ir_node *node, ia32_condition_code_t *cc_out)
2171 return get_flags_node_cmp(node, cc_out);
2172 assert(get_irn_mode(node) == mode_b);
2173 return get_flags_mode_b(node, cc_out);
2177 * Transforms a Load.
2179 * @return the created ia32 Load node
2181 static ir_node *gen_Load(ir_node *node)
2183 ir_node *old_block = get_nodes_block(node);
2184 ir_node *block = be_transform_node(old_block);
2185 ir_node *ptr = get_Load_ptr(node);
2186 ir_node *mem = get_Load_mem(node);
2187 ir_node *new_mem = be_transform_node(mem);
2188 dbg_info *dbgi = get_irn_dbg_info(node);
2189 ir_mode *mode = get_Load_mode(node);
2190 int throws_exception = ir_throws_exception(node);
2194 ia32_address_t addr;
2196 /* construct load address */
2197 memset(&addr, 0, sizeof(addr));
2198 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2205 base = be_transform_node(base);
2208 if (index == NULL) {
2211 index = be_transform_node(index);
/* pick the load flavour: SSE xLoad / x87 vfld for floats,
 * widening Conv (movzx/movsx) for sub-32bit ints, plain Load otherwise */
2214 if (mode_is_float(mode)) {
2215 if (ia32_cg_config.use_sse2) {
2216 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2219 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2223 assert(mode != mode_b);
2225 /* create a conv node with address mode for smaller modes */
2226 if (get_mode_size_bits(mode) < 32) {
2227 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2228 new_mem, noreg_GP, mode);
2230 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2233 ir_set_throws_exception(new_node, throws_exception);
2235 set_irn_pinned(new_node, get_irn_pinned(node));
2236 set_ia32_op_type(new_node, ia32_AddrModeS);
2237 set_ia32_ls_mode(new_node, mode);
2238 set_address(new_node, &addr);
/* a floating (non-pinned) load may be repeated by the spiller */
2240 if (get_irn_pinned(node) == op_pin_state_floats) {
2241 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2242 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2243 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2244 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2247 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Decide whether a value (a Load result Proj) may be folded into a
 * destination-address-mode operation (read-modify-write on memory).
 * All checks guard against changing observable memory ordering.
 */
2252 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2253 ir_node *ptr, ir_node *other)
2260 /* we only use address mode if we're the only user of the load */
2261 if (get_irn_n_edges(node) > 1)
2264 load = get_Proj_pred(node);
/* load and the store must live in the same block */
2267 if (get_nodes_block(load) != block)
2270 /* store should have the same pointer as the load */
2271 if (get_Load_ptr(load) != ptr)
2274 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2275 if (other != NULL &&
2276 get_nodes_block(other) == block &&
2277 heights_reachable_in_block(ia32_heights, other, load)) {
2281 if (ia32_prevents_AM(block, load, mem))
2283 /* Store should be attached to the load via mem */
2284 assert(heights_reachable_in_block(ia32_heights, mem, load));
/**
 * Construct a destination-address-mode binary operation (op mem, reg/imm)
 * for a Store(op(Load(p), x), p) pattern. Returns the new node or
 * NOTE(review): presumably NULL when neither operand qualifies (the
 * early-exit lines are outside this excerpt).
 */
2289 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2290 ir_node *mem, ir_node *ptr, ir_mode *mode,
2291 construct_binop_dest_func *func,
2292 construct_binop_dest_func *func8bit,
2293 match_flags_t flags)
2295 ir_node *src_block = get_nodes_block(node);
2303 ia32_address_mode_t am;
2304 ia32_address_t *addr = &am.addr;
2305 memset(&am, 0, sizeof(am));
2307 assert(flags & match_immediate); /* there is no destam node without... */
2308 commutative = (flags & match_commutative) != 0;
/* try op1 as the memory operand first; for commutative ops also op2 */
2310 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2311 build_address(&am, op1, ia32_create_am_double_use);
2312 new_op = create_immediate_or_transform(op2, 0);
2313 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2314 build_address(&am, op2, ia32_create_am_double_use);
2315 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address parts with the GP no-register placeholder */
2320 if (addr->base == NULL)
2321 addr->base = noreg_GP;
2322 if (addr->index == NULL)
2323 addr->index = noreg_GP;
2324 if (addr->mem == NULL)
2327 dbgi = get_irn_dbg_info(node);
2328 block = be_transform_node(src_block);
2329 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8-bit operations need the dedicated 8-bit constructor */
2331 if (get_mode_size_bits(mode) == 8) {
2332 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2334 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2336 set_address(new_node, addr);
2337 set_ia32_op_type(new_node, ia32_AddrModeD);
2338 set_ia32_ls_mode(new_node, mode);
2339 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed Load's memory Projs to the new RMW node */
2341 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2342 mem_proj = be_transform_node(am.mem_proj);
2343 be_set_transformed_node(am.mem_proj, new_node);
2344 be_set_transformed_node(mem_proj, new_node);
/**
 * Construct a destination-address-mode unary operation (op mem) for a
 * Store(op(Load(p)), p) pattern, e.g. inc/dec/not on memory.
 */
2349 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2350 ir_node *ptr, ir_mode *mode,
2351 construct_unop_dest_func *func)
2353 ir_node *src_block = get_nodes_block(node);
2359 ia32_address_mode_t am;
2360 ia32_address_t *addr = &am.addr;
2362 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2365 memset(&am, 0, sizeof(am));
2366 build_address(&am, op, ia32_create_am_double_use);
2368 dbgi = get_irn_dbg_info(node);
2369 block = be_transform_node(src_block);
2370 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2371 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2372 set_address(new_node, addr);
2373 set_ia32_op_type(new_node, ia32_AddrModeD);
2374 set_ia32_ls_mode(new_node, mode);
2375 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed Load's memory Projs to the new RMW node */
2377 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2378 mem_proj = be_transform_node(am.mem_proj);
2379 be_set_transformed_node(am.mem_proj, new_node);
2380 be_set_transformed_node(mem_proj, new_node);
/**
 * Try to turn a Store(Mux(sel, 0/1, 1/0)) into a SetccMem (setcc directly
 * to memory). Only 8-bit stores of constant-0/1 Mux arms qualify.
 */
2385 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2387 ir_mode *mode = get_irn_mode(node);
2388 ir_node *mux_true = get_Mux_true(node);
2389 ir_node *mux_false = get_Mux_false(node);
2397 ia32_condition_code_t cc;
2398 ia32_address_t addr;
/* setcc only writes a single byte */
2400 if (get_mode_size_bits(mode) != 8)
2403 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2405 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2411 cond = get_Mux_sel(node);
2412 flags = get_flags_node(cond, &cc);
2413 /* we can't handle the float special cases with SetM */
2414 if (cc & ia32_cc_additional_float_cases)
/* Mux arms were swapped (0/1), so test the negated condition */
2417 cc = ia32_negate_condition_code(cc);
2419 build_address_ptr(&addr, ptr, mem);
2421 dbgi = get_irn_dbg_info(node);
2422 block = get_nodes_block(node);
2423 new_block = be_transform_node(block);
2424 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2425 addr.index, addr.mem, flags, cc);
2426 set_address(new_node, &addr);
2427 set_ia32_op_type(new_node, ia32_AddrModeD);
2428 set_ia32_ls_mode(new_node, mode);
2429 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to fold a Store(op(...)) combination into a single destination
 * address-mode instruction (AddMem, SubMem, AndMem, ShlMem, NegMem, ...)
 * that reads, modifies and writes memory in one go.
 * NOTE(review): the early-return lines and case labels are elided in this
 * extract; presumably NULL is returned when no pattern matches.
 */
2434 static ir_node *try_create_dest_am(ir_node *node)
2436 ir_node *val = get_Store_value(node);
2437 ir_node *mem = get_Store_mem(node);
2438 ir_node *ptr = get_Store_ptr(node);
2439 ir_mode *mode = get_irn_mode(val);
2440 unsigned bits = get_mode_size_bits(mode);
2445 /* handle only GP modes for now... */
2446 if (!ia32_mode_needs_gp_reg(mode))
2450 /* store must be the only user of the val node */
2451 if (get_irn_n_edges(val) > 1)
2453 /* skip pointless convs */
2455 ir_node *conv_op = get_Conv_op(val);
2456 ir_mode *pred_mode = get_irn_mode(conv_op);
2457 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a widening (or mode_b) Conv before the store doesn't change the stored bits */
2459 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2467 /* value must be in the same block */
2468 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the opcode of the stored value (case labels elided here) */
2471 switch (get_irn_opcode(val)) {
2473 op1 = get_Add_left(val);
2474 op2 = get_Add_right(val);
/* prefer inc/dec over add/sub 1 when the target config allows it */
2475 if (ia32_cg_config.use_incdec) {
2476 if (is_Const_1(op2)) {
2477 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2479 } else if (is_Const_Minus_1(op2)) {
2480 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2484 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2485 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2486 match_commutative | match_immediate);
2489 op1 = get_Sub_left(val);
2490 op2 = get_Sub_right(val);
/* Sub of a constant should have been normalized to Add of -constant */
2491 if (is_Const(op2)) {
2492 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2494 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2495 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2499 op1 = get_And_left(val);
2500 op2 = get_And_right(val);
2501 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2502 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2503 match_commutative | match_immediate);
2506 op1 = get_Or_left(val);
2507 op2 = get_Or_right(val);
2508 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2509 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2510 match_commutative | match_immediate);
2513 op1 = get_Eor_left(val);
2514 op2 = get_Eor_right(val);
2515 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2516 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2517 match_commutative | match_immediate);
2520 op1 = get_Shl_left(val);
2521 op2 = get_Shl_right(val);
2522 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2523 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2527 op1 = get_Shr_left(val);
2528 op2 = get_Shr_right(val);
2529 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2530 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2534 op1 = get_Shrs_left(val);
2535 op2 = get_Shrs_right(val);
2536 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2537 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2541 op1 = get_Rotl_left(val);
2542 op2 = get_Rotl_right(val);
2543 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2544 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2547 /* TODO: match ROR patterns... */
/* a stored Mux(sel, 1, 0) may become a SetccMem */
2549 new_node = try_create_SetMem(val, ptr, mem);
2553 op1 = get_Minus_op(val);
2554 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2557 /* should be lowered already */
2558 assert(mode != mode_b);
2559 op1 = get_Not_op(val);
2560 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned Store must stay pinned after the transformation */
2566 if (new_node != NULL) {
2567 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2568 get_irn_pinned(node) == op_pin_state_pinned) {
2569 set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Checks whether an integer mode can be the result of a float-to-int
 * conversion handled directly by the x87 fist path: it must be signed and
 * 16 or 32 bits wide.  (Return statements are elided in this extract.)
 */
2576 static bool possible_int_mode_for_fp(ir_mode *mode)
2580 if (!mode_is_signed(mode))
2582 size = get_mode_size_bits(mode);
2583 if (size != 16 && size != 32)
/**
 * Tests whether @p node is a Conv from a float mode to an integer mode
 * that the fist path can handle (see possible_int_mode_for_fp).
 * (Early-return lines are elided in this extract.)
 */
2588 static int is_float_to_int_conv(const ir_node *node)
2590 ir_mode *mode = get_irn_mode(node);
2594 if (!possible_int_mode_for_fp(mode))
2599 conv_op = get_Conv_op(node);
2600 conv_mode = get_irn_mode(conv_op);
2602 if (!mode_is_float(conv_mode))
2609 * Transform a Store(floatConst) into a sequence of
/* 32-bit integer stores of the constant's bit pattern (one per 4 bytes). */
2612 * @return the created ia32 Store node
2614 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2616 ir_mode *mode = get_irn_mode(cns);
2617 unsigned size = get_mode_size_bytes(mode);
2618 ir_tarval *tv = get_Const_tarval(cns);
2619 ir_node *block = get_nodes_block(node);
2620 ir_node *new_block = be_transform_node(block);
2621 ir_node *ptr = get_Store_ptr(node);
2622 ir_node *mem = get_Store_mem(node);
2623 dbg_info *dbgi = get_irn_dbg_info(node);
2626 int throws_exception = ir_throws_exception(node);
2628 ia32_address_t addr;
/* only float modes whose size is a multiple of 4 bytes are expected here */
2630 assert(size % 4 == 0);
2633 build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32-bit word from the tarval's bytes */
2637 get_tarval_sub_bits(tv, ofs) |
2638 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2639 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2640 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2641 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2643 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2644 addr.index, addr.mem, imm);
/* loop-local mem Proj; shadows the outer `mem` within this iteration */
2645 ir_node *mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2647 ir_set_throws_exception(new_node, throws_exception);
2648 set_irn_pinned(new_node, get_irn_pinned(node));
2649 set_ia32_op_type(new_node, ia32_AddrModeD);
2650 set_ia32_ls_mode(new_node, mode_Iu);
2651 set_address(new_node, &addr);
2652 SET_IA32_ORIG_NODE(new_node, node);
2660 } while (size != 0);
/* several stores: combine their memory outputs with a Sync;
 * a single store: return the store itself (Proj pred of its M Proj) */
2663 return new_rd_Sync(dbgi, new_block, i, ins);
2665 return get_Proj_pred(ins[0]);
2670 * Generate a vfist or vfisttp instruction.
/* fisttp (SSE3) truncates without touching the FPU control word; the plain
 * fist path needs an explicit truncating fpcw mode instead. */
2672 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2673 ir_node *index, ir_node *mem, ir_node *val)
2675 if (ia32_cg_config.use_fisttp) {
2676 /* Note: fisttp ALWAYS pops the tos. We have to ensure here that the value is copied
2677 if other users exist */
2678 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2679 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* keep the popped result alive so the register allocator sees the use */
2680 be_new_Keep(block, 1, &value);
2684 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2687 ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2693 * Transforms a general (no special case) Store.
2695 * @return the created ia32 Store node
2697 static ir_node *gen_general_Store(ir_node *node)
2699 ir_node *val = get_Store_value(node);
2700 ir_mode *mode = get_irn_mode(val);
2701 ir_node *block = get_nodes_block(node);
2702 ir_node *new_block = be_transform_node(block);
2703 ir_node *ptr = get_Store_ptr(node);
2704 ir_node *mem = get_Store_mem(node);
2705 dbg_info *dbgi = get_irn_dbg_info(node);
2706 int throws_exception = ir_throws_exception(node);
2709 ia32_address_t addr;
2711 /* check for destination address mode */
2712 new_node = try_create_dest_am(node);
2713 if (new_node != NULL)
2716 /* construct store address */
2717 memset(&addr, 0, sizeof(addr));
2718 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal)\
;
/* unused address parts are filled with the no-register placeholder */
2720 if (addr.base == NULL) {
2721 addr.base = noreg_GP;
2723 addr.base = be_transform_node(addr.base);
2726 if (addr.index == NULL) {
2727 addr.index = noreg_GP;
2729 addr.index = be_transform_node(addr.index);
2731 addr.mem = be_transform_node(mem);
2733 if (mode_is_float(mode)) {
2734 /* Convs (and strict-Convs) before stores are unnecessary if the mode
/* (comment continues in elided lines; same-mode float Convs are skipped) */
2736 while (is_Conv(val) && mode == get_irn_mode(val)) {
2737 ir_node *op = get_Conv_op(val);
2738 if (!mode_is_float(get_irn_mode(op)))
2742 new_val = be_transform_node(val);
2743 if (ia32_cg_config.use_sse2) {
2744 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2745 addr.index, addr.mem, new_val);
2747 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2748 addr.index, addr.mem, new_val, mode);
/* x87 only: Store(Conv(float->int)) can be done with a single fist */
2750 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2751 val = get_Conv_op(val);
2753 /* TODO: is this optimisation still necessary at all (middleend)? */
2754 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before
/* (comment continues in elided lines) */
2756 while (is_Conv(val)) {
2757 ir_node *op = get_Conv_op(val);
2758 if (!mode_is_float(get_irn_mode(op)))
2760 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2764 new_val = be_transform_node(val);
2765 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
/* integer store; the value may become an immediate operand */
2767 new_val = create_immediate_or_transform(val, 0);
2768 assert(mode != mode_b);
2770 if (get_mode_size_bits(mode) == 8) {
2771 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2772 addr.index, addr.mem, new_val);
2774 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2775 addr.index, addr.mem, new_val);
2778 ir_set_throws_exception(new_node, throws_exception);
2780 set_irn_pinned(new_node, get_irn_pinned(node));
2781 set_ia32_op_type(new_node, ia32_AddrModeD);
2782 set_ia32_ls_mode(new_node, mode);
2784 set_address(new_node, &addr);
2785 SET_IA32_ORIG_NODE(new_node, node);
2791 * Transforms a Store.
2793 * @return the created ia32 Store node
2795 static ir_node *gen_Store(ir_node *node)
2797 ir_node *val = get_Store_value(node);
2798 ir_mode *mode = get_irn_mode(val);
2800 if (mode_is_float(mode) && is_Const(val)) {
2801 /* We can transform every floating const store
2802 into a sequence of integer stores.
2803 If the constant is already in a register,
2804 it would be better to use it, but we don't
2805 have this information here. */
2806 return gen_float_const_Store(node, val);
2808 return gen_general_Store(node);
2812 * Transforms a Switch.
2814 * @return the created ia32 SwitchJmp node
2816 static ir_node *create_Switch(ir_node *node)
2818 dbg_info *dbgi = get_irn_dbg_info(node);
2819 ir_node *block = be_transform_node(get_nodes_block(node));
2820 ir_node *sel = get_Cond_selector(node);
2821 ir_node *new_sel = be_transform_node(sel);
2822 long default_pn = get_Cond_default_proj(node);
/* the selector must already be a 32-bit value at this point */
2826 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
/* create an anonymous, link-private entity that will hold the jump table */
2828 entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2829 set_entity_visibility(entity, ir_visibility_private);
2830 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2832 /* TODO: we could perform some more matching here to also use the base
2833 * register of the address mode */
2835 = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, default_pn);
/* scale 2 => index * 4: each jump-table entry is 4 bytes */
2836 set_ia32_am_scale(new_node, 2);
2837 set_ia32_am_sc(new_node, entity);
2838 set_ia32_op_type(new_node, ia32_AddrModeS);
2839 set_ia32_ls_mode(new_node, mode_Iu);
2840 SET_IA32_ORIG_NODE(new_node, node);
2846 * Transform a Cond node.
/* A Cond with a non-boolean selector is a Switch; otherwise the selector's
 * flags are materialized and a conditional jump (Jcc) is emitted. */
2848 static ir_node *gen_Cond(ir_node *node)
2850 ir_node *block = get_nodes_block(node);
2851 ir_node *new_block = be_transform_node(block);
2852 dbg_info *dbgi = get_irn_dbg_info(node);
2853 ir_node *sel = get_Cond_selector(node);
2854 ir_mode *sel_mode = get_irn_mode(sel);
2855 ir_node *flags = NULL;
2857 ia32_condition_code_t cc;
2859 if (sel_mode != mode_b) {
2860 return create_Switch(node);
2863 /* we get flags from a Cmp */
2864 flags = get_flags_node(sel, &cc);
2866 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2867 SET_IA32_ORIG_NODE(new_node, node);
2873 * Transform a be_Copy.
/* Duplicates the node and normalizes GP-register copies to mode_Iu. */
2875 static ir_node *gen_be_Copy(ir_node *node)
2877 ir_node *new_node = be_duplicate_node(node);
2878 ir_mode *mode = get_irn_mode(new_node);
2880 if (ia32_mode_needs_gp_reg(mode)) {
2881 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare.  Uses fucomi when available; otherwise falls
 * back to ftst/fucom + fnstsw, whose result is moved into the CPU flags
 * with a sahf.
 */
2887 static ir_node *create_Fucom(ir_node *node)
2889 dbg_info *dbgi = get_irn_dbg_info(node);
2890 ir_node *block = get_nodes_block(node);
2891 ir_node *new_block = be_transform_node(block);
2892 ir_node *left = get_Cmp_left(node);
2893 ir_node *new_left = be_transform_node(left);
2894 ir_node *right = get_Cmp_right(node);
2898 if (ia32_cg_config.use_fucomi) {
2899 new_right = be_transform_node(right);
2900 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2902 set_ia32_commutative(new_node);
2903 SET_IA32_ORIG_NODE(new_node, node);
/* no fucomi: compare against 0 with ftst when possible, else fucom */
2905 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2906 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2908 new_right = be_transform_node(right);
2909 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2912 set_ia32_commutative(new_node);
2914 SET_IA32_ORIG_NODE(new_node, node);
/* transfer the fnstsw status word into the eflags register */
2916 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2917 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an SSE2 Ucomi float compare, matching source address modes for the
 * operands where possible.
 */
2923 static ir_node *create_Ucomi(ir_node *node)
2925 dbg_info *dbgi = get_irn_dbg_info(node);
2926 ir_node *src_block = get_nodes_block(node);
2927 ir_node *new_block = be_transform_node(src_block);
2928 ir_node *left = get_Cmp_left(node);
2929 ir_node *right = get_Cmp_right(node);
2931 ia32_address_mode_t am;
2932 ia32_address_t *addr = &am.addr;
2934 match_arguments(&am, src_block, left, right, NULL,
2935 match_commutative | match_am);
2937 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2938 addr->mem, am.new_op1, am.new_op2,
2940 set_am_attributes(new_node, &am);
2942 SET_IA32_ORIG_NODE(new_node, node);
/* rewires memory users in case a Load was folded into the address mode */
2944 new_node = fix_mem_proj(new_node, &am);
2950 * returns true if it is assured, that the upper bits of a node are "clean"
2951 * which means for a 16 or 8 bit value, that the upper bits in the register
2952 * are 0 for unsigned and a copy of the most significant bit for signed
2955 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2957 assert(ia32_mode_needs_gp_reg(mode));
/* for >= 32-bit values there are no upper bits to worry about */
2958 if (get_mode_size_bits(mode) >= 32)
2961 if (is_Proj(transformed_node))
2962 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
/* dispatch on the producing ia32 opcode (some case labels elided here) */
2964 switch (get_ia32_irn_opcode(transformed_node)) {
2965 case iro_ia32_Conv_I2I:
2966 case iro_ia32_Conv_I2I8Bit: {
2967 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conversion with different signedness may produce dirty upper bits */
2968 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2970 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2977 if (mode_is_signed(mode)) {
2978 return false; /* TODO handle signed modes */
/* Shr by a large enough constant guarantees zeroed upper bits */
2980 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2981 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2982 const ia32_immediate_attr_t *attr
2983 = get_ia32_immediate_attr_const(right);
2984 if (attr->symconst == 0 &&
2985 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2989 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2993 /* TODO too conservative if shift amount is constant */
2994 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* unsigned And: one clean operand zeroes the upper bits of the result */
2997 if (!mode_is_signed(mode)) {
2999 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
3000 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
3002 /* TODO if one is known to be zero extended, then || is sufficient */
3007 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
3008 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
3010 case iro_ia32_Const:
3011 case iro_ia32_Immediate: {
3012 const ia32_immediate_attr_t *attr =
3013 get_ia32_immediate_attr_const(transformed_node);
/* signed: check that the value is a proper sign-extension of `mode` */
3014 if (mode_is_signed(mode)) {
3015 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
3016 return shifted == 0 || shifted == -1;
/* unsigned: all bits above `mode` must be zero */
3018 unsigned long shifted = (unsigned long)attr->offset;
3019 shifted >>= get_mode_size_bits(mode);
3020 return shifted == 0;
3030 * Generate code for a Cmp.
/* Floats go to Ucomi (SSE2) or Fucom (x87).  Integer compares prefer a
 * Test for `(x & y) ==/!= 0` patterns and a Cmp otherwise, widening to a
 * 32-bit compare when the operands' upper bits are known clean (smaller
 * opcode encoding). */
3032 static ir_node *gen_Cmp(ir_node *node)
3034 dbg_info *dbgi = get_irn_dbg_info(node);
3035 ir_node *block = get_nodes_block(node);
3036 ir_node *new_block = be_transform_node(block);
3037 ir_node *left = get_Cmp_left(node);
3038 ir_node *right = get_Cmp_right(node);
3039 ir_mode *cmp_mode = get_irn_mode(left);
3041 ia32_address_mode_t am;
3042 ia32_address_t *addr = &am.addr;
3044 if (mode_is_float(cmp_mode)) {
3045 if (ia32_cg_config.use_sse2) {
3046 return create_Ucomi(node);
3048 return create_Fucom(node);
3052 assert(ia32_mode_needs_gp_reg(cmp_mode));
3054 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3055 if (is_Const_0(right) &&
3057 get_irn_n_edges(left) == 1) {
3058 /* Test(and_left, and_right) */
3059 ir_node *and_left = get_And_left(left);
3060 ir_node *and_right = get_And_right(left);
3062 /* matze: code here used mode instead of cmd_mode, I think it is always
3063 * the same as cmp_mode, but I leave this here to see if this is really
3066 assert(get_irn_mode(and_left) == cmp_mode);
3068 match_arguments(&am, block, and_left, and_right, NULL,
3070 match_am | match_8bit_am | match_16bit_am |
3071 match_am_and_immediates | match_immediate);
3073 /* use 32bit compare mode if possible since the opcode is smaller */
3074 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3075 upper_bits_clean(am.new_op2, cmp_mode)) {
3076 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3079 if (get_mode_size_bits(cmp_mode) == 8) {
3080 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3081 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3083 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
3084 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3087 /* Cmp(left, right) */
3088 match_arguments(&am, block, left, right, NULL,
3089 match_commutative | match_am | match_8bit_am |
3090 match_16bit_am | match_am_and_immediates |
3092 /* use 32bit compare mode if possible since the opcode is smaller */
3093 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3094 upper_bits_clean(am.new_op2, cmp_mode)) {
3095 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3098 if (get_mode_size_bits(cmp_mode) == 8) {
3099 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3100 addr->index, addr->mem, am.new_op1,
3101 am.new_op2, am.ins_permuted);
3103 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3104 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3107 set_am_attributes(new_node, &am);
3108 set_ia32_ls_mode(new_node, cmp_mode);
3110 SET_IA32_ORIG_NODE(new_node, node);
3112 new_node = fix_mem_proj(new_node, &am);
/**
 * Create a CMovcc for a Mux node: selects between the two Mux values based
 * on the given flags/condition code.  Requires cmov support and GP-mode
 * values.
 */
3117 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3118 ia32_condition_code_t cc)
3120 dbg_info *dbgi = get_irn_dbg_info(node);
3121 ir_node *block = get_nodes_block(node);
3122 ir_node *new_block = be_transform_node(block);
3123 ir_node *val_true = get_Mux_true(node);
3124 ir_node *val_false = get_Mux_false(node);
3126 ia32_address_mode_t am;
3127 ia32_address_t *addr;
3129 assert(ia32_cg_config.use_cmov);
3130 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3134 match_arguments(&am, block, val_false, val_true, flags,
3135 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* matching may have swapped the operands; compensate in the condition */
3137 if (am.ins_permuted)
3138 cc = ia32_negate_condition_code(cc);
3140 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3141 addr->mem, am.new_op1, am.new_op2, new_flags,
3143 set_am_attributes(new_node, &am);
3145 SET_IA32_ORIG_NODE(new_node, node);
3147 new_node = fix_mem_proj(new_node, &am);
3153 * Creates a ia32 Setcc instruction.
/* Setcc only produces a byte; for wider result modes the byte is
 * zero-extended with a Conv_I2I8Bit (mode_Bu). */
3155 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3156 ir_node *flags, ia32_condition_code_t cc,
3159 ir_mode *mode = get_irn_mode(orig_node);
3162 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3163 SET_IA32_ORIG_NODE(new_node, orig_node);
3165 /* we might need to conv the result up */
3166 if (get_mode_size_bits(mode) > 8) {
3167 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3168 nomem, new_node, mode_Bu);
3169 SET_IA32_ORIG_NODE(new_node, orig_node);
3176 * Create instruction for an unsigned Difference or Zero.
/* Computes `a > b ? a - b : 0` branch-free: Sub produces the difference and
 * the borrow flag; Sbb0 turns the borrow into 0/-1, Not inverts it to an
 * all-ones/all-zeroes mask, and And applies the mask to the difference. */
3178 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3180 ir_mode *mode = get_irn_mode(psi);
3190 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3191 match_mode_neutral | match_am | match_immediate | match_two_users);
3193 block = get_nodes_block(new_node);
/* gen_binop may hand back a Proj of the Sub; recover the Sub itself and
 * make it multi-output so the flags result can be projected as well */
3195 if (is_Proj(new_node)) {
3196 sub = get_Proj_pred(new_node);
3199 set_irn_mode(sub, mode_T);
3200 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3202 assert(is_ia32_Sub(sub));
3203 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3205 dbgi = get_irn_dbg_info(psi);
3206 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3207 notn = new_bd_ia32_Not(dbgi, block, sbb);
3209 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3210 set_ia32_commutative(new_node);
3215 * Create an const array of two float consts.
3217 * @param c0 the first constant
3218 * @param c1 the second constant
3219 * @param new_mode IN/OUT for the mode of the constants, if NULL
3220 * smallest possible mode will be used
3222 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3225 ir_mode *mode = *new_mode;
3227 ir_initializer_t *initializer;
3228 ir_tarval *tv0 = get_Const_tarval(c0);
3229 ir_tarval *tv1 = get_Const_tarval(c1);
3232 /* detect the best mode for the constants */
3233 mode = get_tarval_mode(tv0);
/* try to shrink to float (mode_F), then double (mode_D), whenever both
 * values convert without losing precision */
3235 if (mode != mode_F) {
3236 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3237 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3239 tv0 = tarval_convert_to(tv0, mode);
3240 tv1 = tarval_convert_to(tv1, mode);
3241 } else if (mode != mode_D) {
3242 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3243 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3245 tv0 = tarval_convert_to(tv0, mode);
3246 tv1 = tarval_convert_to(tv1, mode);
3253 tp = ia32_create_float_type(mode, 4);
3254 tp = ia32_create_float_array(tp);
/* emit as a private, constant global entity */
3256 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3258 set_entity_ld_ident(ent, get_entity_ident(ent));
3259 set_entity_visibility(ent, ir_visibility_private);
3260 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3262 initializer = create_initializer_compound(2);
3264 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3265 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3267 set_entity_initializer(ent, initializer);
3274 * Possible transformations for creating a Setcc.
/* (enumerator names are elided in this extract; they cover Set/Neg/And/
 * Add/Lea/Shl style steps used by find_const_transform below) */
3276 enum setcc_transform_insn {
/* A recipe: condition code plus a short sequence of transform steps that
 * turn the Setcc's 0/1 result into the requested constant pair. */
3289 typedef struct setcc_transform {
3291 ia32_condition_code_t cc;
3293 enum setcc_transform_insn transform;
3297 } setcc_transform_t;
3300 * Setcc can only handle 0 and 1 result.
3301 * Find a transformation that creates 0 and 1 from
/* the given true/false constant pair: normalize so t > f and f == 0, then
 * derive a step sequence (SET, optional NEG/SHL/LEA scaling, ADD of the
 * offset) that maps {0,1} onto {f,t}. */
3304 static void find_const_transform(ia32_condition_code_t cc,
3305 ir_tarval *t, ir_tarval *f,
3306 setcc_transform_t *res)
/* ensure t is the non-zero / larger value, negating the condition to
 * compensate for the swap */
3312 if (tarval_is_null(t)) {
3316 cc = ia32_negate_condition_code(cc);
3317 } else if (tarval_cmp(t, f) == ir_relation_less) {
3318 // now, t is the bigger one
3322 cc = ia32_negate_condition_code(cc);
/* non-zero false value: emit a final ADD f and work with t - f, f = 0 */
3326 if (! tarval_is_null(f)) {
3327 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3330 res->steps[step].transform = SETCC_TR_ADD;
3332 if (t == tarval_bad)
3333 panic("constant subtract failed");
3334 if (! tarval_is_long(f))
3335 panic("tarval is not long");
3337 res->steps[step].val = get_tarval_long(f);
3339 f = tarval_sub(f, f, NULL);
3340 assert(tarval_is_null(f));
/* t == 1: plain Setcc suffices */
3343 if (tarval_is_one(t)) {
3344 res->steps[step].transform = SETCC_TR_SET;
3345 res->num_steps = ++step;
/* t == -1: Setcc then negate */
3349 if (tarval_is_minus_one(t)) {
3350 res->steps[step].transform = SETCC_TR_NEG;
3352 res->steps[step].transform = SETCC_TR_SET;
3353 res->num_steps = ++step;
3356 if (tarval_is_long(t)) {
3357 long v = get_tarval_long(t);
3359 res->steps[step].val = 0;
/* small multiplier values map onto LEA scalings (elided switch labels):
 * 9 -> (a << 3) + a, 8 -> a << 3, 5 -> (a << 2) + a, 4 -> a << 2,
 * 3 -> (a << 1) + a, 2 -> a << 1 */
3362 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3364 res->steps[step].transform = SETCC_TR_LEAxx;
3365 res->steps[step].scale = 3; /* (a << 3) + a */
3368 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3370 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3371 res->steps[step].scale = 3; /* (a << 3) */
3374 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3376 res->steps[step].transform = SETCC_TR_LEAxx;
3377 res->steps[step].scale = 2; /* (a << 2) + a */
3380 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3382 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3383 res->steps[step].scale = 2; /* (a << 2) */
3386 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3388 res->steps[step].transform = SETCC_TR_LEAxx;
3389 res->steps[step].scale = 1; /* (a << 1) + a */
3392 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3394 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3395 res->steps[step].scale = 1; /* (a << 1) */
3398 res->num_steps = step;
/* general case: mask (AND) or shift to a single bit, or fall back to NEG */
3401 if (! tarval_is_single_bit(t)) {
3402 res->steps[step].transform = SETCC_TR_AND;
3403 res->steps[step].val = v;
3405 res->steps[step].transform = SETCC_TR_NEG;
3407 int v = get_tarval_lowest_bit(t);
3410 res->steps[step].transform = SETCC_TR_SHL;
3411 res->steps[step].scale = v;
3415 res->steps[step].transform = SETCC_TR_SET;
3416 res->num_steps = ++step;
3419 panic("tarval is not long");
3423 * Transforms a Mux node into some code sequence.
3425 * @return The transformed node.
/* Handles, in order: integer/float abs patterns, SSE min/max patterns,
 * float constant pairs loaded from a generated const array, the unsigned
 * difference-or-zero pattern, constant 0/1-style results via Setcc step
 * sequences (find_const_transform), and finally a generic CMov. */
3427 static ir_node *gen_Mux(ir_node *node)
3429 dbg_info *dbgi = get_irn_dbg_info(node);
3430 ir_node *block = get_nodes_block(node);
3431 ir_node *new_block = be_transform_node(block);
3432 ir_node *mux_true = get_Mux_true(node);
3433 ir_node *mux_false = get_Mux_false(node);
3434 ir_node *sel = get_Mux_sel(node);
3435 ir_mode *mode = get_irn_mode(node);
3439 ia32_condition_code_t cc;
3441 assert(get_irn_mode(sel) == mode_b);
3443 is_abs = ir_mux_is_abs(sel, mux_true, mux_false);
/* abs of an integer is not matched here (warn); float abs is handled */
3445 if (ia32_mode_needs_gp_reg(mode)) {
3446 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3449 ir_node *op = ir_get_abs_op(sel, mux_true, mux_false);
3450 return create_abs(dbgi, block, op, is_abs < 0, node);
3454 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3455 if (mode_is_float(mode)) {
3456 ir_node *cmp_left = get_Cmp_left(sel);
3457 ir_node *cmp_right = get_Cmp_right(sel);
3458 ir_relation relation = get_Cmp_relation(sel);
3460 if (ia32_cg_config.use_sse2) {
3461 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3462 if (cmp_left == mux_true && cmp_right == mux_false) {
3463 /* Mux(a <= b, a, b) => MIN */
3464 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3465 match_commutative | match_am | match_two_users);
3466 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3467 /* Mux(a <= b, b, a) => MAX */
3468 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3469 match_commutative | match_am | match_two_users);
3471 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3472 if (cmp_left == mux_true && cmp_right == mux_false) {
3473 /* Mux(a >= b, a, b) => MAX */
3474 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3475 match_commutative | match_am | match_two_users);
3476 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3477 /* Mux(a >= b, b, a) => MIN */
3478 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3479 match_commutative | match_am | match_two_users);
/* float Mux of two constants: build a 2-element const array and use the
 * Setcc result (scaled) as index into it */
3484 if (is_Const(mux_true) && is_Const(mux_false)) {
3485 ia32_address_mode_t am;
3490 flags = get_flags_node(sel, &cc);
3491 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3493 if (ia32_cg_config.use_sse2) {
3494 /* cannot load from different mode on SSE */
3497 /* x87 can load any mode */
3501 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (case labels elided here) */
3503 switch (get_mode_size_bytes(new_mode)) {
3513 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3514 set_ia32_am_scale(new_node, 2);
3519 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3520 set_ia32_am_scale(new_node, 1);
3523 /* arg, shift 16 NOT supported */
3525 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3528 panic("Unsupported constant size");
3531 am.ls_mode = new_mode;
3532 am.addr.base = get_symconst_base();
3533 am.addr.index = new_node;
3534 am.addr.mem = nomem;
3536 am.addr.scale = scale;
3537 am.addr.use_frame = 0;
3538 am.addr.tls_segment = false;
3539 am.addr.frame_entity = NULL;
3540 am.addr.symconst_sign = 0;
3541 am.mem_proj = am.addr.mem;
3542 am.op_type = ia32_AddrModeS;
3545 am.pinned = op_pin_state_floats;
3547 am.ins_permuted = false;
3549 if (ia32_cg_config.use_sse2)
3550 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3552 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3553 set_am_attributes(load, &am);
3555 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3557 panic("cannot transform floating point Mux");
3560 assert(ia32_mode_needs_gp_reg(mode));
3563 ir_node *cmp_left = get_Cmp_left(sel);
3564 ir_node *cmp_right = get_Cmp_right(sel);
3565 ir_relation relation = get_Cmp_relation(sel);
3566 ir_node *val_true = mux_true;
3567 ir_node *val_false = mux_false;
/* normalize so the zero constant sits on the false side */
3569 if (is_Const(val_true) && is_Const_null(val_true)) {
3570 ir_node *tmp = val_false;
3571 val_false = val_true;
3573 relation = get_negated_relation(relation);
/* Mux(a >/< b, a - b / b - a, 0) => unsigned difference-or-zero */
3575 if (is_Const_0(val_false) && is_Sub(val_true)) {
3576 if ((relation & ir_relation_greater)
3577 && get_Sub_left(val_true) == cmp_left
3578 && get_Sub_right(val_true) == cmp_right) {
3579 return create_doz(node, cmp_left, cmp_right);
3581 if ((relation & ir_relation_less)
3582 && get_Sub_left(val_true) == cmp_right
3583 && get_Sub_right(val_true) == cmp_left) {
3584 return create_doz(node, cmp_right, cmp_left);
3589 flags = get_flags_node(sel, &cc);
3591 if (is_Const(mux_true) && is_Const(mux_false)) {
3592 /* both are const, good */
3593 ir_tarval *tv_true = get_Const_tarval(mux_true);
3594 ir_tarval *tv_false = get_Const_tarval(mux_false);
3595 setcc_transform_t res;
3598 find_const_transform(cc, tv_true, tv_false, &res);
/* apply the recipe steps from last to first (case labels elided here) */
3600 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3603 switch (res.steps[step].transform) {
3605 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3606 add_ia32_am_offs_int(new_node, res.steps[step].val);
3608 case SETCC_TR_ADDxx:
3609 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3612 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3613 set_ia32_am_scale(new_node, res.steps[step].scale);
3614 set_ia32_am_offs_int(new_node, res.steps[step].val);
3616 case SETCC_TR_LEAxx:
3617 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3618 set_ia32_am_scale(new_node, res.steps[step].scale);
3619 set_ia32_am_offs_int(new_node, res.steps[step].val);
3622 imm = ia32_immediate_from_long(res.steps[step].scale);
3623 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3626 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3629 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3632 imm = ia32_immediate_from_long(res.steps[step].val);
3633 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3636 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3639 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3642 panic("unknown setcc transform");
/* generic fallback: conditional move */
3646 new_node = create_CMov(node, sel, flags, cc);
3654 * Create a conversion from x87 state register to general purpose.
/* Spills the x87 value to the frame with fist(tp), then loads it back into
 * a GP register with an integer Load. */
3656 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3658 ir_node *block = be_transform_node(get_nodes_block(node));
3659 ir_node *op = get_Conv_op(node);
3660 ir_node *new_op = be_transform_node(op);
3661 ir_graph *irg = current_ir_graph;
3662 dbg_info *dbgi = get_irn_dbg_info(node);
3663 ir_mode *mode = get_irn_mode(node);
3664 ir_node *frame = get_irg_frame(irg);
3665 ir_node *fist, *load, *mem;
3667 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3668 set_irn_pinned(fist, op_pin_state_floats);
3669 set_ia32_use_frame(fist);
3670 set_ia32_op_type(fist, ia32_AddrModeD);
/* the M Proj number is shared between the vfist and vfisttp variants */
3672 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3673 mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3675 assert(get_mode_size_bits(mode) <= 32);
3676 /* exception we can only store signed 32 bit integers, so for unsigned
3677 we store a 64bit (signed) integer and load the lower bits */
3678 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3679 set_ia32_ls_mode(fist, mode_Ls);
3681 set_ia32_ls_mode(fist, mode_Is);
3683 SET_IA32_ORIG_NODE(fist, node);
/* read back the (low) 32 bits from the frame slot */
3686 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3688 set_irn_pinned(load, op_pin_state_floats);
3689 set_ia32_use_frame(load);
3690 set_ia32_op_type(load, ia32_AddrModeS);
3691 set_ia32_ls_mode(load, mode_Is);
/* request a 64-bit spill slot when a 64-bit store was emitted above */
3692 if (get_ia32_ls_mode(fist) == mode_Ls) {
3693 ia32_attr_t *attr = get_ia32_attr(load);
3694 attr->data.need_64bit_stackent = 1;
3696 ia32_attr_t *attr = get_ia32_attr(load);
3697 attr->data.need_32bit_stackent = 1;
3699 SET_IA32_ORIG_NODE(load, node);
3701 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3705 * Creates a x87 strict Conv by placing a Store and a Load
/*
 * Implement a strict x87 Conv by forcing a round-trip through memory:
 * vfst the value with the target mode, then vfld it back. The store/load
 * pair truncates the 80bit x87 precision to tgt_mode precision.
 * NOTE(review): extract is gapped — the final return is not visible here,
 * but new_node is the vfld result Proj (mode_E).
 */
3707 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3709 ir_node *block = get_nodes_block(node);
3710 ir_graph *irg = get_Block_irg(block);
3711 dbg_info *dbgi = get_irn_dbg_info(node);
3712 ir_node *frame = get_irg_frame(irg);
3714 ir_node *store, *load;
/* store with target precision ... */
3717 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3718 set_ia32_use_frame(store);
3719 set_ia32_op_type(store, ia32_AddrModeD);
3720 SET_IA32_ORIG_NODE(store, node);
3722 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
/* ... and load it back, now rounded to tgt_mode */
3724 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3725 set_ia32_use_frame(load);
3726 set_ia32_op_type(load, ia32_AddrModeS);
3727 SET_IA32_ORIG_NODE(load, node);
3729 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/*
 * Construct an integer-to-integer Conv node, selecting the special 8bit
 * constructor for 8bit modes (which has different register constraints)
 * and the generic one otherwise.
 */
3733 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3734 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3736 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3738 func = get_mode_size_bits(mode) == 8 ?
3739 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3740 return func(dbgi, block, base, index, mem, val, mode);
3744 * Create a conversion from general purpose to x87 register
/*
 * Convert a general purpose integer to an x87 float via fild.
 * Tries to fold the source load into the fild (source address mode);
 * otherwise the value is spilled to the frame and fild'd from there.
 * Unsigned 32bit values get a 64bit spill with a zeroed upper half so
 * the (signed) fild reads them correctly.
 * NOTE(review): extract is gapped — braces/else lines and the final
 * return (presumably new_node) are not visible here.
 */
3746 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3748 ir_node *src_block = get_nodes_block(node);
3749 ir_node *block = be_transform_node(src_block);
3750 ir_graph *irg = get_Block_irg(block);
3751 dbg_info *dbgi = get_irn_dbg_info(node);
3752 ir_node *op = get_Conv_op(node);
3753 ir_node *new_op = NULL;
3755 ir_mode *store_mode;
3761 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3762 if (possible_int_mode_for_fp(src_mode)) {
3763 ia32_address_mode_t am;
3765 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3766 if (am.op_type == ia32_AddrModeS) {
3767 ia32_address_t *addr = &am.addr;
/* fold the memory operand directly into the fild */
3769 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3770 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3772 set_am_attributes(fild, &am);
3773 SET_IA32_ORIG_NODE(fild, node);
3775 fix_mem_proj(fild, &am);
/* source AM did not match: transform the operand normally */
3780 if (new_op == NULL) {
3781 new_op = be_transform_node(op);
3784 mode = get_irn_mode(op);
3786 /* first convert to 32 bit signed if necessary */
3787 if (get_mode_size_bits(src_mode) < 32) {
3788 if (!upper_bits_clean(new_op, src_mode)) {
3789 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3790 SET_IA32_ORIG_NODE(new_op, node);
3795 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can read it */
3798 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3800 set_ia32_use_frame(store);
3801 set_ia32_op_type(store, ia32_AddrModeD);
3802 set_ia32_ls_mode(store, mode_Iu);
3804 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3806 /* exception for 32bit unsigned, do a 64bit spill+load */
3807 if (!mode_is_signed(mode)) {
/* write a zero into the upper 4 bytes of the 64bit slot */
3810 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3812 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3813 noreg_GP, nomem, zero_const);
3814 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3816 set_ia32_use_frame(zero_store);
3817 set_ia32_op_type(zero_store, ia32_AddrModeD);
3818 add_ia32_am_offs_int(zero_store, 4);
3819 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both stores: Sync their memory effects */
3821 in[0] = zero_store_mem;
3824 store_mem = new_rd_Sync(dbgi, block, 2, in);
3825 store_mode = mode_Ls;
3827 store_mode = mode_Is;
3831 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3833 set_ia32_use_frame(fild);
3834 set_ia32_op_type(fild, ia32_AddrModeS);
3835 set_ia32_ls_mode(fild, store_mode);
3837 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3843 * Create a conversion from one integer mode into another one
/*
 * Create a conversion between two integer modes. The effective conversion
 * is done in the smaller of the two modes; if the operand's upper bits are
 * already clean the Conv is redundant (the asserts check it cannot have
 * been matched with address mode in that case).
 * NOTE(review): extract is gapped — the early-return for the redundant
 * case and the final return of new_node are not visible here.
 */
3845 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3846 dbg_info *dbgi, ir_node *block, ir_node *op,
3849 ir_node *new_block = be_transform_node(block);
3851 ir_mode *smaller_mode;
3852 ia32_address_mode_t am;
3853 ia32_address_t *addr = &am.addr;
/* the conversion only has to preserve the smaller of the two widths */
3856 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3857 smaller_mode = src_mode;
3859 smaller_mode = tgt_mode;
3862 #ifdef DEBUG_libfirm
/* a Conv directly after a constant should have been folded already */
3864 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3869 match_arguments(&am, block, NULL, op, NULL,
3870 match_am | match_8bit_am | match_16bit_am);
3872 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3873 /* unnecessary conv. in theory it shouldn't have been AM */
3874 assert(is_ia32_NoReg_GP(addr->base));
3875 assert(is_ia32_NoReg_GP(addr->index));
3876 assert(is_NoMem(addr->mem));
3877 assert(am.addr.offset == 0);
3878 assert(am.addr.symconst_ent == NULL);
3882 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3883 addr->mem, am.new_op2, smaller_mode);
3884 set_am_attributes(new_node, &am);
3885 /* match_arguments assume that out-mode = in-mode, this isn't true here
3887 set_ia32_ls_mode(new_node, smaller_mode);
3888 SET_IA32_ORIG_NODE(new_node, node);
3889 new_node = fix_mem_proj(new_node, &am);
3894 * Transforms a Conv node.
3896 * @return The created ia32 Conv node
/*
 * Transform a firm Conv node into the matching ia32 construct.
 * Dispatches on (src float?, tgt float?) and on SSE2 vs. x87:
 *   float->float: SSE Conv_FP2FP or x87 strict store/load round-trip
 *   float->int:   SSE Conv_FP2I or x87 fist spill (gen_x87_fp_to_gp)
 *   int->float:   SSE Conv_I2FP or x87 fild (gen_x87_gp_to_fp)
 *   int->int:     create_I2I_Conv, or dropped when widths match
 * NOTE(review): extract is gapped — else lines and the final return of
 * res are not visible here.
 *
 * @return The created ia32 Conv node
 */
3898 static ir_node *gen_Conv(ir_node *node)
3900 ir_node *block = get_nodes_block(node);
3901 ir_node *new_block = be_transform_node(block);
3902 ir_node *op = get_Conv_op(node);
3903 ir_node *new_op = NULL;
3904 dbg_info *dbgi = get_irn_dbg_info(node);
3905 ir_mode *src_mode = get_irn_mode(op);
3906 ir_mode *tgt_mode = get_irn_mode(node);
3907 int src_bits = get_mode_size_bits(src_mode);
3908 int tgt_bits = get_mode_size_bits(tgt_mode);
3909 ir_node *res = NULL;
/* 64bit lowering must already have happened */
3911 assert(!mode_is_int(src_mode) || src_bits <= 32);
3912 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3914 /* modeB -> X should already be lowered by the lower_mode_b pass */
3915 if (src_mode == mode_b) {
3916 panic("ConvB not lowered %+F", node);
/* no-op conversion: same source and target mode */
3919 if (src_mode == tgt_mode) {
3920 if (get_Conv_strict(node)) {
3921 if (ia32_cg_config.use_sse2) {
3922 /* when we are in SSE mode, we can kill all strict no-op conversion */
3923 return be_transform_node(op);
3926 /* this should be optimized already, but who knows... */
3927 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3928 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3929 return be_transform_node(op);
3933 if (mode_is_float(src_mode)) {
3934 new_op = be_transform_node(op);
3935 /* we convert from float ... */
3936 if (mode_is_float(tgt_mode)) {
3938 if (ia32_cg_config.use_sse2) {
3939 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3940 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3942 set_ia32_ls_mode(res, tgt_mode);
3944 if (get_Conv_strict(node)) {
3945 /* if fp_no_float_fold is not set then we assume that we
3946 * don't have any float operations in a non
3947 * mode_float_arithmetic mode and can skip strict upconvs */
3948 if (src_bits < tgt_bits) {
3949 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3952 res = gen_x87_strict_conv(tgt_mode, new_op);
3953 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3957 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* ... to int */
3962 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3963 if (ia32_cg_config.use_sse2) {
3964 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3966 set_ia32_ls_mode(res, src_mode);
3968 return gen_x87_fp_to_gp(node);
3972 /* we convert from int ... */
3973 if (mode_is_float(tgt_mode)) {
3975 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3976 if (ia32_cg_config.use_sse2) {
3977 new_op = be_transform_node(op);
3978 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3980 set_ia32_ls_mode(res, tgt_mode);
/* x87: check whether the float mantissa can hold all int values */
3982 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3983 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3984 res = gen_x87_gp_to_fp(node, src_mode);
3986 /* we need a strict-Conv, if the int mode has more bits than the
3988 if (float_mantissa < int_mantissa) {
3989 res = gen_x87_strict_conv(tgt_mode, res);
3990 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3994 } else if (tgt_mode == mode_b) {
3995 /* mode_b lowering already took care that we only have 0/1 values */
3996 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3997 src_mode, tgt_mode));
3998 return be_transform_node(op);
/* int -> int */
4001 if (src_bits == tgt_bits) {
4002 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
4003 src_mode, tgt_mode));
4004 return be_transform_node(op);
4007 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/*
 * Try to express the node as an ia32 Immediate (respecting the given
 * inline-asm style constraint character); fall back to the normal
 * transformation when that is not possible.
 * NOTE(review): extract is gapped — the return of new_node is not
 * visible here.
 */
4015 static ir_node *create_immediate_or_transform(ir_node *node,
4016 char immediate_constraint_type)
4018 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
4019 if (new_node == NULL) {
4020 new_node = be_transform_node(node);
4026 * Transforms a FrameAddr into an ia32 Add.
/*
 * Transform a be_FrameAddr (address of a frame entity) into an address
 * computation relative to the frame pointer.
 * NOTE(review): despite the surrounding old comment saying "Add", the
 * code emits an ia32 Lea with the frame entity attached. The return of
 * new_node is not visible in this gapped extract.
 */
4028 static ir_node *gen_be_FrameAddr(ir_node *node)
4030 ir_node *block = be_transform_node(get_nodes_block(node));
4031 ir_node *op = be_get_FrameAddr_frame(node);
4032 ir_node *new_op = be_transform_node(op);
4033 dbg_info *dbgi = get_irn_dbg_info(node);
4036 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
4037 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
4038 set_ia32_use_frame(new_node);
4040 SET_IA32_ORIG_NODE(new_node, node);
4046 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/*
 * Transform a be_Return. In the SSE2 case a float return value lives in
 * xmm0 but the ABI expects it on the x87 TOS, so the value is copied via
 * a store/load through the frame before building the new Return. All
 * other cases simply duplicate the node.
 * NOTE(review): extract is gapped — the in[i] assignments for the
 * replaced value/memory operands and the final return are not visible.
 */
4048 static ir_node *gen_be_Return(ir_node *node)
4050 ir_graph *irg = current_ir_graph;
4051 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
4052 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
4053 ir_node *new_ret_val = be_transform_node(ret_val);
4054 ir_node *new_ret_mem = be_transform_node(ret_mem);
4055 ir_entity *ent = get_irg_entity(irg);
4056 ir_type *tp = get_entity_type(ent);
4057 dbg_info *dbgi = get_irn_dbg_info(node);
4058 ir_node *block = be_transform_node(get_nodes_block(node));
/* only the SSE2 float-return case needs special handling */
4072 assert(ret_val != NULL);
4073 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4074 return be_duplicate_node(node);
4077 res_type = get_method_res_type(tp, 0);
4079 if (! is_Primitive_type(res_type)) {
4080 return be_duplicate_node(node);
4083 mode = get_type_mode(res_type);
4084 if (! mode_is_float(mode)) {
4085 return be_duplicate_node(node);
4088 assert(get_method_n_ress(tp) == 1);
4090 frame = get_irg_frame(irg);
4092 /* store xmm0 onto stack */
4093 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4094 new_ret_mem, new_ret_val);
4095 set_ia32_ls_mode(sse_store, mode);
4096 set_ia32_op_type(sse_store, ia32_AddrModeD);
4097 set_ia32_use_frame(sse_store);
4098 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4100 /* load into x87 register */
4101 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4102 set_ia32_op_type(fld, ia32_AddrModeS);
4103 set_ia32_use_frame(fld);
4105 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4106 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4108 /* create a new return */
4109 arity = get_irn_arity(node);
4110 in = ALLOCAN(ir_node*, arity);
4111 pop = be_Return_get_pop(node);
4112 for (i = 0; i < arity; ++i) {
4113 ir_node *op = get_irn_n(node, i);
/* replace the return value with the x87 copy, the memory with its proj */
4114 if (op == ret_val) {
4116 } else if (op == ret_mem) {
4119 in[i] = be_transform_node(op);
4122 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4123 copy_node_attr(irg, node, new_node);
4129 * Transform a be_AddSP into an ia32_SubSP.
/*
 * Transform a be_AddSP into an ia32_SubSP: the stack grows downwards, so
 * adding to the stack area means subtracting from ESP. The result is
 * pinned to the ESP register.
 * NOTE(review): extract is gapped — the return of new_node is not
 * visible here.
 */
4131 static ir_node *gen_be_AddSP(ir_node *node)
4133 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4134 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4136 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4137 match_am | match_immediate);
4138 assert(is_ia32_SubSP(new_node));
4139 arch_irn_set_register(new_node, pn_ia32_SubSP_stack,
4140 &ia32_registers[REG_ESP]);
4145 * Transform a be_SubSP into an ia32_AddSP
/*
 * Transform a be_SubSP into an ia32_AddSP (mirror of gen_be_AddSP: the
 * stack grows downwards). The result is pinned to ESP.
 * NOTE(review): extract is gapped — the return of new_node is not
 * visible here.
 */
4147 static ir_node *gen_be_SubSP(ir_node *node)
4149 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4150 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4152 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4153 match_am | match_immediate);
4154 assert(is_ia32_AddSP(new_node));
4155 arch_irn_set_register(new_node, pn_ia32_AddSP_stack,
4156 &ia32_registers[REG_ESP]);
4161 * Change some phi modes
/*
 * Transform a Phi: keep the node but attach the register class
 * requirement matching its mode (gp / xmm / vfp). Because Phis may close
 * loops, the operands are left untransformed for now and fixed up later;
 * the node is enqueued so its predecessors get transformed.
 * NOTE(review): extract is gapped — mode reassignments inside the
 * branches and the return of phi are not visible here.
 */
4163 static ir_node *gen_Phi(ir_node *node)
4165 const arch_register_req_t *req;
4166 ir_node *block = be_transform_node(get_nodes_block(node));
4167 ir_graph *irg = current_ir_graph;
4168 dbg_info *dbgi = get_irn_dbg_info(node);
4169 ir_mode *mode = get_irn_mode(node);
4172 if (ia32_mode_needs_gp_reg(mode)) {
4173 /* we shouldn't have any 64bit stuff around anymore */
4174 assert(get_mode_size_bits(mode) <= 32);
4175 /* all integer operations are on 32bit registers now */
4177 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4178 } else if (mode_is_float(mode)) {
4179 if (ia32_cg_config.use_sse2) {
4181 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4184 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4187 req = arch_no_register_req;
4190 /* phi nodes allow loops, so we use the old arguments for now
4191 * and fix this later */
4192 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4193 get_irn_in(node) + 1);
4194 copy_node_attr(irg, node, phi);
4195 be_duplicate_deps(node, phi);
4197 arch_set_out_register_req(phi, 0, req);
/* make sure the predecessor blocks/values get transformed too */
4199 be_enqueue_preds(node);
/*
 * Transform an unconditional Jmp into an ia32 Jmp in the transformed
 * block.
 * NOTE(review): extract is gapped — the return of new_node is not
 * visible here.
 */
4204 static ir_node *gen_Jmp(ir_node *node)
4206 ir_node *block = get_nodes_block(node);
4207 ir_node *new_block = be_transform_node(block);
4208 dbg_info *dbgi = get_irn_dbg_info(node);
4211 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4212 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Transform an indirect jump (IJmp). The jump target must be a pointer;
 * address mode / immediate matching is used so the target can be folded
 * into the jump instruction.
 * NOTE(review): extract is gapped — the return of new_node is not
 * visible here.
 */
4222 static ir_node *gen_IJmp(ir_node *node)
4224 ir_node *block = get_nodes_block(node);
4225 ir_node *new_block = be_transform_node(block);
4226 dbg_info *dbgi = get_irn_dbg_info(node);
4227 ir_node *op = get_IJmp_target(node);
4229 ia32_address_mode_t am;
4230 ia32_address_t *addr = &am.addr;
4232 assert(get_irn_mode(op) == mode_P);
4234 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4236 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4237 addr->mem, am.new_op2);
4238 set_am_attributes(new_node, &am);
4239 SET_IA32_ORIG_NODE(new_node, node);
4241 new_node = fix_mem_proj(new_node, &am);
/*
 * Transform the lowered 64bit helper node ia32_l_Add into a real ia32
 * Add. The Add is switched to mode_T because the l_Add's carry-out Proj
 * is needed by the matching l_Adc.
 * NOTE(review): extract is gapped — the return of lowered is not
 * visible here.
 */
4244 static ir_node *gen_ia32_l_Add(ir_node *node)
4246 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4247 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4248 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4249 match_commutative | match_am | match_immediate |
4250 match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Add itself */
4252 if (is_Proj(lowered)) {
4253 lowered = get_Proj_pred(lowered);
4255 assert(is_ia32_Add(lowered));
4256 set_irn_mode(lowered, mode_T);
/*
 * Transform the lowered 64bit helper node ia32_l_Adc into a real ia32
 * Adc (add with carry), wiring up the flags input via gen_binop_flags.
 */
4262 static ir_node *gen_ia32_l_Adc(ir_node *node)
4264 return gen_binop_flags(node, new_bd_ia32_Adc,
4265 match_commutative | match_am | match_immediate |
4266 match_mode_neutral);
4270 * Transforms a l_MulS into a "real" MulS node.
4272 * @return the created ia32 Mul node
/*
 * Transform the lowered helper node ia32_l_Mul into a real ia32 Mul
 * (unsigned widening multiply producing EDX:EAX).
 */
4274 static ir_node *gen_ia32_l_Mul(ir_node *node)
4276 ir_node *left = get_binop_left(node);
4277 ir_node *right = get_binop_right(node);
4279 return gen_binop(node, left, right, new_bd_ia32_Mul,
4280 match_commutative | match_am | match_mode_neutral);
4284 * Transforms a l_IMulS into a "real" IMul1OPS node.
4286 * @return the created ia32 IMul1OP node
/*
 * Transform the lowered helper node ia32_l_IMul into a real ia32
 * IMul1OP (signed one-operand widening multiply producing EDX:EAX).
 */
4288 static ir_node *gen_ia32_l_IMul(ir_node *node)
4290 ir_node *left = get_binop_left(node);
4291 ir_node *right = get_binop_right(node);
4293 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4294 match_commutative | match_am | match_mode_neutral);
/*
 * Transform the lowered 64bit helper node ia32_l_Sub into a real ia32
 * Sub. The Sub is switched to mode_T because the l_Sub's borrow-out
 * Proj is needed by the matching l_Sbb.
 * NOTE(review): extract is gapped — the return of lowered is not
 * visible here.
 */
4297 static ir_node *gen_ia32_l_Sub(ir_node *node)
4299 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4300 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4301 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4302 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Sub itself */
4304 if (is_Proj(lowered)) {
4305 lowered = get_Proj_pred(lowered);
4307 assert(is_ia32_Sub(lowered));
4308 set_irn_mode(lowered, mode_T);
/*
 * Transform the lowered 64bit helper node ia32_l_Sbb into a real ia32
 * Sbb (subtract with borrow), wiring up the flags input.
 */
4314 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4316 return gen_binop_flags(node, new_bd_ia32_Sbb,
4317 match_am | match_immediate | match_mode_neutral);
/*
 * Transform the lowered helper node ia32_l_LLtoFloat (64bit integer to
 * float): store the low/high 32bit halves to a 64bit frame slot, Sync
 * both stores and fild the slot as mode_Ls. For an unsigned 64bit value
 * a bias (ULLBIAS constant selected by the shifted-out sign bit) is
 * added to correct the signed fild interpretation.
 * NOTE(review): extract is gapped — store value operands, in[] setup,
 * am.new_op1 assignment and the final return of res are not visible.
 */
4320 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4322 ir_node *src_block = get_nodes_block(node);
4323 ir_node *block = be_transform_node(src_block);
4324 ir_graph *irg = current_ir_graph;
4325 dbg_info *dbgi = get_irn_dbg_info(node);
4326 ir_node *frame = get_irg_frame(irg);
4327 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4328 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4329 ir_node *new_val_low = be_transform_node(val_low);
4330 ir_node *new_val_high = be_transform_node(val_high);
4332 ir_node *sync, *fild, *res;
4334 ir_node *store_high;
/* only implemented for the x87 path */
4338 if (ia32_cg_config.use_sse2) {
4339 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both 32bit halves into one 64bit frame slot */
4343 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4345 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4347 SET_IA32_ORIG_NODE(store_low, node);
4348 SET_IA32_ORIG_NODE(store_high, node);
4350 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4351 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4353 set_ia32_use_frame(store_low);
4354 set_ia32_use_frame(store_high);
4355 set_ia32_op_type(store_low, ia32_AddrModeD);
4356 set_ia32_op_type(store_high, ia32_AddrModeD);
4357 set_ia32_ls_mode(store_low, mode_Iu);
4358 set_ia32_ls_mode(store_high, mode_Is);
/* high half lives 4 bytes above the low half */
4359 add_ia32_am_offs_int(store_high, 4);
4363 sync = new_rd_Sync(dbgi, block, 2, in);
/* load the 64bit slot onto the x87 stack */
4366 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4368 set_ia32_use_frame(fild);
4369 set_ia32_op_type(fild, ia32_AddrModeS);
4370 set_ia32_ls_mode(fild, mode_Ls);
4372 SET_IA32_ORIG_NODE(fild, node);
4374 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned source: fild read it as signed, add the ULLBIAS correction.
 * The index is the sign bit (high >> 31), selecting bias 0.0 or 2^64
 * from the known-constant table. */
4376 if (! mode_is_signed(get_irn_mode(val_high))) {
4377 ia32_address_mode_t am;
4379 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4382 am.addr.base = get_symconst_base();
4383 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4384 am.addr.mem = nomem;
4387 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4388 am.addr.tls_segment = false;
4389 am.addr.use_frame = 0;
4390 am.addr.frame_entity = NULL;
4391 am.addr.symconst_sign = 0;
4392 am.ls_mode = mode_F;
4393 am.mem_proj = nomem;
4394 am.op_type = ia32_AddrModeS;
4396 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4397 am.pinned = op_pin_state_floats;
4399 am.ins_permuted = false;
4401 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4402 am.new_op1, am.new_op2, get_fpcw());
4403 set_am_attributes(fadd, &am);
4405 set_irn_mode(fadd, mode_T);
4406 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/*
 * Transform the lowered helper node ia32_l_FloattoLL (float to 64bit
 * integer): fist the value into a 64bit frame slot and return the memory
 * Proj; the two 32bit result halves are read back later by
 * gen_Proj_l_FloattoLL.
 */
4411 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4413 ir_node *src_block = get_nodes_block(node);
4414 ir_node *block = be_transform_node(src_block);
4415 ir_graph *irg = get_Block_irg(block);
4416 dbg_info *dbgi = get_irn_dbg_info(node);
4417 ir_node *frame = get_irg_frame(irg);
4418 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4419 ir_node *new_val = be_transform_node(val);
4422 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4423 SET_IA32_ORIG_NODE(fist, node);
4424 set_ia32_use_frame(fist);
4425 set_ia32_op_type(fist, ia32_AddrModeD);
4426 set_ia32_ls_mode(fist, mode_Ls);
/* gen_vfist may produce a vfist or vfisttp; both share the M proj number */
4428 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4429 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/*
 * Transform a Proj of ia32_l_FloattoLL: load the requested 32bit half
 * (res_low at offset 0, res_high at offset 4) of the 64bit value that
 * gen_ia32_l_FloattoLL stored into the frame.
 * NOTE(review): extract is gapped — the return of proj is not visible
 * here.
 */
4432 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4434 ir_node *block = be_transform_node(get_nodes_block(node));
4435 ir_graph *irg = get_Block_irg(block);
4436 ir_node *pred = get_Proj_pred(node);
4437 ir_node *new_pred = be_transform_node(pred);
4438 ir_node *frame = get_irg_frame(irg);
4439 dbg_info *dbgi = get_irn_dbg_info(node);
4440 long pn = get_Proj_proj(node);
/* new_pred is the memory Proj of the fist store */
4445 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4446 SET_IA32_ORIG_NODE(load, node);
4447 set_ia32_use_frame(load);
4448 set_ia32_op_type(load, ia32_AddrModeS);
4449 set_ia32_ls_mode(load, mode_Iu);
4450 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4451 * 32 bit from it with this particular load */
4452 attr = get_ia32_attr(load);
4453 attr->data.need_64bit_stackent = 1;
4455 if (pn == pn_ia32_l_FloattoLL_res_high) {
4456 add_ia32_am_offs_int(load, 4);
4458 assert(pn == pn_ia32_l_FloattoLL_res_low);
4461 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4467 * Transform the Projs of an AddSP.
/*
 * Transform the Projs of a be_AddSP. Since gen_be_AddSP produced an
 * ia32_SubSP (stack grows downwards), the be-level proj numbers are
 * renumbered to the SubSP's: sp -> stack (pinned to ESP), res -> addr,
 * M -> M.
 */
4469 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4471 ir_node *pred = get_Proj_pred(node);
4472 ir_node *new_pred = be_transform_node(pred);
4473 dbg_info *dbgi = get_irn_dbg_info(node);
4474 long proj = get_Proj_proj(node);
4476 if (proj == pn_be_AddSP_sp) {
4477 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4478 pn_ia32_SubSP_stack);
4479 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4481 } else if (proj == pn_be_AddSP_res) {
4482 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4483 pn_ia32_SubSP_addr);
4484 } else if (proj == pn_be_AddSP_M) {
4485 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4488 panic("No idea how to transform proj->AddSP");
4492 * Transform the Projs of a SubSP.
/*
 * Transform the Projs of a be_SubSP. Since gen_be_SubSP produced an
 * ia32_AddSP, the be-level proj numbers are renumbered accordingly:
 * sp -> stack (pinned to ESP), M -> M.
 */
4494 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4496 ir_node *pred = get_Proj_pred(node);
4497 ir_node *new_pred = be_transform_node(pred);
4498 dbg_info *dbgi = get_irn_dbg_info(node);
4499 long proj = get_Proj_proj(node);
4501 if (proj == pn_be_SubSP_sp) {
4502 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4503 pn_ia32_AddSP_stack);
4504 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4506 } else if (proj == pn_be_SubSP_M) {
4507 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4510 panic("No idea how to transform proj->SubSP");
4514 * Transform and renumber the Projs from a Load.
/*
 * Transform and renumber the Projs of a firm Load. The Load itself may
 * have been transformed into an ia32 Load, a Conv with folded load, an
 * SSE xLoad or an x87 vfld — or swallowed entirely by source address
 * mode, in which case only the memory Proj remains. X_except Projs mark
 * the instruction as potentially trapping.
 * NOTE(review): extract is gapped — several case labels (pn_Load_res,
 * pn_Load_M), default branches and returns are not visible here.
 */
4516 static ir_node *gen_Proj_Load(ir_node *node)
4519 ir_node *pred = get_Proj_pred(node);
4520 dbg_info *dbgi = get_irn_dbg_info(node);
4521 long proj = get_Proj_proj(node);
4523 /* loads might be part of source address mode matches, so we don't
4524 * transform the ProjMs yet (with the exception of loads whose result is
4527 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4530 /* this is needed, because sometimes we have loops that are only
4531 reachable through the ProjM */
4532 be_enqueue_preds(node);
4533 /* do it in 2 steps, to silence firm verifier */
4534 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4535 set_Proj_proj(res, pn_ia32_mem);
4539 /* renumber the proj */
4540 new_pred = be_transform_node(pred);
4541 if (is_ia32_Load(new_pred)) {
4542 switch ((pn_Load)proj) {
4544 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4546 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4547 case pn_Load_X_except:
4548 /* This Load might raise an exception. Mark it. */
4549 set_ia32_exc_label(new_pred, 1);
4550 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4551 case pn_Load_X_regular:
4552 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* the load was folded into a Conv (source address mode) */
4554 } else if (is_ia32_Conv_I2I(new_pred) ||
4555 is_ia32_Conv_I2I8Bit(new_pred)) {
4556 set_irn_mode(new_pred, mode_T);
4557 switch ((pn_Load)proj) {
4559 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4561 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4562 case pn_Load_X_except:
4563 /* This Load might raise an exception. Mark it. */
4564 set_ia32_exc_label(new_pred, 1);
4565 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4566 case pn_Load_X_regular:
4567 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4569 } else if (is_ia32_xLoad(new_pred)) {
4570 switch ((pn_Load)proj) {
4572 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4574 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4575 case pn_Load_X_except:
4576 /* This Load might raise an exception. Mark it. */
4577 set_ia32_exc_label(new_pred, 1);
4578 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4579 case pn_Load_X_regular:
4580 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4582 } else if (is_ia32_vfld(new_pred)) {
4583 switch ((pn_Load)proj) {
4585 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4587 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4588 case pn_Load_X_except:
4589 /* This Load might raise an exception. Mark it. */
4590 set_ia32_exc_label(new_pred, 1);
4591 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4592 case pn_Load_X_regular:
4593 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4596 /* can happen for ProJMs when source address mode happened for the
4599 /* however it should not be the result proj, as that would mean the
4600 load had multiple users and should not have been used for
4602 if (proj != pn_Load_M) {
4603 panic("internal error: transformed node not a Load");
4605 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4608 panic("No idea how to transform Proj(Load) %+F", node);
/*
 * Transform and renumber the Projs of a firm Store. The Store may have
 * become an ia32 Store/Store8Bit, an x87 vfist/vfisttp/vfst, an SSE
 * xStore, a Sync (from gen_float_const_Store) or a destination-address-
 * mode instruction; map pn_Store_* to the matching ia32 proj numbers.
 * NOTE(review): extract is gapped — several case labels (pn_Store_M),
 * some returns and default branches are not visible here.
 */
4611 static ir_node *gen_Proj_Store(ir_node *node)
4613 ir_node *pred = get_Proj_pred(node);
4614 ir_node *new_pred = be_transform_node(pred);
4615 dbg_info *dbgi = get_irn_dbg_info(node);
4616 long pn = get_Proj_proj(node);
4618 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4619 switch ((pn_Store)pn) {
4621 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4622 case pn_Store_X_except:
4623 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4624 case pn_Store_X_regular:
4625 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4627 } else if (is_ia32_vfist(new_pred)) {
4628 switch ((pn_Store)pn) {
4630 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4631 case pn_Store_X_except:
4632 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4633 case pn_Store_X_regular:
4634 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4636 } else if (is_ia32_vfisttp(new_pred)) {
4637 switch ((pn_Store)pn) {
4639 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4640 case pn_Store_X_except:
4641 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4642 case pn_Store_X_regular:
4643 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4645 } else if (is_ia32_vfst(new_pred)) {
4646 switch ((pn_Store)pn) {
4648 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4649 case pn_Store_X_except:
4650 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4651 case pn_Store_X_regular:
4652 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4654 } else if (is_ia32_xStore(new_pred)) {
4655 switch ((pn_Store)pn) {
4657 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4658 case pn_Store_X_except:
4659 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4660 case pn_Store_X_regular:
4661 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4663 } else if (is_Sync(new_pred)) {
4664 /* hack for the case that gen_float_const_Store produced a Sync */
4665 if (pn == pn_Store_M) {
4668 panic("exception control flow for gen_float_const_Store not implemented yet");
4669 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4670 /* destination address mode */
4671 if (pn == pn_Store_M) {
4674 panic("exception control flow for destination AM not implemented yet");
4677 panic("No idea how to transform Proj(Store) %+F", node);
4681 * Transform and renumber the Projs from a Div or Mod instruction.
/*
 * Transform and renumber the Projs of a firm Div. The Div may have
 * become an integer Div/IDiv, an SSE xDiv or an x87 vfdiv; the Div and
 * IDiv proj numbers are asserted identical so one set of constants
 * works for both.
 * NOTE(review): extract is gapped — some case labels (pn_Div_M,
 * pn_Div_res) are not visible here.
 */
4683 static ir_node *gen_Proj_Div(ir_node *node)
4685 ir_node *pred = get_Proj_pred(node);
4686 ir_node *new_pred = be_transform_node(pred);
4687 dbg_info *dbgi = get_irn_dbg_info(node);
4688 long proj = get_Proj_proj(node);
4690 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4691 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4693 switch ((pn_Div)proj) {
4695 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4696 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4697 } else if (is_ia32_xDiv(new_pred)) {
4698 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4699 } else if (is_ia32_vfdiv(new_pred)) {
4700 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4702 panic("Div transformed to unexpected thing %+F", new_pred);
4705 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4706 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4707 } else if (is_ia32_xDiv(new_pred)) {
4708 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4709 } else if (is_ia32_vfdiv(new_pred)) {
4710 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4712 panic("Div transformed to unexpected thing %+F", new_pred);
4714 case pn_Div_X_except:
4715 set_ia32_exc_label(new_pred, 1);
4716 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4717 case pn_Div_X_regular:
4718 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4721 panic("No idea how to transform proj->Div");
4725 * Transform and renumber the Projs from a Div or Mod instruction.
/*
 * Transform and renumber the Projs of a firm Mod. Mod is always lowered
 * to an integer Div/IDiv (the remainder output); Div and IDiv proj
 * numbers are asserted identical so one set of constants works for both.
 * NOTE(review): extract is gapped — some case labels (pn_Mod_M,
 * pn_Mod_res) are not visible here.
 */
4727 static ir_node *gen_Proj_Mod(ir_node *node)
4729 ir_node *pred = get_Proj_pred(node);
4730 ir_node *new_pred = be_transform_node(pred);
4731 dbg_info *dbgi = get_irn_dbg_info(node);
4732 long proj = get_Proj_proj(node);
4734 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4735 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4736 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4738 switch ((pn_Mod)proj) {
4740 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4742 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4743 case pn_Mod_X_except:
4744 set_ia32_exc_label(new_pred, 1);
4745 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4746 case pn_Mod_X_regular:
4747 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4749 panic("No idea how to transform proj->Mod");
4753 * Transform and renumber the Projs from a CopyB.
/*
 * Transform and renumber the Projs of a firm CopyB. The CopyB may have
 * become an ia32 CopyB_i (immediate size, rep movs style) or a generic
 * CopyB; map each pn_CopyB_* to the matching ia32 proj number.
 * NOTE(review): extract is gapped — some case labels (pn_CopyB_M) and
 * fall-through/break lines are not visible here.
 */
4755 static ir_node *gen_Proj_CopyB(ir_node *node)
4757 ir_node *pred = get_Proj_pred(node);
4758 ir_node *new_pred = be_transform_node(pred);
4759 dbg_info *dbgi = get_irn_dbg_info(node);
4760 long proj = get_Proj_proj(node);
4762 switch ((pn_CopyB)proj) {
4764 if (is_ia32_CopyB_i(new_pred)) {
4765 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4766 } else if (is_ia32_CopyB(new_pred)) {
4767 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4770 case pn_CopyB_X_regular:
4771 if (is_ia32_CopyB_i(new_pred)) {
4772 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4773 } else if (is_ia32_CopyB(new_pred)) {
4774 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4777 case pn_CopyB_X_except:
4778 if (is_ia32_CopyB_i(new_pred)) {
4779 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4780 } else if (is_ia32_CopyB(new_pred)) {
4781 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4786 panic("No idea how to transform proj->CopyB");
/**
 * Transform a be_Call into an ia32_Call.
 * The call target may be folded into an address mode or immediate,
 * register parameters are rewired onto the fixed GP registers
 * eax/ecx/edx, and SSE2 calls are remembered for result fixup
 * (see postprocess_fp_call_results).
 */
4789 static ir_node *gen_be_Call(ir_node *node)
4791 dbg_info *const dbgi = get_irn_dbg_info(node);
4792 ir_node *const src_block = get_nodes_block(node);
4793 ir_node *const block = be_transform_node(src_block);
4794 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4795 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4796 ir_node *const sp = be_transform_node(src_sp);
4797 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4798 ia32_address_mode_t am;
4799 ia32_address_t *const addr = &am.addr;
/* Register parameters default to noreg and are filled in below. */
4804 ir_node * eax = noreg_GP;
4805 ir_node * ecx = noreg_GP;
4806 ir_node * edx = noreg_GP;
4807 unsigned const pop = be_Call_get_pop(node);
4808 ir_type *const call_tp = be_Call_get_type(node);
4809 int old_no_pic_adjust;
4810 int throws_exception = ir_throws_exception(node);
4812 /* Run the x87 simulator if the call returns a float value */
4813 if (get_method_n_ress(call_tp) > 0) {
4814 ir_type *const res_type = get_method_res_type(call_tp, 0);
4815 ir_mode *const res_mode = get_type_mode(res_type);
4817 if (res_mode != NULL && mode_is_float(res_mode)) {
4818 ir_graph *irg = current_ir_graph;
4819 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4820 irg_data->do_x87_sim = 1;
4824 /* We do not want be_Call direct calls */
4825 assert(be_Call_get_entity(node) == NULL);
4827 /* special case for PIC trampoline calls */
4828 old_no_pic_adjust = ia32_no_pic_adjust;
4829 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
/* Try to fold the call target into an address mode or immediate. */
4831 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4832 match_am | match_immediate);
4834 ia32_no_pic_adjust = old_no_pic_adjust;
/* The last operand of the be_Call is the fpcw; the remaining ones
 * (from n_be_Call_first_arg on) are register parameters. */
4836 i = get_irn_arity(node) - 1;
4837 fpcw = be_transform_node(get_irn_n(node, i--));
4838 for (; i >= n_be_Call_first_arg; --i) {
4839 arch_register_req_t const *const req = arch_get_register_req(node, i);
4840 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4842 assert(req->type == arch_register_req_type_limited);
4843 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* Each register parameter must be constrained to exactly one of the
 * three caller-clobbered GP registers, and each only once. */
4845 switch (*req->limited) {
4846 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4847 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4848 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4849 default: panic("Invalid GP register for register parameter");
4853 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4854 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4855 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4856 ir_set_throws_exception(call, throws_exception);
4857 set_am_attributes(call, &am);
4858 call = fix_mem_proj(call, &am);
4860 if (get_irn_pinned(node) == op_pin_state_pinned)
4861 set_irn_pinned(call, op_pin_state_pinned);
4863 SET_IA32_ORIG_NODE(call, node);
4865 if (ia32_cg_config.use_sse2) {
4866 /* remember this call for post-processing */
4867 ARR_APP1(ir_node *, call_list, call);
4868 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4875 * Transform Builtin trap
4877 static ir_node *gen_trap(ir_node *node)
4879 dbg_info *dbgi = get_irn_dbg_info(node);
4880 ir_node *block = be_transform_node(get_nodes_block(node));
4881 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4883 return new_bd_ia32_UD2(dbgi, block, mem);
4887 * Transform Builtin debugbreak
4889 static ir_node *gen_debugbreak(ir_node *node)
4891 dbg_info *dbgi = get_irn_dbg_info(node);
4892 ir_node *block = be_transform_node(get_nodes_block(node));
4893 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4895 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4899 * Transform Builtin return_address
/**
 * Transform the Builtin return_address: walk `value` frames up via
 * ClimbFrame (if needed) and load the return address slot of that frame.
 */
4901 static ir_node *gen_return_address(ir_node *node)
4903 ir_node *param = get_Builtin_param(node, 0);
4904 ir_node *frame = get_Builtin_param(node, 1);
4905 dbg_info *dbgi = get_irn_dbg_info(node);
/* param must be a Const: the number of frames to climb. */
4906 ir_tarval *tv = get_Const_tarval(param);
4907 ir_graph *irg = get_irn_irg(node);
4908 unsigned long value = get_tarval_long(tv);
4910 ir_node *block = be_transform_node(get_nodes_block(node));
4911 ir_node *ptr = be_transform_node(frame);
/* ClimbFrame needs two scratch values produced out of thin air. */
4915 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4916 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4917 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4920 /* load the return address from this frame */
4921 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4923 set_irn_pinned(load, get_irn_pinned(node));
4924 set_ia32_op_type(load, ia32_AddrModeS);
4925 set_ia32_ls_mode(load, mode_Iu);
4927 set_ia32_am_offs_int(load, 0);
4928 set_ia32_use_frame(load);
4929 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
/* A floating load may be rematerialized; the assert documents that the
 * result proj number is identical across all ia32 load variants. */
4931 if (get_irn_pinned(node) == op_pin_state_floats) {
4932 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4933 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4934 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4935 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4938 SET_IA32_ORIG_NODE(load, node);
4939 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4943 * Transform Builtin frame_address
/**
 * Transform the Builtin frame_address: walk `value` frames up via
 * ClimbFrame (if needed) and load the saved frame pointer of that frame.
 */
4945 static ir_node *gen_frame_address(ir_node *node)
4947 ir_node *param = get_Builtin_param(node, 0);
4948 ir_node *frame = get_Builtin_param(node, 1);
4949 dbg_info *dbgi = get_irn_dbg_info(node);
/* param must be a Const: the number of frames to climb. */
4950 ir_tarval *tv = get_Const_tarval(param);
4951 ir_graph *irg = get_irn_irg(node);
4952 unsigned long value = get_tarval_long(tv);
4954 ir_node *block = be_transform_node(get_nodes_block(node));
4955 ir_node *ptr = be_transform_node(frame);
4960 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4961 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4962 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4965 /* load the frame address from this frame */
4966 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4968 set_irn_pinned(load, get_irn_pinned(node));
4969 set_ia32_op_type(load, ia32_AddrModeS);
4970 set_ia32_ls_mode(load, mode_Iu);
/* NOTE(review): the frame-address entity may be absent; the offset is
 * set again below mirroring gcc's behavior — TODO confirm why it is
 * set twice. */
4972 ent = ia32_get_frame_address_entity(irg);
4974 set_ia32_am_offs_int(load, 0);
4975 set_ia32_use_frame(load);
4976 set_ia32_frame_ent(load, ent);
4978 /* will fail anyway, but gcc does this: */
4979 set_ia32_am_offs_int(load, 0);
4982 if (get_irn_pinned(node) == op_pin_state_floats) {
4983 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4984 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4985 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4986 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4989 SET_IA32_ORIG_NODE(load, node);
4990 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4994 * Transform Builtin prefetch.
/**
 * Transform the Builtin prefetch into an SSE or 3DNow! prefetch
 * instruction; without either feature the memory edge is simply routed
 * through.
 */
4996 static ir_node *gen_prefetch(ir_node *node)
4999 ir_node *ptr, *block, *mem, *base, *index;
5000 ir_node *param, *new_node;
5003 ia32_address_t addr;
5005 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
5006 /* no prefetch at all, route memory */
5007 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write flag (must be a Const). */
5010 param = get_Builtin_param(node, 1);
5011 tv = get_Const_tarval(param);
5012 rw = get_tarval_long(tv);
5014 /* construct load address */
5015 memset(&addr, 0, sizeof(addr));
5016 ptr = get_Builtin_param(node, 0);
5017 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5024 base = be_transform_node(base);
5027 if (index == NULL) {
5030 index = be_transform_node(index);
5033 dbgi = get_irn_dbg_info(node);
5034 block = be_transform_node(get_nodes_block(node));
5035 mem = be_transform_node(get_Builtin_mem(node));
5037 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
5038 /* we have 3DNow!, this was already checked above */
5039 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
5040 } else if (ia32_cg_config.use_sse_prefetch) {
5041 /* note: rw == 1 is IGNORED in that case */
5042 param = get_Builtin_param(node, 2);
5043 tv = get_Const_tarval(param);
5044 locality = get_tarval_long(tv);
5046 /* SSE style prefetch */
/* The locality hint selects between prefetchnta/t2/t1/t0. */
5049 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
5052 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
5055 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
5058 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
5062 assert(ia32_cg_config.use_3dnow_prefetch);
5063 /* 3DNow! style prefetch */
5064 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
5067 set_irn_pinned(new_node, get_irn_pinned(node));
5068 set_ia32_op_type(new_node, ia32_AddrModeS);
5069 set_ia32_ls_mode(new_node, mode_Bu);
5070 set_address(new_node, &addr);
5072 SET_IA32_ORIG_NODE(new_node, node);
5074 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5078 * Transform a bsf-like node (the operand may be matched as an address mode).
/**
 * Build a bsf/bsr-style unary node whose operand may be folded into an
 * address mode.
 *
 * @param node  the firm Builtin node providing the operand
 * @param func  constructor for the concrete ia32 node
 */
5080 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5082 ir_node *param = get_Builtin_param(node, 0);
5083 dbg_info *dbgi = get_irn_dbg_info(node);
5085 ir_node *block = get_nodes_block(node);
5086 ir_node *new_block = be_transform_node(block);
5088 ia32_address_mode_t am;
5089 ia32_address_t *addr = &am.addr;
5092 match_arguments(&am, block, NULL, param, NULL, match_am);
5094 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5095 set_am_attributes(cnt, &am);
5096 set_ia32_ls_mode(cnt, get_irn_mode(param));
5098 SET_IA32_ORIG_NODE(cnt, node);
/* fix_mem_proj rewires memory users when the operand was folded. */
5099 return fix_mem_proj(cnt, &am);
5103 * Transform builtin ffs.
/**
 * Transform builtin ffs: result = bsf(x) | -(x == 0) + 1, i.e. the index
 * of the lowest set bit plus one, or 0 when the input is zero.
 */
5105 static ir_node *gen_ffs(ir_node *node)
5107 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5108 ir_node *real = skip_Proj(bsf);
5109 dbg_info *dbgi = get_irn_dbg_info(real);
5110 ir_node *block = get_nodes_block(real);
5111 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* We also need the Bsf flags result, so force the node to mode_T. */
5114 if (get_irn_mode(real) != mode_T) {
5115 set_irn_mode(real, mode_T);
5116 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5119 flag = new_r_Proj(real, mode_b, pn_ia32_flags)
/* set = (input == 0), as an 8-bit value zero-extended to 32 bit. */
5122 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5123 SET_IA32_ORIG_NODE(set, node);
5126 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5127 SET_IA32_ORIG_NODE(conv, node);
/* neg = -(x == 0): all-ones mask iff the input was zero. */
5130 neg = new_bd_ia32_Neg(dbgi, block, conv);
5133 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5134 set_ia32_commutative(orn);
/* +1 via Lea so the flags produced above stay untouched. */
5137 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5138 add_ia32_am_offs_int(add, 1);
5143 * Transform builtin clz.
5145 static ir_node *gen_clz(ir_node *node)
5147 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5148 ir_node *real = skip_Proj(bsr);
5149 dbg_info *dbgi = get_irn_dbg_info(real);
5150 ir_node *block = get_nodes_block(real);
5151 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5153 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5157 * Transform builtin ctz.
5159 static ir_node *gen_ctz(ir_node *node)
5161 return gen_unop_AM(node, new_bd_ia32_Bsf);
5165 * Transform builtin parity.
/**
 * Transform builtin parity: fold the 32-bit input down to one byte by
 * xoring halves, then read the CPU parity flag via setcc.
 */
5167 static ir_node *gen_parity(ir_node *node)
5169 dbg_info *dbgi = get_irn_dbg_info(node);
5170 ir_node *block = get_nodes_block(node);
5171 ir_node *new_block = be_transform_node(block);
5172 ir_node *param = get_Builtin_param(node, 0);
5173 ir_node *new_param = be_transform_node(param);
5176 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5177 * so we have to do complicated xoring first.
5178 * (we should also better lower this before the backend so we still have a
5179 * chance for CSE, constant folding and other goodies for some of these
/* Fold upper half into lower half, then high byte into low byte. */
5182 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5183 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5184 ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5186 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
/* We need the flags output of XorHighLow, so force mode_T. */
5189 set_irn_mode(xor2, mode_T);
5190 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
/* setnp: the x86 parity flag is set for EVEN parity, hence not_parity. */
5193 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5194 SET_IA32_ORIG_NODE(new_node, node);
/* Zero-extend the 8-bit setcc result to a full register. */
5197 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5198 nomem, new_node, mode_Bu);
5199 SET_IA32_ORIG_NODE(new_node, node);
5204 * Transform builtin popcount
/**
 * Transform builtin popcount: use the popcnt instruction when available,
 * otherwise expand the classic parallel bit-count from Hacker's Delight.
 */
5206 static ir_node *gen_popcount(ir_node *node)
5208 ir_node *param = get_Builtin_param(node, 0);
5209 dbg_info *dbgi = get_irn_dbg_info(node);
5211 ir_node *block = get_nodes_block(node);
5212 ir_node *new_block = be_transform_node(block);
5215 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5217 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5218 if (ia32_cg_config.use_popcnt) {
5219 ia32_address_mode_t am;
5220 ia32_address_t *addr = &am.addr;
5223 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5225 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5226 set_am_attributes(cnt, &am);
5227 set_ia32_ls_mode(cnt, get_irn_mode(param));
5229 SET_IA32_ORIG_NODE(cnt, node);
5230 return fix_mem_proj(cnt, &am);
5233 new_param = be_transform_node(param);
5235 /* do the standard popcount algo */
5236 /* TODO: This is stupid, we should transform this before the backend,
5237 * to get CSE, localopts, etc. for the operations
5238 * TODO: This is also not the optimal algorithm (it is just the starting
5239 * example in hackers delight, they optimize it more on the following page)
5240 * But I'm too lazy to fix this now, as the code should get lowered before
5241 * the backend anyway.
5244 /* m1 = x & 0x55555555 */
5245 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5246 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5249 simm = ia32_create_Immediate(NULL, 0, 1);
5250 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5252 /* m2 = s1 & 0x55555555 */
5253 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2: 2-bit partial sums (Lea used as a flag-free add) */
5256 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5258 /* m4 = m3 & 0x33333333 */
5259 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5260 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5263 simm = ia32_create_Immediate(NULL, 0, 2);
5264 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5266 /* m5 = s2 & 0x33333333 */
5267 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5: 4-bit partial sums */
5270 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5272 /* m7 = m6 & 0x0F0F0F0F */
5273 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5274 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5277 simm = ia32_create_Immediate(NULL, 0, 4);
5278 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5280 /* m8 = s3 & 0x0F0F0F0F */
5281 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8: 8-bit partial sums */
5284 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5286 /* m10 = m9 & 0x00FF00FF */
5287 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5288 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5291 simm = ia32_create_Immediate(NULL, 0, 8);
5292 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5294 /* m11 = s4 & 0x00FF00FF */
5295 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5297 /* m12 = m10 + m11 */
5298 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5300 /* m13 = m12 & 0x0000FFFF */
5301 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5302 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5304 /* s5 = m12 >> 16 */
5305 simm = ia32_create_Immediate(NULL, 0, 16);
5306 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5308 /* res = m13 + s5 */
5309 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5313 * Transform builtin byte swap.
/**
 * Transform builtin byte swap: use bswap on i486+, otherwise expand the
 * 32-bit swap via shifts and masks; 16-bit swap is always available.
 */
5315 static ir_node *gen_bswap(ir_node *node)
5317 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5318 dbg_info *dbgi = get_irn_dbg_info(node);
5320 ir_node *block = get_nodes_block(node);
5321 ir_node *new_block = be_transform_node(block);
5322 ir_mode *mode = get_irn_mode(param);
5323 unsigned size = get_mode_size_bits(mode);
5324 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
/* 32-bit case: */
5328 if (ia32_cg_config.use_i486) {
5329 /* swap available */
5330 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* Expansion: (x<<24) + ((x<<8)&0xFF00) + ((x>>8)&0xFF0000) + (x>>24),
 * with Lea as flag-free add. */
5332 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5333 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5335 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5336 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5338 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5340 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5341 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5343 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5344 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5347 /* swap16 always available */
5348 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5351 panic("Invalid bswap size (%d)", size);
5356 * Transform builtin outport.
/**
 * Transform builtin outport into an ia32 Outport node; the port number
 * may become an immediate operand.
 */
5358 static ir_node *gen_outport(ir_node *node)
5360 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5361 ir_node *oldv = get_Builtin_param(node, 1);
/* The value's mode determines the out instruction's operand size. */
5362 ir_mode *mode = get_irn_mode(oldv);
5363 ir_node *value = be_transform_node(oldv);
5364 ir_node *block = be_transform_node(get_nodes_block(node));
5365 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5366 dbg_info *dbgi = get_irn_dbg_info(node);
5368 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5369 set_ia32_ls_mode(res, mode);
5374 * Transform builtin inport.
/**
 * Transform builtin inport into an ia32 Inport node; the result mode is
 * taken from the builtin's method type.
 */
5376 static ir_node *gen_inport(ir_node *node)
5378 ir_type *tp = get_Builtin_type(node);
5379 ir_type *rstp = get_method_res_type(tp, 0);
5380 ir_mode *mode = get_type_mode(rstp);
5381 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5382 ir_node *block = be_transform_node(get_nodes_block(node));
5383 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5384 dbg_info *dbgi = get_irn_dbg_info(node);
5386 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5387 set_ia32_ls_mode(res, mode);
5389 /* check for missing Result Proj */
5394 * Transform a builtin inner trampoline
/**
 * Transform a builtin inner trampoline: emit stores that write the
 * trampoline machine code ("mov ecx, <env>; jmp rel <callee>") to the
 * given memory location and return (trampoline address, memory) as a
 * Tuple.
 */
5396 static ir_node *gen_inner_trampoline(ir_node *node)
5398 ir_node *ptr = get_Builtin_param(node, 0);
5399 ir_node *callee = get_Builtin_param(node, 1);
5400 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5401 ir_node *mem = get_Builtin_mem(node);
5402 ir_node *block = get_nodes_block(node);
5403 ir_node *new_block = be_transform_node(block);
5407 ir_node *trampoline;
5409 dbg_info *dbgi = get_irn_dbg_info(node);
5410 ia32_address_t addr;
5412 /* construct store address */
5413 memset(&addr, 0, sizeof(addr));
5414 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5416 if (addr.base == NULL) {
5417 addr.base = noreg_GP;
5419 addr.base = be_transform_node(addr.base);
5422 if (addr.index == NULL) {
5423 addr.index = noreg_GP;
5425 addr.index = be_transform_node(addr.index);
5427 addr.mem = be_transform_node(mem);
5429 /* mov ecx, <env> */
/* 0xB9 is the opcode byte for "mov ecx, imm32". */
5430 val = ia32_create_Immediate(NULL, 0, 0xB9);
5431 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5432 addr.index, addr.mem, val);
5433 set_irn_pinned(store, get_irn_pinned(node));
5434 set_ia32_op_type(store, ia32_AddrModeD);
5435 set_ia32_ls_mode(store, mode_Bu);
5436 set_address(store, &addr);
/* Store the 32-bit environment pointer (the mov's immediate). */
5440 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5441 addr.index, addr.mem, env);
5442 set_irn_pinned(store, get_irn_pinned(node));
5443 set_ia32_op_type(store, ia32_AddrModeD);
5444 set_ia32_ls_mode(store, mode_Iu);
5445 set_address(store, &addr);
5449 /* jmp rel <callee> */
/* 0xE9 is the opcode byte for "jmp rel32". */
5450 val = ia32_create_Immediate(NULL, 0, 0xE9);
5451 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5452 addr.index, addr.mem, val);
5453 set_irn_pinned(store, get_irn_pinned(node));
5454 set_ia32_op_type(store, ia32_AddrModeD);
5455 set_ia32_ls_mode(store, mode_Bu);
5456 set_address(store, &addr);
5460 trampoline = be_transform_node(ptr);
5462 /* the callee is typically an immediate */
/* The rel32 is callee - end_of_jmp; -10 accounts for the 10 trampoline
 * bytes already emitted before the displacement is applied. */
5463 if (is_SymConst(callee)) {
5464 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5466 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5467 add_ia32_am_offs_int(rel, -10);
5469 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5471 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5472 addr.index, addr.mem, rel);
5473 set_irn_pinned(store, get_irn_pinned(node));
5474 set_ia32_op_type(store, ia32_AddrModeD);
5475 set_ia32_ls_mode(store, mode_Iu);
5476 set_address(store, &addr);
/* Tuple[0] = memory, Tuple[1] = trampoline address (see gen_Proj_Builtin). */
5481 return new_r_Tuple(new_block, 2, in);
5485 * Transform Builtin node.
/**
 * Transform a Builtin node by dispatching on its kind to the specific
 * gen_* routine; panics for kinds the ia32 backend does not implement.
 */
5487 static ir_node *gen_Builtin(ir_node *node)
5489 ir_builtin_kind kind = get_Builtin_kind(node);
5493 return gen_trap(node);
5494 case ir_bk_debugbreak:
5495 return gen_debugbreak(node);
5496 case ir_bk_return_address:
5497 return gen_return_address(node);
5498 case ir_bk_frame_address:
5499 return gen_frame_address(node);
5500 case ir_bk_prefetch:
5501 return gen_prefetch(node);
5503 return gen_ffs(node);
5505 return gen_clz(node);
5507 return gen_ctz(node);
5509 return gen_parity(node);
5510 case ir_bk_popcount:
5511 return gen_popcount(node);
5513 return gen_bswap(node);
5515 return gen_outport(node);
5517 return gen_inport(node);
5518 case ir_bk_inner_trampoline:
5519 return gen_inner_trampoline(node);
5521 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5525 * Transform Proj(Builtin) node.
/**
 * Transform Proj(Builtin): most builtins transform to a node whose mode
 * already matches, so the transformed predecessor can be returned
 * directly; inport and inner_trampoline need proj renumbering.
 */
5527 static ir_node *gen_Proj_Builtin(ir_node *proj)
5529 ir_node *node = get_Proj_pred(proj);
5530 ir_node *new_node = be_transform_node(node);
5531 ir_builtin_kind kind = get_Builtin_kind(node);
/* Value-producing builtins: the transformed node is the result itself. */
5534 case ir_bk_return_address:
5535 case ir_bk_frame_address:
5540 case ir_bk_popcount:
5542 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* Memory-only builtins: the transformed node is the memory value. */
5545 case ir_bk_debugbreak:
5546 case ir_bk_prefetch:
5548 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport produces both a value and memory; map the proj numbers. */
5551 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5552 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5554 assert(get_Proj_proj(proj) == pn_Builtin_M);
5555 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
/* inner_trampoline transforms to a Tuple (mem, trampoline address). */
5557 case ir_bk_inner_trampoline:
5558 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5559 return get_Tuple_pred(new_node, 1);
5561 assert(get_Proj_proj(proj) == pn_Builtin_M);
5562 return get_Tuple_pred(new_node, 0);
5565 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5568 static ir_node *gen_be_IncSP(ir_node *node)
5570 ir_node *res = be_duplicate_node(node);
5571 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5577 * Transform the Projs from a be_Call.
/**
 * Transform and renumber the Projs from a be_Call onto the ia32_Call
 * proj numbers; result projs are matched by their register constraint.
 */
5579 static ir_node *gen_Proj_be_Call(ir_node *node)
5581 ir_node *call = get_Proj_pred(node);
5582 ir_node *new_call = be_transform_node(call);
5583 dbg_info *dbgi = get_irn_dbg_info(node);
5584 long proj = get_Proj_proj(node);
5585 ir_mode *mode = get_irn_mode(node);
5588 if (proj == pn_be_Call_M) {
5589 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5591 /* transform call modes */
5592 if (mode_is_data(mode)) {
5593 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5597 /* Map from be_Call to ia32_Call proj number */
5598 if (proj == pn_be_Call_sp) {
5599 proj = pn_ia32_Call_stack;
5600 } else if (proj == pn_be_Call_M) {
5601 proj = pn_ia32_Call_M;
5602 } else if (proj == pn_be_Call_X_except) {
5603 proj = pn_ia32_Call_X_except;
5604 } else if (proj == pn_be_Call_X_regular) {
5605 proj = pn_ia32_Call_X_regular;
/* Result projs: find the ia32_Call output with the same register
 * constraint as the original be_Call result. */
5607 arch_register_req_t const *const req = arch_get_register_req_out(node);
5608 int const n_outs = arch_irn_get_n_outs(new_call);
5611 assert(proj >= pn_be_Call_first_res);
5612 assert(req->type & arch_register_req_type_limited);
5614 for (i = 0; i < n_outs; ++i) {
5615 arch_register_req_t const *const new_req
5616 = arch_get_out_register_req(new_call, i);
5618 if (!(new_req->type & arch_register_req_type_limited) ||
5619 new_req->cls != req->cls ||
5620 *new_req->limited != *req->limited)
5629 res = new_rd_Proj(dbgi, new_call, mode, proj);
5631 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5633 case pn_ia32_Call_stack:
5634 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5637 case pn_ia32_Call_fpcw:
5638 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5646 * Transform the Projs from a Cmp.
/* Proj(Cmp) must never reach the backend: mode_b values are expected to
 * have been lowered beforehand, so this always panics. */
5648 static ir_node *gen_Proj_Cmp(ir_node *node)
5650 /* this probably means not all mode_b nodes were lowered... */
5651 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/**
 * Transform Proj(ASM): memory projs are renumbered to the ASM node's
 * last output; int/reference and float projs keep (adjusted) positions.
 */
5655 static ir_node *gen_Proj_ASM(ir_node *node)
5657 ir_mode *mode = get_irn_mode(node);
5658 ir_node *pred = get_Proj_pred(node);
5659 ir_node *new_pred = be_transform_node(pred);
5660 long pos = get_Proj_proj(node);
5662 if (mode == mode_M) {
/* The memory output is always the last output of the new ASM node. */
5663 pos = arch_irn_get_n_outs(new_pred)-1;
5664 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5666 } else if (mode_is_float(mode)) {
5669 panic("unexpected proj mode at ASM");
5672 return new_r_Proj(new_pred, mode, pos);
5676 * Transform and potentially renumber Proj nodes.
/**
 * Transform and potentially renumber Proj nodes: dispatch on the
 * predecessor's opcode to the specific gen_Proj_* routine.
 */
5678 static ir_node *gen_Proj(ir_node *node)
5680 ir_node *pred = get_Proj_pred(node);
5683 switch (get_irn_opcode(pred)) {
5685 return gen_Proj_Load(node);
5687 return gen_Proj_Store(node);
5689 return gen_Proj_ASM(node);
5691 return gen_Proj_Builtin(node);
5693 return gen_Proj_Div(node);
5695 return gen_Proj_Mod(node);
5697 return gen_Proj_CopyB(node);
5699 return gen_Proj_be_SubSP(node);
5701 return gen_Proj_be_AddSP(node);
5703 return gen_Proj_be_Call(node);
5705 return gen_Proj_Cmp(node);
5707 proj = get_Proj_proj(node);
5709 case pn_Start_X_initial_exec: {
5710 ir_node *block = get_nodes_block(pred);
5711 ir_node *new_block = be_transform_node(block);
5712 dbg_info *dbgi = get_irn_dbg_info(node);
5713 /* we exchange the ProjX with a jump */
5714 ir_node *jump = new_rd_Jmp(dbgi, new_block);
/* Default case: Projs of already-ia32 or unhandled predecessors. */
5722 if (is_ia32_l_FloattoLL(pred)) {
5723 return gen_Proj_l_FloattoLL(node);
5725 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5729 ir_mode *mode = get_irn_mode(node);
/* GP-register values are re-projected in mode_Iu, keeping the node
 * number of the original Proj for debugging. */
5730 if (ia32_mode_needs_gp_reg(mode)) {
5731 ir_node *new_pred = be_transform_node(pred);
5732 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5733 get_Proj_proj(node));
5734 new_proj->node_nr = node->node_nr;
5739 return be_duplicate_node(node);
5743 * Enters all transform functions into the generic pointer
/**
 * Enters all transform functions into the generic function pointer of
 * each opcode, after clearing any previously registered ones.
 */
5745 static void register_transformers(void)
5747 /* first clear the generic function pointer for all ops */
5748 be_start_transform_setup();
5750 be_set_transform_function(op_Add, gen_Add);
5751 be_set_transform_function(op_And, gen_And);
5752 be_set_transform_function(op_ASM, ia32_gen_ASM);
5753 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5754 be_set_transform_function(op_be_Call, gen_be_Call);
5755 be_set_transform_function(op_be_Copy, gen_be_Copy);
5756 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5757 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5758 be_set_transform_function(op_be_Return, gen_be_Return);
5759 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5760 be_set_transform_function(op_Builtin, gen_Builtin);
5761 be_set_transform_function(op_Cmp, gen_Cmp);
5762 be_set_transform_function(op_Cond, gen_Cond);
5763 be_set_transform_function(op_Const, gen_Const);
5764 be_set_transform_function(op_Conv, gen_Conv);
5765 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5766 be_set_transform_function(op_Div, gen_Div);
5767 be_set_transform_function(op_Eor, gen_Eor);
5768 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5769 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
/* Nodes already in final ia32 form are simply duplicated. */
5770 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5771 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5772 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5773 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5774 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5775 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5776 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5777 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5778 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5779 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5780 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5781 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5782 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5783 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5784 be_set_transform_function(op_IJmp, gen_IJmp);
5785 be_set_transform_function(op_Jmp, gen_Jmp);
5786 be_set_transform_function(op_Load, gen_Load);
5787 be_set_transform_function(op_Minus, gen_Minus);
5788 be_set_transform_function(op_Mod, gen_Mod);
5789 be_set_transform_function(op_Mul, gen_Mul);
5790 be_set_transform_function(op_Mulh, gen_Mulh);
5791 be_set_transform_function(op_Mux, gen_Mux);
5792 be_set_transform_function(op_Not, gen_Not);
5793 be_set_transform_function(op_Or, gen_Or);
5794 be_set_transform_function(op_Phi, gen_Phi);
5795 be_set_transform_function(op_Proj, gen_Proj);
5796 be_set_transform_function(op_Rotl, gen_Rotl);
5797 be_set_transform_function(op_Shl, gen_Shl);
5798 be_set_transform_function(op_Shr, gen_Shr);
5799 be_set_transform_function(op_Shrs, gen_Shrs);
5800 be_set_transform_function(op_Store, gen_Store);
5801 be_set_transform_function(op_Sub, gen_Sub);
5802 be_set_transform_function(op_SymConst, gen_SymConst);
5803 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5807 * Pre-transform all unknown and noreg nodes.
/**
 * Pre-transform all unknown and noreg nodes of the current graph and
 * cache the frequently used nomem / noreg_GP nodes in file-scope
 * variables for the transformers above.
 */
5809 static void ia32_pretransform_node(void)
5811 ir_graph *irg = current_ir_graph;
5812 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5814 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5815 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5816 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5817 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5818 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5820 nomem = get_irg_no_mem(irg);
5821 noreg_GP = ia32_new_NoReg_gp(irg);
5825 * Post-process all calls if we are in SSE mode.
5826 * The ABI requires that the results are in st0, copy them
5827 * to a xmm register.
/**
 * Post-process all recorded calls when compiling in SSE mode: the ABI
 * returns floats in st(0), so each float result is either consumed by a
 * patched x87 store (xStore -> vfst) or spilled to the frame and
 * reloaded into an XMM register.
 */
5829 static void postprocess_fp_call_results(void)
/* call_list / call_types were filled by gen_be_Call. */
5833 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5834 ir_node *call = call_list[i];
5835 ir_type *mtp = call_types[i];
5838 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5839 ir_type *res_tp = get_method_res_type(mtp, j);
5840 ir_node *res, *new_res;
5841 const ir_edge_t *edge, *next;
5844 if (! is_atomic_type(res_tp)) {
5845 /* no floating point return */
5848 mode = get_type_mode(res_tp);
5849 if (! mode_is_float(mode)) {
5850 /* no floating point return */
5854 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5857 /* now patch the users */
5858 foreach_out_edge_safe(res, edge, next) {
5859 ir_node *succ = get_edge_src_irn(edge);
/* Keeps are left alone; they only keep the value alive. */
5862 if (be_is_Keep(succ))
5865 if (is_ia32_xStore(succ)) {
5866 /* an xStore can be patched into an vfst */
5867 dbg_info *db = get_irn_dbg_info(succ);
5868 ir_node *block = get_nodes_block(succ);
5869 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5870 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5871 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5872 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5873 ir_mode *mode = get_ia32_ls_mode(succ);
5875 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5876 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5877 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5878 if (is_ia32_use_frame(succ))
5879 set_ia32_use_frame(st);
5880 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5881 set_irn_pinned(st, get_irn_pinned(succ));
5882 set_ia32_op_type(st, ia32_AddrModeD);
/* The proj numbers of xStore and vfst must agree so the old store's
 * Projs can be reused unchanged. */
5884 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5885 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5886 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
/* Lazily create the spill/reload pair the first time a non-store user
 * of the x87 result is encountered. */
5889 } else if (new_res == NULL) {
5890 dbg_info *db = get_irn_dbg_info(call);
5891 ir_node *block = get_nodes_block(call);
5892 ir_node *frame = get_irg_frame(current_ir_graph);
5893 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5894 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5895 ir_node *vfst, *xld, *new_mem;
5898 /* store st(0) on stack */
5899 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5901 set_ia32_op_type(vfst, ia32_AddrModeD);
5902 set_ia32_use_frame(vfst);
5904 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5906 /* load into SSE register */
5907 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5909 set_ia32_op_type(xld, ia32_AddrModeS);
5910 set_ia32_use_frame(xld);
5912 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5913 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5915 if (old_mem != NULL) {
5916 edges_reroute(old_mem, new_mem);
5920 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5926 /* Run the firm -> ia32 transformation on the given graph. */
5927 void ia32_transform_graph(ir_graph *irg)
5931 register_transformers();
5932 initial_fpcw = NULL;
5933 ia32_no_pic_adjust = 0;
/* remember the initial value of the FPU control word register */
5935 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* the heights and non-address-mode info are consumed by the transformers */
5937 be_timer_push(T_HEIGHTS);
5938 ia32_heights = heights_new(irg);
5939 be_timer_pop(T_HEIGHTS);
5940 ia32_calculate_non_address_mode_nodes(irg);
5942 /* the transform phase is not safe for CSE (yet) because several nodes get
5943 * attributes set after their creation */
5944 cse_last = get_opt_cse();
/* call_list/call_types collect float-returning calls during the transform
 * for the SSE post-processing below */
5947 call_list = NEW_ARR_F(ir_node *, 0);
5948 call_types = NEW_ARR_F(ir_type *, 0);
5949 be_transform_graph(irg, ia32_pretransform_node);
/* in SSE mode the ABI still returns floats in st(0); patch the users */
5951 if (ia32_cg_config.use_sse2)
5952 postprocess_fp_call_results();
5953 DEL_ARR_F(call_types);
5954 DEL_ARR_F(call_list);
/* restore the CSE setting saved above */
5956 set_opt_cse(cse_last);
/* release the analysis data built before the transform */
5958 ia32_free_non_address_mode_nodes();
5959 heights_free(ia32_heights);
5960 ia32_heights = NULL;
5963 void ia32_init_transform(void)
5965 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");