2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
71 /* define this to construct SSE constants instead of load them */
72 #undef CONSTRUCT_SSE_CONST
75 #define SFP_SIGN "0x80000000"
76 #define DFP_SIGN "0x8000000000000000"
77 #define SFP_ABS "0x7FFFFFFF"
78 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
79 #define DFP_INTMAX "9223372036854775807"
80 #define ULL_BIAS "18446744073709551616"
82 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
83 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
84 #define ENT_SFP_ABS "C_ia32_sfp_abs"
85 #define ENT_DFP_ABS "C_ia32_dfp_abs"
86 #define ENT_ULL_BIAS "C_ia32_ull_bias"
88 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
89 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
91 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
93 static ir_node *old_initial_fpcw = NULL;
94 static ir_node *initial_fpcw = NULL;
95 int ia32_no_pic_adjust;
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* its enough to have those once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero is a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero is a node represents the -1 constant. */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
155 static bool is_simple_x87_Const(ir_node *node)
157 ir_tarval *tv = get_Const_tarval(node);
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
/*
 * NOTE(review): garbled numbered-listing paste — each line carries a fused
 * original line number and several source lines are missing (the comment
 * opener, brace lines and some returns); code kept byte-identical.
 * Visible intent: decide whether a float Const can be materialized by a
 * cheap SSE instruction sequence rather than a memory load.  The mode_D
 * branch (CONSTRUCT_SSE_CONST only) checks whether the low 32 bits are
 * zero so the value fits a 32-bit immediate — TODO confirm against the
 * original ia32_transform.c before compiling.
 */
166  * returns true if constant can be created with a simple float command
168 static bool is_simple_sse_Const(ir_node *node)
170 ir_tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers, and the
 * non-PIC fallback return (presumably noreg_GP) is missing; code kept
 * byte-identical.  Visible intent: return the PIC base node when the graph is
 * compiled position-independent, to serve as base address for new symbols.
 */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
201 static ir_node *get_symconst_base(void)
203 ir_graph *irg = current_ir_graph;
205 if (be_get_irg_options(irg)->pic) {
206 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
207 return arch_env->impl->get_pic_base(irg);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and many
 * missing source lines (declarations, braces, returns); code kept
 * byte-identical.  Visible intent: transform a firm Const node into ia32
 * code.  Float constants go three ways: SSE2 (xZero for 0.0, movd for any
 * 32-bit float, otherwise an xLoad from a float-const entity), or x87
 * (fldz/fld1 for 0.0/1.0, otherwise a vfld from an entity); integer
 * constants become an ia32 Const with the tarval converted to mode_Iu.
 * The CONSTRUCT_SSE_CONST paths build 1.0 and zero-low-word doubles from
 * shift sequences instead of loads.  Restore from the original
 * ia32_transform.c before compiling.
 */
214 * Transforms a Const.
216 static ir_node *gen_Const(ir_node *node)
218 ir_node *old_block = get_nodes_block(node);
219 ir_node *block = be_transform_node(old_block);
220 dbg_info *dbgi = get_irn_dbg_info(node);
221 ir_mode *mode = get_irn_mode(node);
223 assert(is_Const(node));
225 if (mode_is_float(mode)) {
230 if (ia32_cg_config.use_sse2) {
231 ir_tarval *tv = get_Const_tarval(node);
232 if (tarval_is_null(tv)) {
233 load = new_bd_ia32_xZero(dbgi, block);
234 set_ia32_ls_mode(load, mode);
236 #ifdef CONSTRUCT_SSE_CONST
237 } else if (tarval_is_one(tv)) {
238 int cnst = mode == mode_F ? 26 : 55;
239 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
240 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
241 ir_node *pslld, *psrld;
243 load = new_bd_ia32_xAllOnes(dbgi, block);
244 set_ia32_ls_mode(load, mode);
245 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
246 set_ia32_ls_mode(pslld, mode);
247 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
248 set_ia32_ls_mode(psrld, mode);
250 #endif /* CONSTRUCT_SSE_CONST */
251 } else if (mode == mode_F) {
252 /* we can place any 32bit constant by using a movd gp, sse */
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
257 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
258 load = new_bd_ia32_xMovd(dbgi, block, cnst);
259 set_ia32_ls_mode(load, mode);
263 #ifdef CONSTRUCT_SSE_CONST
264 if (mode == mode_D) {
265 unsigned val = get_tarval_sub_bits(tv, 0) |
266 (get_tarval_sub_bits(tv, 1) << 8) |
267 (get_tarval_sub_bits(tv, 2) << 16) |
268 (get_tarval_sub_bits(tv, 3) << 24);
270 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
271 ir_node *cnst, *psllq;
273 /* fine, lower 32bit are zero, produce 32bit value */
274 val = get_tarval_sub_bits(tv, 4) |
275 (get_tarval_sub_bits(tv, 5) << 8) |
276 (get_tarval_sub_bits(tv, 6) << 16) |
277 (get_tarval_sub_bits(tv, 7) << 24);
278 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
279 load = new_bd_ia32_xMovd(dbgi, block, cnst);
280 set_ia32_ls_mode(load, mode);
281 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
282 set_ia32_ls_mode(psllq, mode);
287 #endif /* CONSTRUCT_SSE_CONST */
288 floatent = ia32_create_float_const_entity(node);
290 base = get_symconst_base();
291 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
293 set_ia32_op_type(load, ia32_AddrModeS);
294 set_ia32_am_sc(load, floatent);
295 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
296 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
299 if (is_Const_null(node)) {
300 load = new_bd_ia32_vfldz(dbgi, block);
302 set_ia32_ls_mode(load, mode);
303 } else if (is_Const_one(node)) {
304 load = new_bd_ia32_vfld1(dbgi, block);
306 set_ia32_ls_mode(load, mode);
311 floatent = ia32_create_float_const_entity(node);
312 /* create_float_const_ent is smart and sometimes creates
314 ls_mode = get_type_mode(get_entity_type(floatent));
315 base = get_symconst_base();
316 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
318 set_ia32_op_type(load, ia32_AddrModeS);
319 set_ia32_am_sc(load, floatent);
320 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
321 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
324 #ifdef CONSTRUCT_SSE_CONST
326 #endif /* CONSTRUCT_SSE_CONST */
327 SET_IA32_ORIG_NODE(load, node);
329 } else { /* non-float mode */
331 ir_tarval *tv = get_Const_tarval(node);
334 tv = tarval_convert_to(tv, mode_Iu);
336 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
338 panic("couldn't convert constant tarval (%+F)", node);
340 val = get_tarval_long(tv);
342 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
343 SET_IA32_ORIG_NODE(cnst, node);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (else branches, returns, closing braces); code kept
 * byte-identical.  Visible intent: transform a firm SymConst.  Float mode
 * produces an xLoad (SSE2) or vfld (x87) with the entity as address; the
 * integer path only supports symconst_addr_ent, emitting an LdTls+Lea pair
 * for TLS entities and a plain ia32 Const carrying the entity otherwise.
 */
350 * Transforms a SymConst.
352 static ir_node *gen_SymConst(ir_node *node)
354 ir_node *old_block = get_nodes_block(node);
355 ir_node *block = be_transform_node(old_block);
356 dbg_info *dbgi = get_irn_dbg_info(node);
357 ir_mode *mode = get_irn_mode(node);
360 if (mode_is_float(mode)) {
361 if (ia32_cg_config.use_sse2)
362 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 set_ia32_am_sc(cnst, get_SymConst_entity(node));
366 set_ia32_use_frame(cnst);
370 if (get_SymConst_kind(node) != symconst_addr_ent) {
371 panic("backend only support symconst_addr_ent (at %+F)", node);
373 entity = get_SymConst_entity(node);
374 if (get_entity_owner(entity) == get_tls_type()) {
375 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
376 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
377 set_ia32_am_sc(lea, entity);
380 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
384 SET_IA32_ORIG_NODE(cnst, node);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (local declaration of tp, braces); code kept byte-identical.
 * Visible intent: lazily create and cache a primitive ir_type per
 * (mode, alignment) pair; one static 16-entry cache per supported mode
 * (Iu, Lu, F, D, with E as the fallback branch).  Note: align indexes the
 * 16-entry arrays directly — presumably align < 16 is guaranteed by
 * callers; TODO confirm.
 */
390 * Create a float type for the given mode and cache it.
392 * @param mode the mode for the float type (might be integer mode for SSE2 types)
393 * @param align alignment
395 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
401 if (mode == mode_Iu) {
402 static ir_type *int_Iu[16] = {NULL, };
404 if (int_Iu[align] == NULL) {
405 int_Iu[align] = tp = new_type_primitive(mode);
406 /* set the specified alignment */
407 set_type_alignment_bytes(tp, align);
409 return int_Iu[align];
410 } else if (mode == mode_Lu) {
411 static ir_type *int_Lu[16] = {NULL, };
413 if (int_Lu[align] == NULL) {
414 int_Lu[align] = tp = new_type_primitive(mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return int_Lu[align];
419 } else if (mode == mode_F) {
420 static ir_type *float_F[16] = {NULL, };
422 if (float_F[align] == NULL) {
423 float_F[align] = tp = new_type_primitive(mode);
424 /* set the specified alignment */
425 set_type_alignment_bytes(tp, align);
427 return float_F[align];
428 } else if (mode == mode_D) {
429 static ir_type *float_D[16] = {NULL, };
431 if (float_D[align] == NULL) {
432 float_D[align] = tp = new_type_primitive(mode);
433 /* set the specified alignment */
434 set_type_alignment_bytes(tp, align);
436 return float_D[align];
438 static ir_type *float_E[16] = {NULL, };
440 if (float_E[align] == NULL) {
441 float_E[align] = tp = new_type_primitive(mode);
442 /* set the specified alignment */
443 set_type_alignment_bytes(tp, align);
445 return float_E[align];
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (arr declaration, final return); code kept byte-identical.
 * Visible intent: lazily create and cache a 2-element array type of the
 * given atomic float type, keyed by (element mode, alignment); size is
 * fixed to 2 * element size with layout_fixed.  Used for ENT_ULL_BIAS.
 */
450 * Create a float[2] array type for the given atomic type.
452 * @param tp the atomic type
454 static ir_type *ia32_create_float_array(ir_type *tp)
456 ir_mode *mode = get_type_mode(tp);
457 unsigned align = get_type_alignment_bytes(tp);
462 if (mode == mode_F) {
463 static ir_type *float_F[16] = {NULL, };
465 if (float_F[align] != NULL)
466 return float_F[align];
467 arr = float_F[align] = new_type_array(1, tp);
468 } else if (mode == mode_D) {
469 static ir_type *float_D[16] = {NULL, };
471 if (float_D[align] != NULL)
472 return float_D[align];
473 arr = float_D[align] = new_type_array(1, tp);
475 static ir_type *float_E[16] = {NULL, };
477 if (float_E[align] != NULL)
478 return float_E[align];
479 arr = float_E[align] = new_type_array(1, tp);
481 set_type_alignment_bytes(arr, align);
482 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
483 set_type_state(arr, layout_fixed);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (struct members mode/align, local declarations, braces);
 * code kept byte-identical.  Visible intent: lazily build and cache global
 * constant entities for the well-known FP bit patterns (sign masks, abs
 * masks, and the unsigned-long-long bias) used by FP Neg/Abs and
 * unsigned-int-to-float conversion.  ULL_BIAS gets a 2-element array
 * initializer {0, bias} so it can be indexed by a sign bit.
 */
487 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
488 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
490 static const struct {
491 const char *ent_name;
492 const char *cnst_str;
495 } names [ia32_known_const_max] = {
496 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
497 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
498 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
499 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
500 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
502 static ir_entity *ent_cache[ia32_known_const_max];
504 const char *ent_name, *cnst_str;
510 ent_name = names[kct].ent_name;
511 if (! ent_cache[kct]) {
512 cnst_str = names[kct].cnst_str;
514 switch (names[kct].mode) {
515 case 0: mode = mode_Iu; break;
516 case 1: mode = mode_Lu; break;
517 default: mode = mode_F; break;
519 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
520 tp = ia32_create_float_type(mode, names[kct].align);
522 if (kct == ia32_ULLBIAS)
523 tp = ia32_create_float_array(tp);
524 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
526 set_entity_ld_ident(ent, get_entity_ident(ent));
527 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
528 set_entity_visibility(ent, ir_visibility_private);
530 if (kct == ia32_ULLBIAS) {
531 ir_initializer_t *initializer = create_initializer_compound(2);
533 set_initializer_compound_value(initializer, 0,
534 create_initializer_tarval(get_mode_null(mode)));
535 set_initializer_compound_value(initializer, 1,
536 create_initializer_tarval(tv));
538 set_entity_initializer(ent, initializer);
540 set_entity_initializer(ent, create_initializer_tarval(tv));
543 /* cache the entry */
544 ent_cache[kct] = ent;
547 return ent_cache[kct];
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (return statements after each check, is_Proj guard, local
 * declarations); code kept byte-identical.  Visible intent: decide whether
 * @p node (a Proj of a Load, or a cheap float Const) may be folded into
 * another instruction as a source address-mode operand.  Rejects loads in
 * other blocks, loads with extra users (beyond match_two_users), already
 * transformed nodes, and loads whose memory the other operands depend on.
 */
551 * return true if the node is a Proj(Load) and could be used in source address
552 * mode for another node. Will return only true if the @p other node is not
553 * dependent on the memory of the Load (for binary operations use the other
554 * input here, for unary operations use NULL).
556 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
557 ir_node *other, ir_node *other2, match_flags_t flags)
562 /* float constants are always available */
563 if (is_Const(node)) {
564 ir_mode *mode = get_irn_mode(node);
565 if (mode_is_float(mode)) {
566 if (ia32_cg_config.use_sse2) {
567 if (is_simple_sse_Const(node))
570 if (is_simple_x87_Const(node))
573 if (get_irn_n_edges(node) > 1)
581 load = get_Proj_pred(node);
582 pn = get_Proj_proj(node);
583 if (!is_Load(load) || pn != pn_Load_res)
585 if (get_nodes_block(load) != block)
587 /* we only use address mode if we're the only user of the load */
588 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
590 /* in some edge cases with address mode we might reach the load normally
591 * and through some AM sequence, if it is already materialized then we
592 * can't create an AM node from it */
593 if (be_is_transformed(node))
596 /* don't do AM if other node inputs depend on the load (via mem-proj) */
597 if (other != NULL && ia32_prevents_AM(block, load, other))
600 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/*
 * NOTE(review): garbled numbered-listing paste — several struct members are
 * missing (visible elsewhere in this chunk are at least addr, ls_mode,
 * mem_proj, pinned, new_op1, new_op2); fields kept byte-identical.
 * Describes a matched ia32 addressing/operand-mode configuration.
 */
606 typedef struct ia32_address_mode_t ia32_address_mode_t;
607 struct ia32_address_mode_t {
612 ia32_op_type_t op_type;
616 unsigned commutative : 1;
617 unsigned ins_permuted : 1;
620 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
622 /* construct load address */
623 memset(addr, 0, sizeof(addr[0]));
624 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
626 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
627 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
628 addr->mem = be_transform_node(mem);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (memset of addr, local declarations, early return of the
 * Const branch, addr->mem assignment); code kept byte-identical.
 * Visible intent: fill @p am for a node usable in source address mode.
 * Float Consts become an entity reference with symconst base; otherwise the
 * node is a Proj(Load) whose pointer is matched into the address structure,
 * recording pinned state, load mode and the load's memory Proj.
 */
631 static void build_address(ia32_address_mode_t *am, ir_node *node,
632 ia32_create_am_flags_t flags)
634 ia32_address_t *addr = &am->addr;
640 /* floating point immediates */
641 if (is_Const(node)) {
642 ir_entity *entity = ia32_create_float_const_entity(node);
643 addr->base = get_symconst_base();
644 addr->index = noreg_GP;
646 addr->symconst_ent = entity;
647 addr->tls_segment = false;
649 am->ls_mode = get_type_mode(get_entity_type(entity));
650 am->pinned = op_pin_state_floats;
654 load = get_Proj_pred(node);
655 ptr = get_Load_ptr(load);
656 mem = get_Load_mem(load);
657 new_mem = be_transform_node(mem);
658 am->pinned = get_irn_pinned(load);
659 am->ls_mode = get_Load_mode(load);
660 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
663 /* construct load address */
664 ia32_create_address_mode(addr, ptr, flags);
666 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
667 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers; the
 * line guarding set_ia32_use_frame (presumably `if (addr->use_frame)`) is
 * missing; code kept byte-identical.  Visible intent: copy all fields of an
 * ia32_address_t (scale, symconst, offset, TLS segment, sign, frame entity)
 * onto an ia32 node's attributes.
 */
671 static void set_address(ir_node *node, const ia32_address_t *addr)
673 set_ia32_am_scale(node, addr->scale);
674 set_ia32_am_sc(node, addr->symconst_ent);
675 set_ia32_am_offs_int(node, addr->offset);
676 set_ia32_am_tls_segment(node, addr->tls_segment);
677 if (addr->symconst_sign)
678 set_ia32_am_sc_sign(node);
680 set_ia32_use_frame(node);
681 set_ia32_frame_ent(node, addr->frame_entity);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers; the
 * guard before set_ia32_commutative (presumably `if (am->commutative)`) is
 * missing; code kept byte-identical.  Visible intent: apply a matched
 * address mode to a node — address fields, op type, load/store mode,
 * pinned state (only tightened to pinned, never relaxed) and
 * commutativity.
 */
685 * Apply attributes of a given address mode to a node.
687 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
689 set_address(node, &am->addr);
691 set_ia32_op_type(node, am->op_type);
692 set_ia32_ls_mode(node, am->ls_mode);
693 if (am->pinned == op_pin_state_pinned) {
694 /* beware: some nodes are already pinned and did not allow to change the state */
695 if (get_irn_pinned(node) != op_pin_state_pinned)
696 set_irn_pinned(node, op_pin_state_pinned);
699 set_ia32_commutative(node);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (local declarations, the is_Conv() guard, the leading
 * `return` of the final expression); code kept byte-identical.  Visible
 * intent: true iff node is a single-user integer Conv to a mode of equal
 * or fewer bits (both modes GP-register modes).
 */
703 * Check, if a given node is a Down-Conv, ie. a integer Conv
704 * from a mode with a mode with more bits to a mode with lesser bits.
705 * Moreover, we return only true if the node has not more than 1 user.
707 * @param node the node
708 * @return non-zero if node is a Down-Conv
710 static int is_downconv(const ir_node *node)
718 /* we only want to skip the conv when we're the only user
719 * (because this test is used in the context of address-mode selection
720 * and we don't want to use address mode for multiple users) */
721 if (get_irn_n_edges(node) > 1)
724 src_mode = get_irn_mode(get_Conv_op(node));
725 dest_mode = get_irn_mode(node);
727 ia32_mode_needs_gp_reg(src_mode) &&
728 ia32_mode_needs_gp_reg(dest_mode) &&
729 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
732 /** Skip all Down-Conv's on a given node and return the resulting node. */
733 ir_node *ia32_skip_downconv(ir_node *node)
735 while (is_downconv(node))
736 node = get_Conv_op(node);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (the is_Conv() guard, local declarations, the leading
 * `return`); code kept byte-identical.  Visible intent: true iff node is a
 * single-user Conv between two GP-register modes of identical bit size
 * (i.e. a pure signedness change).
 */
741 static bool is_sameconv(ir_node *node)
749 /* we only want to skip the conv when we're the only user
750 * (because this test is used in the context of address-mode selection
751 * and we don't want to use address mode for multiple users) */
752 if (get_irn_n_edges(node) > 1)
755 src_mode = get_irn_mode(get_Conv_op(node));
756 dest_mode = get_irn_mode(node);
758 ia32_mode_needs_gp_reg(src_mode) &&
759 ia32_mode_needs_gp_reg(dest_mode) &&
760 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
763 /** Skip all signedness convs */
764 static ir_node *ia32_skip_sameconv(ir_node *node)
766 while (is_sameconv(node))
767 node = get_Conv_op(node);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (tgt_mode selection — presumably mode_Is vs mode_Iu based
 * on signedness, local declarations); code kept byte-identical.  Visible
 * intent: widen a sub-32-bit value to a 32-bit mode via create_I2I_Conv,
 * choosing the target mode by the source mode's signedness.
 */
772 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
774 ir_mode *mode = get_irn_mode(node);
779 if (mode_is_signed(mode)) {
784 block = get_nodes_block(node);
785 dbgi = get_irn_dbg_info(node);
787 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and many
 * missing lines (local declarations for use_am/use_immediate/noreg, else
 * branches, closing braces, the addr->mem = nomem fallback); code kept
 * byte-identical.  Visible intent: the central operand matcher — tries, in
 * order, immediate folding for op2, source address mode for op2, source
 * address mode for op1 when commutative (setting ins_permuted), and finally
 * plain register operands with optional upconv to 32 bit; normalizes
 * base/index/mem to noreg_GP/nomem and records results in @p am.
 */
791 * matches operands of a node into ia32 addressing/operand modes. This covers
792 * usage of source address mode, immediates, operations with non 32-bit modes,
794 * The resulting data is filled into the @p am struct. block is the block
795 * of the node whose arguments are matched. op1, op2 are the first and second
796 * input that are matched (op1 may be NULL). other_op is another unrelated
797 * input that is not matched! but which is needed sometimes to check if AM
798 * for op1/op2 is legal.
799 * @p flags describes the supported modes of the operation in detail.
801 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
802 ir_node *op1, ir_node *op2, ir_node *other_op,
805 ia32_address_t *addr = &am->addr;
806 ir_mode *mode = get_irn_mode(op2);
807 int mode_bits = get_mode_size_bits(mode);
808 ir_node *new_op1, *new_op2;
810 unsigned commutative;
811 int use_am_and_immediates;
814 memset(am, 0, sizeof(am[0]));
816 commutative = (flags & match_commutative) != 0;
817 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
818 use_am = (flags & match_am) != 0;
819 use_immediate = (flags & match_immediate) != 0;
820 assert(!use_am_and_immediates || use_immediate);
823 assert(!commutative || op1 != NULL);
824 assert(use_am || !(flags & match_8bit_am));
825 assert(use_am || !(flags & match_16bit_am));
827 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
828 (mode_bits == 16 && !(flags & match_16bit_am))) {
832 /* we can simply skip downconvs for mode neutral nodes: the upper bits
833 * can be random for these operations */
834 if (flags & match_mode_neutral) {
835 op2 = ia32_skip_downconv(op2);
837 op1 = ia32_skip_downconv(op1);
840 op2 = ia32_skip_sameconv(op2);
842 op1 = ia32_skip_sameconv(op1);
846 /* match immediates. firm nodes are normalized: constants are always on the
849 if (!(flags & match_try_am) && use_immediate) {
850 new_op2 = ia32_try_create_Immediate(op2, 0);
853 if (new_op2 == NULL &&
854 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
855 build_address(am, op2, ia32_create_am_normal);
856 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
857 if (mode_is_float(mode)) {
858 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
862 am->op_type = ia32_AddrModeS;
863 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
865 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
867 build_address(am, op1, ia32_create_am_normal);
869 if (mode_is_float(mode)) {
870 noreg = ia32_new_NoReg_vfp(current_ir_graph);
875 if (new_op2 != NULL) {
878 new_op1 = be_transform_node(op2);
880 am->ins_permuted = true;
882 am->op_type = ia32_AddrModeS;
884 am->op_type = ia32_Normal;
886 if (flags & match_try_am) {
892 mode = get_irn_mode(op2);
893 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
894 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
896 new_op2 = create_upconv(op2, NULL);
897 am->ls_mode = mode_Iu;
899 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
901 new_op2 = be_transform_node(op2);
902 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
905 if (addr->base == NULL)
906 addr->base = noreg_GP;
907 if (addr->index == NULL)
908 addr->index = noreg_GP;
909 if (addr->mem == NULL)
912 am->new_op1 = new_op1;
913 am->new_op2 = new_op2;
914 am->commutative = commutative;
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (early `return node;`, local declarations, final return);
 * code kept byte-identical.  Visible intent: when a transformed node
 * consumed a Load via address mode, re-point the Load's old memory Proj at
 * the new node by marking it transformed, switching the node to mode_T and
 * returning a result Proj.
 */
918 * "Fixes" a node that uses address mode by turning it into mode_T
919 * and returning a pn_ia32_res Proj.
921 * @param node the node
922 * @param am its address mode
924 * @return a Proj(pn_ia32_res) if a memory address mode is used,
927 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
932 if (am->mem_proj == NULL)
935 /* we have to create a mode_T so the old MemProj can attach to us */
936 mode = get_irn_mode(node);
937 load = get_Proj_pred(am->mem_proj);
939 be_set_transformed_node(load, node);
941 if (mode != mode_T) {
942 set_irn_mode(node, mode_T);
943 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (dbgi declaration, final `return new_node;`); code kept
 * byte-identical.  Visible intent: generic binop constructor — match
 * operands into an address mode, build the node via @p func, apply AM
 * attributes, disable source AM when an immediate got matched, and fix up
 * the memory Proj.
 */
950 * Construct a standard binary operation, set AM and immediate if required.
952 * @param node The original node for which the binop is created
953 * @param op1 The first operand
954 * @param op2 The second operand
955 * @param func The node constructor function
956 * @return The constructed ia32 node.
958 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
959 construct_binop_func *func, match_flags_t flags)
962 ir_node *block, *new_block, *new_node;
963 ia32_address_mode_t am;
964 ia32_address_t *addr = &am.addr;
966 block = get_nodes_block(node);
967 match_arguments(&am, block, op1, op2, NULL, flags);
969 dbgi = get_irn_dbg_info(node);
970 new_block = be_transform_node(block);
971 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
972 am.new_op1, am.new_op2);
973 set_am_attributes(new_node, &am);
974 /* we can't use source address mode anymore when using immediates */
975 if (!(flags & match_am_and_immediates) &&
976 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
977 set_ia32_am_support(new_node, ia32_am_none);
978 SET_IA32_ORIG_NODE(new_node, node);
980 new_node = fix_mem_proj(new_node, &am);
/*
 * NOTE(review): garbled numbered-listing paste — the `enum { ... };` wrapper
 * lines are missing; kept byte-identical.  Generic input indices for
 * lowered ia32 binops consuming eflags, with compile-time checks that they
 * line up with the generated Adc and Sbb input numbering.
 */
986 * Generic names for the inputs of an ia32 binary op.
989 n_ia32_l_binop_left, /**< ia32 left input */
990 n_ia32_l_binop_right, /**< ia32 right input */
991 n_ia32_l_binop_eflags /**< ia32 eflags input */
993 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
994 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
995 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
996 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
997 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
998 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (dbgi declaration, final return); code kept byte-identical.
 * Visible intent: like gen_binop, but for lowered nodes that additionally
 * consume an eflags input (Adc/Sbb); eflags is passed as other_op to the
 * matcher so AM cannot break the flags dependency.
 */
1001 * Construct a binary operation which also consumes the eflags.
1003 * @param node The node to transform
1004 * @param func The node constructor function
1005 * @param flags The match flags
1006 * @return The constructor ia32 node
1008 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1009 match_flags_t flags)
1011 ir_node *src_block = get_nodes_block(node);
1012 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1013 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1014 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1016 ir_node *block, *new_node, *new_eflags;
1017 ia32_address_mode_t am;
1018 ia32_address_t *addr = &am.addr;
1020 match_arguments(&am, src_block, op1, op2, eflags, flags);
1022 dbgi = get_irn_dbg_info(node);
1023 block = be_transform_node(src_block);
1024 new_eflags = be_transform_node(eflags);
1025 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1026 am.new_op1, am.new_op2, new_eflags);
1027 set_am_attributes(new_node, &am);
1028 /* we can't use source address mode anymore when using immediates */
1029 if (!(flags & match_am_and_immediates) &&
1030 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1031 set_ia32_am_support(new_node, ia32_am_none);
1032 SET_IA32_ORIG_NODE(new_node, node);
1034 new_node = fix_mem_proj(new_node, &am);
1039 static ir_node *get_fpcw(void)
1041 if (initial_fpcw != NULL)
1042 return initial_fpcw;
1044 initial_fpcw = be_transform_node(old_initial_fpcw);
1045 return initial_fpcw;
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (dbgi declaration, the else/panic structure around
 * get_Div_resmode, the `flags |= match_am` presumably guarded by the
 * size check, final return); code kept byte-identical.  Visible intent:
 * x87 binop constructor — treats all ops as commutative (reverse variants
 * exist), derives the result mode for Div nodes, allows AM only for
 * <= 64-bit operands, threads the fpcw input, and records ins_permuted in
 * the x87 attributes.
 */
1049 * Construct a standard binary operation, set AM and immediate if required.
1051 * @param op1 The first operand
1052 * @param op2 The second operand
1053 * @param func The node constructor function
1054 * @return The constructed ia32 node.
1056 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1057 construct_binop_float_func *func)
1059 ir_mode *mode = get_irn_mode(node);
1061 ir_node *block, *new_block, *new_node;
1062 ia32_address_mode_t am;
1063 ia32_address_t *addr = &am.addr;
1064 ia32_x87_attr_t *attr;
1065 /* All operations are considered commutative, because there are reverse
1067 match_flags_t flags = match_commutative;
1069 /* happens for div nodes... */
1070 if (mode == mode_T) {
1072 mode = get_Div_resmode(node);
1074 panic("can't determine mode");
1077 /* cannot use address mode with long double on x87 */
1078 if (get_mode_size_bits(mode) <= 64)
1081 block = get_nodes_block(node);
1082 match_arguments(&am, block, op1, op2, NULL, flags);
1084 dbgi = get_irn_dbg_info(node);
1085 new_block = be_transform_node(block);
1086 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1087 am.new_op1, am.new_op2, get_fpcw());
1088 set_am_attributes(new_node, &am);
1090 attr = get_ia32_x87_attr(new_node);
1091 attr->attr.data.ins_permuted = am.ins_permuted;
1093 SET_IA32_ORIG_NODE(new_node, node);
1095 new_node = fix_mem_proj(new_node, &am);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (dbgi declaration, else branch, the `op2 = op;` inside the
 * Conv-skipping loop, braces, final return); code kept byte-identical.
 * Visible intent: build a shift/rotate — widen or skip-downconv the value
 * operand as flags dictate, strip integer Convs off the shift amount (only
 * the low 5 bits matter on ia32), allow an immediate amount, and carry
 * over a possible extra dependency edge from the lowered node.
 */
1101 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1103 * @param op1 The first operand
1104 * @param op2 The second operand
1105 * @param func The node constructor function
1106 * @return The constructed ia32 node.
1108 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1109 construct_shift_func *func,
1110 match_flags_t flags)
1113 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1115 assert(! mode_is_float(get_irn_mode(node)));
1116 assert(flags & match_immediate);
1117 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1119 if (flags & match_mode_neutral) {
1120 op1 = ia32_skip_downconv(op1);
1121 new_op1 = be_transform_node(op1);
1122 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1123 new_op1 = create_upconv(op1, node);
1125 new_op1 = be_transform_node(op1);
1128 /* the shift amount can be any mode that is bigger than 5 bits, since all
1129 * other bits are ignored anyway */
1130 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1131 ir_node *const op = get_Conv_op(op2);
1132 if (mode_is_float(get_irn_mode(op)))
1135 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1137 new_op2 = create_immediate_or_transform(op2, 0);
1139 dbgi = get_irn_dbg_info(node);
1140 block = get_nodes_block(node);
1141 new_block = be_transform_node(block);
1142 new_node = func(dbgi, new_block, new_op1, new_op2);
1143 SET_IA32_ORIG_NODE(new_node, node);
1145 /* lowered shift instruction may have a dependency operand, handle it here */
1146 if (get_irn_arity(node) == 3) {
1147 /* we have a dependency */
1148 ir_node* dep = get_irn_n(node, 2);
1149 if (get_irn_n_edges(dep) > 1) {
1150 /* ... which has at least one user other than 'node' */
1151 ir_node *new_dep = be_transform_node(dep);
1152 add_irn_dep(new_node, new_dep);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and
 * missing lines (dbgi declaration, final return); code kept byte-identical.
 * Visible intent: generic unary-op constructor — optionally skip downconvs
 * for mode-neutral ops, transform the operand and build the node via
 * @p func.
 */
1161 * Construct a standard unary operation, set AM and immediate if required.
1163 * @param op The operand
1164 * @param func The node constructor function
1165 * @return The constructed ia32 node.
1167 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1168 match_flags_t flags)
1171 ir_node *block, *new_block, *new_op, *new_node;
1173 assert(flags == 0 || flags == match_mode_neutral);
1174 if (flags & match_mode_neutral) {
1175 op = ia32_skip_downconv(op);
1178 new_op = be_transform_node(op);
1179 dbgi = get_irn_dbg_info(node);
1180 block = get_nodes_block(node);
1181 new_block = be_transform_node(block);
1182 new_node = func(dbgi, new_block, new_op);
1184 SET_IA32_ORIG_NODE(new_node, node);
/*
 * NOTE(review): garbled numbered-listing paste — fused line numbers and many
 * missing lines (base/idx initialization from addr with noreg_GP fallback,
 * the base==noreg_GP branch body, final return); code kept byte-identical.
 * Visible intent: materialize an ia32 Lea for an address structure; since
 * segment overrides don't work on lea, a TLS address is rebased onto an
 * LdTls result first.
 */
1189 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1190 ia32_address_t *addr)
1200 base = be_transform_node(base);
1207 idx = be_transform_node(idx);
1210 /* segment overrides are ineffective for Leas :-( so we have to patch
1212 if (addr->tls_segment) {
1213 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1214 assert(addr->symconst_ent != NULL);
1215 if (base == noreg_GP)
1218 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1219 addr->tls_segment = false;
1222 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1223 set_address(res, addr);
1229 * Returns non-zero if a given address mode has a symbolic or
1230 * numerical offset != 0.
1232 static int am_has_immediates(const ia32_address_t *addr)
/* any numeric offset, symbolic entity or frame reference counts as an
 * immediate part of the address mode */
1234 return addr->offset != 0 || addr->symconst_ent != NULL
1235 || addr->frame_entity || addr->use_frame;
1239 * Creates an ia32 Add.
1241 * @return the created ia32 Add node
1243 static ir_node *gen_Add(ir_node *node)
1245 ir_mode *mode = get_irn_mode(node);
1246 ir_node *op1 = get_Add_left(node);
1247 ir_node *op2 = get_Add_right(node);
1249 ir_node *block, *new_block, *new_node, *add_immediate_op;
1250 ia32_address_t addr;
1251 ia32_address_mode_t am;
/* float adds go to SSE2 or x87, no address-mode tricks below apply */
1253 if (mode_is_float(mode)) {
1254 if (ia32_cg_config.use_sse2)
1255 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1256 match_commutative | match_am);
1258 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
/* prevent this Add from being folded as a source operand elsewhere */
1261 ia32_mark_non_am(node);
/* result upper bits are mode-neutral: drop downconvs on both operands */
1263 op2 = ia32_skip_downconv(op2);
1264 op1 = ia32_skip_downconv(op1);
1268 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1269 * 1. Add with immediate -> Lea
1270 * 2. Add with possible source address mode -> Add
1271 * 3. Otherwise -> Lea
1273 memset(&addr, 0, sizeof(addr));
1274 ia32_create_address_mode(&addr, node, ia32_create_am_force)
1275 add_immediate_op = NULL;
1277 dbgi = get_irn_dbg_info(node);
1278 block = get_nodes_block(node);
1279 new_block = be_transform_node(block);
/* case 0: everything folded into immediates -> emit a single Const */
1282 if (addr.base == NULL && addr.index == NULL) {
1283 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1284 addr.symconst_sign, 0, addr.offset);
1285 SET_IA32_ORIG_NODE(new_node, node);
1288 /* add with immediate? */
1289 if (addr.index == NULL) {
1290 add_immediate_op = addr.base;
1291 } else if (addr.base == NULL && addr.scale == 0) {
1292 add_immediate_op = addr.index;
1295 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself - just transform the operand */
1296 if (!am_has_immediates(&addr)) {
1297 #ifdef DEBUG_libfirm
1298 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1301 return be_transform_node(add_immediate_op);
1304 new_node = create_lea_from_address(dbgi, new_block, &addr);
1305 SET_IA32_ORIG_NODE(new_node, node);
1309 /* test if we can use source address mode */
1310 match_arguments(&am, block, op1, op2, NULL, match_commutative
1311 | match_mode_neutral | match_am | match_immediate | match_try_am);
1313 /* construct an Add with source address mode */
1314 if (am.op_type == ia32_AddrModeS) {
1315 ia32_address_t *am_addr = &am.addr;
1316 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1317 am_addr->index, am_addr->mem, am.new_op1,
1319 set_am_attributes(new_node, &am);
1320 SET_IA32_ORIG_NODE(new_node, node);
1322 new_node = fix_mem_proj(new_node, &am);
1327 /* otherwise construct a lea */
1328 new_node = create_lea_from_address(dbgi, new_block, &addr);
1329 SET_IA32_ORIG_NODE(new_node, node);
1334 * Creates an ia32 Mul.
1336 * @return the created ia32 Mul node
1338 static ir_node *gen_Mul(ir_node *node)
1340 ir_node *op1 = get_Mul_left(node);
1341 ir_node *op2 = get_Mul_right(node);
1342 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE2 xMul if available, otherwise x87 vfmul */
1344 if (mode_is_float(mode)) {
1345 if (ia32_cg_config.use_sse2)
1346 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1347 match_commutative | match_am);
1349 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul with the full set of matcher options */
1351 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1352 match_commutative | match_am | match_mode_neutral |
1353 match_immediate | match_am_and_immediates);
1357 * Creates an ia32 Mulh.
1358 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1359 * this result while Mul returns the lower 32 bit.
1361 * @return the created ia32 Mulh node
1363 static ir_node *gen_Mulh(ir_node *node)
1365 dbg_info *dbgi = get_irn_dbg_info(node);
1366 ir_node *op1 = get_Mulh_left(node);
1367 ir_node *op2 = get_Mulh_right(node);
1368 ir_mode *mode = get_irn_mode(node);
1370 ir_node *proj_res_high;
/* only 32x32->64 bit widening multiply is implemented */
1372 if (get_mode_size_bits(mode) != 32) {
1373 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed uses one-operand IMul, unsigned uses Mul; in both cases the
 * upper 32 bits of the 64 bit product are projected out */
1376 if (mode_is_signed(mode)) {
1377 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1378 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1380 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1381 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1383 return proj_res_high;
1387 * Creates an ia32 And.
1389 * @return The created ia32 And node
1391 static ir_node *gen_And(ir_node *node)
1393 ir_node *op1 = get_And_left(node);
1394 ir_node *op2 = get_And_right(node);
1395 assert(! mode_is_float(get_irn_mode(node)));
1397 /* is it a zero extension? */
1398 if (is_Const(op2)) {
1399 ir_tarval *tv = get_Const_tarval(op2);
1400 long v = get_tarval_long(tv);
/* And with 0xFF / 0xFFFF is a zero extension from 8/16 bit: emit a
 * conversion instead of a real And */
1402 if (v == 0xFF || v == 0xFFFF) {
1403 dbg_info *dbgi = get_irn_dbg_info(node);
1404 ir_node *block = get_nodes_block(node);
1411 assert(v == 0xFFFF);
1414 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1419 return gen_binop(node, op1, op2, new_bd_ia32_And,
1420 match_commutative | match_mode_neutral | match_am | match_immediate);
1424 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
1427 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
/* true iff both are constants x and 32-x (shift-amount pair of a 64 bit
 * double shift) */
1429 if (is_Const(value1) && is_Const(value2)) {
1430 ir_tarval *tv1 = get_Const_tarval(value1);
1431 ir_tarval *tv2 = get_Const_tarval(value2);
1432 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1433 long v1 = get_tarval_long(tv1);
1434 long v2 = get_tarval_long(tv2);
1435 return v1 <= v2 && v2 == 32-v1;
1441 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1442 ir_node *high, ir_node *low,
1446 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1447 * op1 - target to be shifted
1448 * op2 - contains bits to be shifted into target
1450 * Only op3 can be an immediate.
1452 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1453 ir_node *high, ir_node *low, ir_node *count,
1454 new_shiftd_func func)
1456 ir_node *new_block = be_transform_node(block);
1457 ir_node *new_high = be_transform_node(high);
1458 ir_node *new_low = be_transform_node(low);
1462 /* the shift amount can be any mode that is bigger than 5 bits, since all
1463 * other bits are ignored anyway */
1464 while (is_Conv(count) &&
1465 get_irn_n_edges(count) == 1 &&
1466 mode_is_int(get_irn_mode(count))) {
1467 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1468 count = get_Conv_op(count);
/* the count may become an immediate if it is constant */
1470 new_count = create_immediate_or_transform(count, 0);
1472 new_node = func(dbgi, new_block, new_high, new_low, new_count);
/* Try to recognize the Or(Shl,Shr) patterns that lower_dw produces for
 * 64 bit shifts and turn them into a single ShlD/ShrD instruction.
 * Returns NULL-ish fallthrough if no pattern matches (tail not visible). */
1476 static ir_node *match_64bit_shift(ir_node *node)
1478 ir_node *op1 = get_Or_left(node);
1479 ir_node *op2 = get_Or_right(node);
1487 /* match ShlD operation */
1488 if (is_Shl(op1) && is_Shr(op2)) {
1489 ir_node *shl_right = get_Shl_right(op1);
1490 ir_node *shl_left = get_Shl_left(op1);
1491 ir_node *shr_right = get_Shr_right(op2);
1492 ir_node *shr_left = get_Shr_left(op2);
1493 /* constant ShlD operation */
1494 if (is_complementary_shifts(shl_right, shr_right)) {
1495 dbg_info *dbgi = get_irn_dbg_info(node);
1496 ir_node *block = get_nodes_block(node);
1497 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1500 /* constant ShrD operation */
1501 if (is_complementary_shifts(shr_right, shl_right)) {
1502 dbg_info *dbgi = get_irn_dbg_info(node);
1503 ir_node *block = get_nodes_block(node);
1504 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1507 /* lower_dw produces the following for ShlD:
1508 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1509 if (is_Shr(shr_left) && is_Not(shr_right)
1510 && is_Const_1(get_Shr_right(shr_left))
1511 && get_Not_op(shr_right) == shl_right) {
1512 dbg_info *dbgi = get_irn_dbg_info(node);
1513 ir_node *block = get_nodes_block(node);
1514 ir_node *val_h = get_Shr_left(shr_left);
1515 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1518 /* lower_dw produces the following for ShrD:
1519 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1520 if (is_Shl(shl_left) && is_Not(shl_right)
1521 && is_Const_1(get_Shl_right(shl_left))
1522 && get_Not_op(shl_right) == shr_right) {
1523 dbg_info *dbgi = get_irn_dbg_info(node);
1524 ir_node *block = get_nodes_block(node);
1525 ir_node *val_h = get_Shl_left(shl_left);
1526 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1535 * Creates an ia32 Or.
1537 * @return The created ia32 Or node
1539 static ir_node *gen_Or(ir_node *node)
1541 ir_node *op1 = get_Or_left(node);
1542 ir_node *op2 = get_Or_right(node);
/* first try to fuse the Or into a ShlD/ShrD double-shift */
1545 res = match_64bit_shift(node);
1549 assert (! mode_is_float(get_irn_mode(node)));
1550 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1551 | match_mode_neutral | match_am | match_immediate);
1557 * Creates an ia32 Eor.
1559 * @return The created ia32 Eor node
1561 static ir_node *gen_Eor(ir_node *node)
1563 ir_node *op1 = get_Eor_left(node);
1564 ir_node *op2 = get_Eor_right(node);
/* exclusive-or only exists for integer modes; maps directly to Xor */
1566 assert(! mode_is_float(get_irn_mode(node)));
1567 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1568 | match_mode_neutral | match_am | match_immediate);
1573 * Creates an ia32 Sub.
1575 * @return The created ia32 Sub node
1577 static ir_node *gen_Sub(ir_node *node)
1579 ir_node *op1 = get_Sub_left(node);
1580 ir_node *op2 = get_Sub_right(node);
1581 ir_mode *mode = get_irn_mode(node);
/* float subtraction: SSE2 xSub or x87 vfsub; note: not commutative */
1583 if (mode_is_float(mode)) {
1584 if (ia32_cg_config.use_sse2)
1585 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1587 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with constant should have been normalized to Add(x, -C) earlier */
1590 if (is_Const(op2)) {
1591 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1595 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1596 | match_am | match_immediate);
/* Combine the original memory input of a node with the memory of a folded
 * address mode, avoiding memory self-loops when the AM load's own mem/value
 * Projs are involved; merges Sync predecessors when necessary. */
1599 static ir_node *transform_AM_mem(ir_node *const block,
1600 ir_node *const src_val,
1601 ir_node *const src_mem,
1602 ir_node *const am_mem)
1604 if (is_NoMem(am_mem)) {
1605 return be_transform_node(src_mem);
1606 } else if (is_Proj(src_val) &&
1608 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1609 /* avoid memory loop */
1611 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1612 ir_node *const ptr_pred = get_Proj_pred(src_val);
1613 int const arity = get_Sync_n_preds(src_mem);
/* stack-allocate space for all old preds plus the AM memory */
1618 NEW_ARR_A(ir_node*, ins, arity + 1);
1620 /* NOTE: This sometimes produces dead-code because the old sync in
1621 * src_mem might not be used anymore, we should detect this case
1622 * and kill the sync... */
1623 for (i = arity - 1; i >= 0; --i) {
1624 ir_node *const pred = get_Sync_pred(src_mem, i);
1626 /* avoid memory loop */
1627 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1630 ins[n++] = be_transform_node(pred);
1633 if (n==1 && ins[0] == am_mem) {
1635 /* creating a new Sync and relying on CSE may fail,
1636 * if am_mem is a ProjM, which does not yet verify. */
1640 return new_r_Sync(block, n, ins);
/* simple case: sync the transformed source memory with the AM memory */
1644 ins[0] = be_transform_node(src_mem);
1646 return new_r_Sync(block, 2, ins);
1651 * Create a 32bit to 64bit signed extension.
1653 * @param dbgi debug info
1654 * @param block the block where node nodes should be placed
1655 * @param val the value to extend
1656 * @param orig the original node
1658 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1659 ir_node *val, const ir_node *orig)
/* either the short cltd (cdq) form or an explicit arithmetic shift by 31
 * produces the 32 sign bits */
1664 if (ia32_cg_config.use_short_sex_eax) {
1665 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1666 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1668 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1669 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1671 SET_IA32_ORIG_NODE(res, orig);
1676 * Generates an ia32 Div with additional infrastructure for the
1677 * register allocator if needed.
/* Shared lowering for firm Div and Mod nodes: both map onto the ia32
 * IDiv/Div instruction, which yields quotient and remainder at once. */
1679 static ir_node *create_Div(ir_node *node)
1681 dbg_info *dbgi = get_irn_dbg_info(node);
1682 ir_node *block = get_nodes_block(node);
1683 ir_node *new_block = be_transform_node(block);
1684 int throws_exception = ir_throws_exception(node);
1691 ir_node *sign_extension;
1692 ia32_address_mode_t am;
1693 ia32_address_t *addr = &am.addr;
1695 /* the upper bits have random contents for smaller modes */
1696 switch (get_irn_opcode(node)) {
1698 op1 = get_Div_left(node);
1699 op2 = get_Div_right(node);
1700 mem = get_Div_mem(node);
1701 mode = get_Div_resmode(node);
1704 op1 = get_Mod_left(node);
1705 op2 = get_Mod_right(node);
1706 mem = get_Mod_mem(node);
1707 mode = get_Mod_resmode(node);
1710 panic("invalid divmod node %+F", node);
1713 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1715 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1716 is the memory of the consumed address. We can have only the second op as address
1717 in Div nodes, so check only op2. */
1718 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* the divisor pair edx:eax needs the upper half set up: sign extension
 * for signed division, zero for unsigned */
1720 if (mode_is_signed(mode)) {
1721 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1722 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1723 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1725 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1727 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1728 addr->index, new_mem, am.new_op2,
1729 am.new_op1, sign_extension);
/* preserve exception and pinned state of the original node */
1731 ir_set_throws_exception(new_node, throws_exception);
1733 set_irn_pinned(new_node, get_irn_pinned(node));
1735 set_am_attributes(new_node, &am);
1736 SET_IA32_ORIG_NODE(new_node, node);
1738 new_node = fix_mem_proj(new_node, &am);
1744 * Generates an ia32 Mod.
/* Mod shares the div instruction; the remainder is selected via Proj. */
1746 static ir_node *gen_Mod(ir_node *node)
1748 return create_Div(node);
1752 * Generates an ia32 Div.
1754 static ir_node *gen_Div(ir_node *node)
1756 ir_mode *mode = get_Div_resmode(node);
/* float division: SSE2 xDiv or x87 vfdiv */
1757 if (mode_is_float(mode)) {
1758 ir_node *op1 = get_Div_left(node);
1759 ir_node *op2 = get_Div_right(node);
1761 if (ia32_cg_config.use_sse2) {
1762 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1764 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
/* integer division shares the IDiv/Div construction with Mod */
1768 return create_Div(node);
1772 * Creates an ia32 Shl.
1774 * @return The created ia32 Shl node
1776 static ir_node *gen_Shl(ir_node *node)
1778 ir_node *left = get_Shl_left(node);
1779 ir_node *right = get_Shl_right(node);
/* left shift is mode neutral: the upper bits don't matter */
1781 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1782 match_mode_neutral | match_immediate);
1786 * Creates an ia32 Shr.
1788 * @return The created ia32 Shr node
1790 static ir_node *gen_Shr(ir_node *node)
1792 ir_node *left = get_Shr_left(node);
1793 ir_node *right = get_Shr_right(node);
/* logical right shift: NOT mode neutral (upper bits must be zero) */
1795 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1801 * Creates an ia32 Sar.
1803 * @return The created ia32 Shrs node
1805 static ir_node *gen_Shrs(ir_node *node)
1807 ir_node *left = get_Shrs_left(node);
1808 ir_node *right = get_Shrs_right(node);
/* Shrs by a constant shift amount may be a disguised sign extension */
1810 if (is_Const(right)) {
1811 ir_tarval *tv = get_Const_tarval(right);
1812 long val = get_tarval_long(tv);
1814 /* this is a sign extension */
1815 dbg_info *dbgi = get_irn_dbg_info(node);
1816 ir_node *block = be_transform_node(get_nodes_block(node));
1817 ir_node *new_op = be_transform_node(left);
1819 return create_sex_32_64(dbgi, block, new_op, node);
1823 /* 8 or 16 bit sign extension? */
1824 if (is_Const(right) && is_Shl(left)) {
1825 ir_node *shl_left = get_Shl_left(left);
1826 ir_node *shl_right = get_Shl_right(left);
1827 if (is_Const(shl_right)) {
1828 ir_tarval *tv1 = get_Const_tarval(right);
1829 ir_tarval *tv2 = get_Const_tarval(shl_right);
/* Shrs(Shl(x, 24), 24) / Shrs(Shl(x, 16), 16) == sext from 8/16 bit */
1830 if (tv1 == tv2 && tarval_is_long(tv1)) {
1831 long val = get_tarval_long(tv1);
1832 if (val == 16 || val == 24) {
1833 dbg_info *dbgi = get_irn_dbg_info(node);
1834 ir_node *block = get_nodes_block(node);
1844 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic arithmetic right shift */
1853 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1859 * Creates an ia32 Rol.
1861 * @param op1 The first operator
1862 * @param op2 The second operator
1863 * @return The created ia32 RotL node
/* Rotate left maps directly onto the ia32 Rol instruction. */
1865 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1867 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate)
1873 * Creates an ia32 Ror.
1874 * NOTE: There is no RotR with immediate because this would always be a RotL
1875 * "imm-mode_size_bits" which can be pre-calculated.
1877 * @param op1 The first operator
1878 * @param op2 The second operator
1879 * @return The created ia32 RotR node
/* Rotate right; variable count only (constant rotr is normalized to rotl). */
1881 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1883 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1889 * Creates an ia32 RotR or RotL (depending on the found pattern).
1891 * @return The created ia32 RotL or RotR node
1893 static ir_node *gen_Rotl(ir_node *node)
1895 ir_node *op1 = get_Rotl_left(node);
1896 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -n) is the same as Ror(x, n): strip the Minus */
1898 if (is_Minus(op2)) {
1899 return gen_Ror(node, op1, get_Minus_op(op2));
1902 return gen_Rol(node, op1, op2);
1908 * Transforms a Minus node.
1910 * @return The created ia32 Minus node
1912 static ir_node *gen_Minus(ir_node *node)
1914 ir_node *op = get_Minus_op(node);
1915 ir_node *block = be_transform_node(get_nodes_block(node));
1916 dbg_info *dbgi = get_irn_dbg_info(node);
1917 ir_mode *mode = get_irn_mode(node);
/* float negation is implemented as a sign-bit flip: Xor with a constant
 * sign mask (SSE2) or the x87 fchs instruction */
1922 if (mode_is_float(mode)) {
1923 ir_node *new_op = be_transform_node(op);
1924 if (ia32_cg_config.use_sse2) {
1925 /* TODO: non-optimal... if we have many xXors, then we should
1926 * rather create a load for the const and use that instead of
1927 * several AM nodes... */
1928 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1930 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1931 noreg_GP, nomem, new_op, noreg_xmm);
/* choose the 32 or 64 bit sign-mask constant entity */
1933 size = get_mode_size_bits(mode);
1934 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1936 set_ia32_am_sc(new_node, ent);
1937 set_ia32_op_type(new_node, ia32_AddrModeS);
1938 set_ia32_ls_mode(new_node, mode);
1940 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negation is a plain Neg */
1943 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1946 SET_IA32_ORIG_NODE(new_node, node);
1952 * Transforms a Not node.
1954 * @return The created ia32 Not node
1956 static ir_node *gen_Not(ir_node *node)
1958 ir_node *op = get_Not_op(node);
/* boolean and float Not must have been lowered before the backend */
1960 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1961 assert (! mode_is_float(get_irn_mode(node)));
1963 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Build |op| (optionally -|op| in the x87 path) for a float value:
 * SSE2 masks away the sign bit via And with an abs-mask constant,
 * x87 uses fabs (and fchs for the negated variant). */
1966 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1967 bool negate, ir_node *node)
1969 ir_node *new_block = be_transform_node(block);
1970 ir_mode *mode = get_irn_mode(op);
1971 ir_node *new_op = be_transform_node(op);
1976 assert(mode_is_float(mode));
1978 if (ia32_cg_config.use_sse2) {
1979 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1980 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1981 noreg_GP, nomem, new_op, noreg_fp);
/* 32 or 64 bit all-but-sign mask entity */
1983 size = get_mode_size_bits(mode);
1984 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1986 set_ia32_am_sc(new_node, ent);
1988 SET_IA32_ORIG_NODE(new_node, node);
1990 set_ia32_op_type(new_node, ia32_AddrModeS);
1991 set_ia32_ls_mode(new_node, mode);
1993 /* TODO, implement -Abs case */
1996 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1997 SET_IA32_ORIG_NODE(new_node, node);
/* NOTE(review): this path appears to apply fchs when negate is set */
1999 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
2000 SET_IA32_ORIG_NODE(new_node, node);
2008 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
2010 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2012 dbg_info *dbgi = get_irn_dbg_info(cmp);
2013 ir_node *block = get_nodes_block(cmp);
2014 ir_node *new_block = be_transform_node(block);
2015 ir_node *op1 = be_transform_node(x);
2016 ir_node *op2 = be_transform_node(n);
/* Bt copies bit n of x into the carry flag */
2018 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm ir_relation onto an ia32 condition code; float, signed int and
 * unsigned int modes each use a different cc family. */
2021 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2024 if (mode_is_float(mode)) {
2026 case ir_relation_equal: return ia32_cc_float_equal;
2027 case ir_relation_less: return ia32_cc_float_below;
2028 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2029 case ir_relation_greater: return ia32_cc_float_above;
2030 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2031 case ir_relation_less_greater: return ia32_cc_not_equal;
2032 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2033 case ir_relation_unordered: return ia32_cc_parity;
2034 case ir_relation_unordered_equal: return ia32_cc_equal;
2035 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2036 case ir_relation_unordered_less_equal:
2037 return ia32_cc_float_unordered_below_equal;
2038 case ir_relation_unordered_greater:
2039 return ia32_cc_float_unordered_above;
2040 case ir_relation_unordered_greater_equal:
2041 return ia32_cc_float_unordered_above_equal;
2042 case ir_relation_unordered_less_greater:
2043 return ia32_cc_float_not_equal;
2044 case ir_relation_false:
2045 case ir_relation_true:
2046 /* should we introduce a jump always/jump never? */
2049 panic("Unexpected float pnc");
2050 } else if (mode_is_signed(mode)) {
/* for non-float modes the "unordered" variants are equivalent to their
 * ordered counterparts */
2052 case ir_relation_unordered_equal:
2053 case ir_relation_equal: return ia32_cc_equal;
2054 case ir_relation_unordered_less:
2055 case ir_relation_less: return ia32_cc_less;
2056 case ir_relation_unordered_less_equal:
2057 case ir_relation_less_equal: return ia32_cc_less_equal;
2058 case ir_relation_unordered_greater:
2059 case ir_relation_greater: return ia32_cc_greater;
2060 case ir_relation_unordered_greater_equal:
2061 case ir_relation_greater_equal: return ia32_cc_greater_equal;
2062 case ir_relation_unordered_less_greater:
2063 case ir_relation_less_greater: return ia32_cc_not_equal;
2064 case ir_relation_less_equal_greater:
2065 case ir_relation_unordered:
2066 case ir_relation_false:
2067 case ir_relation_true:
2068 /* introduce jump always/jump never? */
2071 panic("Unexpected pnc");
/* unsigned comparisons use the below/above cc family */
2074 case ir_relation_unordered_equal:
2075 case ir_relation_equal: return ia32_cc_equal;
2076 case ir_relation_unordered_less:
2077 case ir_relation_less: return ia32_cc_below;
2078 case ir_relation_unordered_less_equal:
2079 case ir_relation_less_equal: return ia32_cc_below_equal;
2080 case ir_relation_unordered_greater:
2081 case ir_relation_greater: return ia32_cc_above;
2082 case ir_relation_unordered_greater_equal:
2083 case ir_relation_greater_equal: return ia32_cc_above_equal;
2084 case ir_relation_unordered_less_greater:
2085 case ir_relation_less_greater: return ia32_cc_not_equal;
2086 case ir_relation_less_equal_greater:
2087 case ir_relation_unordered:
2088 case ir_relation_false:
2089 case ir_relation_true:
2090 /* introduce jump always/jump never? */
2093 panic("Unexpected pnc");
2097 static ir_node *get_flags_mode_b(ir_node *node, ia32_condition_code_t *cc_out)
2099 /* a mode_b value, we have to compare it against 0 */
2100 dbg_info *dbgi = get_irn_dbg_info(node);
2101 ir_node *new_block = be_transform_node(get_nodes_block(node));
2102 ir_node *new_op = be_transform_node(node);
/* Test val,val sets ZF according to val; condition is then "not zero" */
2103 ir_node *flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op, new_op, false);
2104 *cc_out = ia32_cc_not_equal;
/* Produce a flags value for a Cmp node and report the condition code to
 * test; recognizes And(1<<n, x) bit tests and emits Bt where profitable. */
2108 static ir_node *get_flags_node_cmp(ir_node *cmp, ia32_condition_code_t *cc_out)
2110 /* must have a Cmp as input */
2111 ir_relation relation = get_Cmp_relation(cmp);
2112 ir_relation possible;
2113 ir_node *l = get_Cmp_left(cmp);
2114 ir_node *r = get_Cmp_right(cmp);
2115 ir_mode *mode = get_irn_mode(l);
2118 /* check for bit-test */
2119 if (ia32_cg_config.use_bt && (relation == ir_relation_equal
2120 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2121 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2123 ir_node *la = get_And_left(l);
2124 ir_node *ra = get_And_right(l);
2131 ir_node *c = get_Shl_left(la);
2132 if (is_Const_1(c) && is_Const_0(r)) {
2133 /* (1 << n) & ra) */
2134 ir_node *n = get_Shl_right(la);
2135 flags = gen_bt(cmp, ra, n);
2136 /* the bit is copied into the CF flag */
2137 if (relation & ir_relation_equal)
2138 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2140 *cc_out = ia32_cc_below; /* test for CF=1 */
2146 /* the middle-end tries to eliminate impossible relations, so a ptr != 0
2147 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2148 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2149 * a predecessor node). So add the < bit */
2150 possible = ir_get_possible_cmp_relations(l, r);
2151 if (((relation & ir_relation_less) && !(possible & ir_relation_greater))
2152 || ((relation & ir_relation_greater) && !(possible & ir_relation_less)))
2153 relation |= ir_relation_less_greater;
2155 /* just do a normal transformation of the Cmp */
2156 *cc_out = relation_to_condition_code(relation, mode);
2157 flags = be_transform_node(cmp);
2162 * Transform a node returning a "flag" result.
2164 * @param node the node to transform
2165 * @param cc_out the compare mode to use
2167 static ir_node *get_flags_node(ir_node *node, ia32_condition_code_t *cc_out)
/* dispatch: Cmp inputs take the cmp path, anything else must be mode_b
 * and is tested against zero */
2170 return get_flags_node_cmp(node, cc_out);
2171 assert(get_irn_mode(node) == mode_b);
2172 return get_flags_mode_b(node, cc_out);
2176 * Transforms a Load.
2178 * @return the created ia32 Load node
2180 static ir_node *gen_Load(ir_node *node)
2182 ir_node *old_block = get_nodes_block(node);
2183 ir_node *block = be_transform_node(old_block);
2184 ir_node *ptr = get_Load_ptr(node);
2185 ir_node *mem = get_Load_mem(node);
2186 ir_node *new_mem = be_transform_node(mem);
2187 dbg_info *dbgi = get_irn_dbg_info(node);
2188 ir_mode *mode = get_Load_mode(node);
2189 int throws_exception = ir_throws_exception(node);
2193 ia32_address_t addr;
2195 /* construct load address */
2196 memset(&addr, 0, sizeof(addr));
2197 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2204 base = be_transform_node(base);
2210 idx = be_transform_node(idx);
/* float loads use xLoad (SSE2) or vfld (x87) */
2213 if (mode_is_float(mode)) {
2214 if (ia32_cg_config.use_sse2) {
2215 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2218 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2222 assert(mode != mode_b);
2224 /* create a conv node with address mode for smaller modes */
2225 if (get_mode_size_bits(mode) < 32) {
2226 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2227 new_mem, noreg_GP, mode);
2229 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2232 ir_set_throws_exception(new_node, throws_exception);
2234 set_irn_pinned(new_node, get_irn_pinned(node));
2235 set_ia32_op_type(new_node, ia32_AddrModeS);
2236 set_ia32_ls_mode(new_node, mode);
2237 set_address(new_node, &addr);
/* unpinned loads may be rematerialized by the register allocator; the
 * assert checks that the result proj number is shared by all variants */
2239 if (get_irn_pinned(node) == op_pin_state_floats) {
2240 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2241 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2242 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2243 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2246 SET_IA32_ORIG_NODE(new_node, node);
/* Check whether a Load result may be folded as the destination of a
 * read-modify-write instruction (destination address mode). */
2251 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2252 ir_node *ptr, ir_node *other)
2259 /* we only use address mode if we're the only user of the load */
2260 if (get_irn_n_edges(node) > 1)
2263 load = get_Proj_pred(node);
/* load and store must be in the same block */
2266 if (get_nodes_block(load) != block)
2269 /* store should have the same pointer as the load */
2270 if (get_Load_ptr(load) != ptr)
2273 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2274 if (other != NULL &&
2275 get_nodes_block(other) == block &&
2276 heights_reachable_in_block(ia32_heights, other, load)) {
/* other memory operations in between also forbid the transformation */
2280 if (ia32_prevents_AM(block, load, mem))
2282 /* Store should be attached to the load via mem */
2283 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Construct a binary read-modify-write instruction (destination AM),
 * e.g. AddMem: loads from [addr], combines with new_op, stores back. */
2288 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2289 ir_node *mem, ir_node *ptr, ir_mode *mode,
2290 construct_binop_dest_func *func,
2291 construct_binop_dest_func *func8bit,
2292 match_flags_t flags)
2294 ir_node *src_block = get_nodes_block(node);
2302 ia32_address_mode_t am;
2303 ia32_address_t *addr = &am.addr;
2304 memset(&am, 0, sizeof(am));
2306 assert(flags & match_immediate); /* there is no destam node without... */
2307 commutative = (flags & match_commutative) != 0;
/* pick whichever operand is a foldable load; commutativity allows
 * trying both orders */
2309 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2310 build_address(&am, op1, ia32_create_am_double_use);
2311 new_op = create_immediate_or_transform(op2, 0);
2312 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2313 build_address(&am, op2, ia32_create_am_double_use);
2314 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address parts with the no-register placeholder */
2319 if (addr->base == NULL)
2320 addr->base = noreg_GP;
2321 if (addr->index == NULL)
2322 addr->index = noreg_GP;
2323 if (addr->mem == NULL)
2326 dbgi = get_irn_dbg_info(node);
2327 block = be_transform_node(src_block);
2328 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8 bit operations need the dedicated 8 bit constructor */
2330 if (get_mode_size_bits(mode) == 8) {
2331 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2333 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2335 set_address(new_node, addr);
2336 set_ia32_op_type(new_node, ia32_AddrModeD);
2337 set_ia32_ls_mode(new_node, mode);
2338 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory users to the new RMW node */
2340 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2341 mem_proj = be_transform_node(am.mem_proj);
2342 be_set_transformed_node(am.mem_proj, new_node);
2343 be_set_transformed_node(mem_proj, new_node);
/* Unary counterpart of dest_am_binop: e.g. IncMem/DecMem operating
 * directly on memory. Returns early if destination AM is not legal. */
2348 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2349 ir_node *ptr, ir_mode *mode,
2350 construct_unop_dest_func *func)
2352 ir_node *src_block = get_nodes_block(node);
2358 ia32_address_mode_t am;
2359 ia32_address_t *addr = &am.addr;
2361 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2364 memset(&am, 0, sizeof(am));
2365 build_address(&am, op, ia32_create_am_double_use);
2367 dbgi = get_irn_dbg_info(node);
2368 block = be_transform_node(src_block);
2369 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2370 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2371 set_address(new_node, addr);
2372 set_ia32_op_type(new_node, ia32_AddrModeD);
2373 set_ia32_ls_mode(new_node, mode);
2374 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory users to the new RMW node */
2376 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2377 mem_proj = be_transform_node(am.mem_proj);
2378 be_set_transformed_node(am.mem_proj, new_node);
2379 be_set_transformed_node(mem_proj, new_node);
/* Try to turn Store(Mux(sel, 0/1, 1/0)) into a SetccMem that writes the
 * condition byte straight to memory. Only valid for 8 bit stores. */
2384 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2386 ir_mode *mode = get_irn_mode(node);
2387 ir_node *mux_true = get_Mux_true(node);
2388 ir_node *mux_false = get_Mux_false(node);
2396 ia32_condition_code_t cc;
2397 ia32_address_t addr;
2399 if (get_mode_size_bits(mode) != 8)
/* only Mux of the constants 0 and 1 (either order) qualifies */
2402 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2404 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2410 cond = get_Mux_sel(node);
2411 flags = get_flags_node(cond, &cc);
2412 /* we can't handle the float special cases with SetM */
2413 if (cc & ia32_cc_additional_float_cases)
/* NOTE(review): negation appears to handle the inverted 0/1 order */
2416 cc = ia32_negate_condition_code(cc);
2418 build_address_ptr(&addr, ptr, mem);
2420 dbgi = get_irn_dbg_info(node);
2421 block = get_nodes_block(node);
2422 new_block = be_transform_node(block);
2423 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2424 addr.index, addr.mem, flags, cc);
2425 set_address(new_node, &addr);
2426 set_ia32_op_type(new_node, ia32_AddrModeD);
2427 set_ia32_ls_mode(new_node, mode);
2428 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Try to fold a Store(op(Load(ptr), x)) pattern into a single ia32
 * destination-address-mode instruction (e.g. AddMem, AndMem, ShlMem...).
 * Dispatches on the opcode of the stored value and delegates node
 * construction to dest_am_binop()/dest_am_unop().
 * Returns the created node, or (presumably) NULL when no pattern matched —
 * NOTE(review): the failure paths/returns are not visible in this view.
 */
2433 static ir_node *try_create_dest_am(ir_node *node)
2435 ir_node *val = get_Store_value(node);
2436 ir_node *mem = get_Store_mem(node);
2437 ir_node *ptr = get_Store_ptr(node);
2438 ir_mode *mode = get_irn_mode(val);
2439 unsigned bits = get_mode_size_bits(mode);
2444 /* handle only GP modes for now... */
2445 if (!ia32_mode_needs_gp_reg(mode))
2449 /* store must be the only user of the val node */
2450 if (get_irn_n_edges(val) > 1)
2452 /* skip pointless convs */
2454 ir_node *conv_op = get_Conv_op(val);
2455 ir_mode *pred_mode = get_irn_mode(conv_op);
2456 if (!ia32_mode_needs_gp_reg(pred_mode))
/* A Conv that does not narrow the value is a no-op for the store. */
2458 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2466 /* value must be in the same block */
2467 if (get_nodes_block(node) != get_nodes_block(val))
2470 switch (get_irn_opcode(val)) {
/* Add: prefer INC/DEC memory forms when the target likes them. */
2472 op1 = get_Add_left(val);
2473 op2 = get_Add_right(val);
2474 if (ia32_cg_config.use_incdec) {
2475 if (is_Const_1(op2)) {
2476 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2478 } else if (is_Const_Minus_1(op2)) {
2479 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2483 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2484 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2485 match_commutative | match_immediate);
2488 op1 = get_Sub_left(val);
2489 op2 = get_Sub_right(val);
/* Sub with constant rhs should have been normalized to Add earlier. */
2490 if (is_Const(op2)) {
2491 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2493 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2494 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2498 op1 = get_And_left(val);
2499 op2 = get_And_right(val);
2500 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2501 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2502 match_commutative | match_immediate);
2505 op1 = get_Or_left(val);
2506 op2 = get_Or_right(val);
2507 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2508 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2509 match_commutative | match_immediate);
2512 op1 = get_Eor_left(val);
2513 op2 = get_Eor_right(val);
2514 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2515 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2516 match_commutative | match_immediate);
/* Shifts pass the same constructor twice: there is no 8-bit variant. */
2519 op1 = get_Shl_left(val);
2520 op2 = get_Shl_right(val);
2521 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2522 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2526 op1 = get_Shr_left(val);
2527 op2 = get_Shr_right(val);
2528 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2529 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2533 op1 = get_Shrs_left(val);
2534 op2 = get_Shrs_right(val);
2535 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2536 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2540 op1 = get_Rotl_left(val);
2541 op2 = get_Rotl_right(val);
2542 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2543 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2546 /* TODO: match ROR patterns... */
/* Mux: a stored 0/1 select can become a SetccMem. */
2548 new_node = try_create_SetMem(val, ptr, mem);
2552 op1 = get_Minus_op(val);
2553 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2556 /* should be lowered already */
2557 assert(mode != mode_b);
2558 op1 = get_Not_op(val);
2559 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* Preserve pinned state: a pinned Store must not become floating. */
2565 if (new_node != NULL) {
2566 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2567 get_irn_pinned(node) == op_pin_state_pinned) {
2568 set_irn_pinned(new_node, op_pin_state_pinned);
/*
 * Check whether an integer mode can be the target of an x87 float->int
 * store (fist): only signed 16- or 32-bit modes qualify here.
 * NOTE(review): the return statements are elided in this view.
 */
2575 static bool possible_int_mode_for_fp(ir_mode *mode)
2579 if (!mode_is_signed(mode))
2581 size = get_mode_size_bits(mode);
2582 if (size != 16 && size != 32)
/*
 * Test whether @p node is a Conv from a float mode to an integer mode
 * that a fist instruction could handle (see possible_int_mode_for_fp()).
 * NOTE(review): the is_Conv() check and returns are elided in this view.
 */
2587 static int is_float_to_int_conv(const ir_node *node)
2589 ir_mode *mode = get_irn_mode(node);
2593 if (!possible_int_mode_for_fp(mode))
2598 conv_op = get_Conv_op(node);
2599 conv_mode = get_irn_mode(conv_op);
2601 if (!mode_is_float(conv_mode))
2608 * Transform a Store(floatConst) into a sequence of
 * 32-bit integer immediate Stores covering the constant's bytes.
 *
 * @param node the original Store node
 * @param cns  the float Const being stored
2611 * @return the created ia32 Store node
2613 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2615 ir_mode *mode = get_irn_mode(cns);
2616 unsigned size = get_mode_size_bytes(mode);
2617 ir_tarval *tv = get_Const_tarval(cns);
2618 ir_node *block = get_nodes_block(node);
2619 ir_node *new_block = be_transform_node(block);
2620 ir_node *ptr = get_Store_ptr(node);
2621 ir_node *mem = get_Store_mem(node);
2622 dbg_info *dbgi = get_irn_dbg_info(node);
2625 int throws_exception = ir_throws_exception(node);
2627 ia32_address_t addr;
/* The constant is emitted in 32-bit chunks, so its size must be a
 * multiple of 4 bytes. */
2629 assert(size % 4 == 0);
2632 build_address_ptr(&addr, ptr, mem);
/* Assemble one little-endian 32-bit word from the tarval's bytes. */
2636 get_tarval_sub_bits(tv, ofs) |
2637 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2638 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2639 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2640 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2642 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2643 addr.index, addr.mem, imm);
2644 ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2646 ir_set_throws_exception(new_node, throws_exception);
2647 set_irn_pinned(new_node, get_irn_pinned(node));
2648 set_ia32_op_type(new_node, ia32_AddrModeD);
2649 set_ia32_ls_mode(new_node, mode_Iu);
2650 set_address(new_node, &addr);
2651 SET_IA32_ORIG_NODE(new_node, node);
/* Loop over all 4-byte chunks; NOTE(review): the loop header and the
 * offset/address advance are elided in this view. */
2659 } while (size != 0);
/* Several stores: combine their memory Projs with a Sync; a single
 * store returns its own node. */
2662 return new_rd_Sync(dbgi, new_block, i, ins);
2664 return get_Proj_pred(ins[0]);
2669 * Generate a vfist or vfisttp instruction (float -> int store).
 * fisttp (SSE3) truncates without fiddling with the FPU control word;
 * the plain fist path loads a truncating control word instead.
2671 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2672 ir_node *index, ir_node *mem, ir_node *val)
2674 if (ia32_cg_config.use_fisttp) {
2675 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2676 if other users exists */
2677 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2678 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* Keep the popped result alive so the register allocator accounts
 * for the implicit x87 stack pop. */
2679 be_new_Keep(block, 1, &value);
/* Non-fisttp path: fist needs an explicit truncating FPU mode word. */
2683 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2686 ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2692 * Transforms a general (no special case) Store.
 * Tries destination address mode first, then picks the store flavour
 * by value mode: SSE/x87 for floats, fist for float->int conversions,
 * plain (8-bit or wide) integer Store otherwise.
2694 * @return the created ia32 Store node
2696 static ir_node *gen_general_Store(ir_node *node)
2698 ir_node *val = get_Store_value(node);
2699 ir_mode *mode = get_irn_mode(val);
2700 ir_node *block = get_nodes_block(node);
2701 ir_node *new_block = be_transform_node(block);
2702 ir_node *ptr = get_Store_ptr(node);
2703 ir_node *mem = get_Store_mem(node);
2704 dbg_info *dbgi = get_irn_dbg_info(node);
2705 int throws_exception = ir_throws_exception(node);
2708 ia32_address_t addr;
2710 /* check for destination address mode */
2711 new_node = try_create_dest_am(node);
2712 if (new_node != NULL)
2715 /* construct store address */
2716 memset(&addr, 0, sizeof(addr));
2717 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* Missing base/index become the GP no-register placeholder. */
2719 if (addr.base == NULL) {
2720 addr.base = noreg_GP;
2722 addr.base = be_transform_node(addr.base);
2725 if (addr.index == NULL) {
2726 addr.index = noreg_GP;
2728 addr.index = be_transform_node(addr.index);
2730 addr.mem = be_transform_node(mem);
2732 if (mode_is_float(mode)) {
2733 /* Convs (and strict-Convs) before stores are unnecessary if the mode
 * stays the same (the store width already fixes the precision). */
2735 while (is_Conv(val) && mode == get_irn_mode(val)) {
2736 ir_node *op = get_Conv_op(val);
2737 if (!mode_is_float(get_irn_mode(op)))
2741 new_val = be_transform_node(val);
2742 if (ia32_cg_config.use_sse2) {
2743 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2744 addr.index, addr.mem, new_val);
2746 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2747 addr.index, addr.mem, new_val, mode);
/* x87 only: fold float->int Conv + Store into a fist. */
2749 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2750 val = get_Conv_op(val);
2752 /* TODO: is this optimisation still necessary at all (middleend)? */
2753 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before
 * the fist, since the integer result is the same. */
2755 while (is_Conv(val)) {
2756 ir_node *op = get_Conv_op(val);
2757 if (!mode_is_float(get_irn_mode(op)))
2759 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2763 new_val = be_transform_node(val);
2764 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
/* Integer path: the value may become an immediate operand. */
2766 new_val = create_immediate_or_transform(val, 0);
2767 assert(mode != mode_b);
2769 if (get_mode_size_bits(mode) == 8) {
2770 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2771 addr.index, addr.mem, new_val);
2773 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2774 addr.index, addr.mem, new_val);
2777 ir_set_throws_exception(new_node, throws_exception);
2779 set_irn_pinned(new_node, get_irn_pinned(node));
2780 set_ia32_op_type(new_node, ia32_AddrModeD);
2781 set_ia32_ls_mode(new_node, mode);
2783 set_address(new_node, &addr);
2784 SET_IA32_ORIG_NODE(new_node, node);
2790 * Transforms a Store.
 * Dispatch: float constants get a special integer-store expansion,
 * everything else goes through gen_general_Store().
2792 * @return the created ia32 Store node
2794 static ir_node *gen_Store(ir_node *node)
2796 ir_node *val = get_Store_value(node);
2797 ir_mode *mode = get_irn_mode(val);
2799 if (mode_is_float(mode) && is_Const(val)) {
2800 /* We can transform every floating const store
2801 into a sequence of integer stores.
2802 If the constant is already in a register,
2803 it would be better to use it, but we don't
2804 have this information here. */
2805 return gen_float_const_Store(node, val);
2807 return gen_general_Store(node);
2811 * Transforms a Switch (a Cond on a non-boolean selector).
 * Builds a jump-table entity and an ia32 SwitchJmp that indexes it
 * (scale 2 == selector * 4, the table entry size).
2813 * @return the created ia32 SwitchJmp node
2815 static ir_node *create_Switch(ir_node *node)
2817 dbg_info *dbgi = get_irn_dbg_info(node);
2818 ir_node *block = be_transform_node(get_nodes_block(node));
2819 ir_node *sel = get_Cond_selector(node);
2820 ir_node *new_sel = be_transform_node(sel);
2821 long default_pn = get_Cond_default_proj(node);
/* Selector must already be 32 bit; narrower switches are widened
 * before reaching the backend. */
2825 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
/* Anonymous private constant entity holding the jump table. */
2827 entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2828 set_entity_visibility(entity, ir_visibility_private);
2829 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2831 /* TODO: we could perform some more matching here to also use the base
2832 * register of the address mode */
2834 = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, default_pn);
2835 set_ia32_am_scale(new_node, 2);
2836 set_ia32_am_sc(new_node, entity);
2837 set_ia32_op_type(new_node, ia32_AddrModeS);
2838 set_ia32_ls_mode(new_node, mode_Iu);
2839 SET_IA32_ORIG_NODE(new_node, node);
2845 * Transform a Cond node.
 * Boolean selectors become a flags-consuming Jcc; any other selector
 * mode is a switch and is delegated to create_Switch().
2847 static ir_node *gen_Cond(ir_node *node)
2849 ir_node *block = get_nodes_block(node);
2850 ir_node *new_block = be_transform_node(block);
2851 dbg_info *dbgi = get_irn_dbg_info(node);
2852 ir_node *sel = get_Cond_selector(node);
2853 ir_mode *sel_mode = get_irn_mode(sel);
2854 ir_node *flags = NULL;
2856 ia32_condition_code_t cc;
2858 if (sel_mode != mode_b) {
2859 return create_Switch(node);
2862 /* we get flags from a Cmp */
2863 flags = get_flags_node(sel, &cc);
2865 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2866 SET_IA32_ORIG_NODE(new_node, node);
2872 * Transform a be_Copy.
 * Duplicates the node and normalizes GP-register results to mode_Iu,
 * since the ia32 backend uses a single unsigned mode for GP values.
2874 static ir_node *gen_be_Copy(ir_node *node)
2876 ir_node *new_node = be_duplicate_node(node);
2877 ir_mode *mode = get_irn_mode(new_node);
2879 if (ia32_mode_needs_gp_reg(mode)) {
2880 set_irn_mode(new_node, mode_Iu);
/*
 * Create an x87 float compare.
 * With fucomi the flags are produced directly; otherwise a
 * Fucom/Ftst + Fnstsw + Sahf sequence moves the FPU status word into
 * the CPU flags register.
 */
2886 static ir_node *create_Fucom(ir_node *node)
2888 dbg_info *dbgi = get_irn_dbg_info(node);
2889 ir_node *block = get_nodes_block(node);
2890 ir_node *new_block = be_transform_node(block);
2891 ir_node *left = get_Cmp_left(node);
2892 ir_node *new_left = be_transform_node(left);
2893 ir_node *right = get_Cmp_right(node);
2897 if (ia32_cg_config.use_fucomi) {
2898 new_right = be_transform_node(right);
2899 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2901 set_ia32_commutative(new_node);
2902 SET_IA32_ORIG_NODE(new_node, node);
/* No fucomi: compare against 0 can use the shorter ftst form. */
2904 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2905 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2907 new_right = be_transform_node(right);
2908 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2911 set_ia32_commutative(new_node);
2913 SET_IA32_ORIG_NODE(new_node, node);
/* Sahf copies AH (the fnstsw result) into the EFLAGS condition bits. */
2915 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2916 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Create an SSE2 Ucomi float compare producing CPU flags.
 * Uses the address-mode matcher so one operand may come directly
 * from memory.
 */
2922 static ir_node *create_Ucomi(ir_node *node)
2924 dbg_info *dbgi = get_irn_dbg_info(node);
2925 ir_node *src_block = get_nodes_block(node);
2926 ir_node *new_block = be_transform_node(src_block);
2927 ir_node *left = get_Cmp_left(node);
2928 ir_node *right = get_Cmp_right(node);
2930 ia32_address_mode_t am;
2931 ia32_address_t *addr = &am.addr;
2933 match_arguments(&am, src_block, left, right, NULL,
2934 match_commutative | match_am);
2936 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2937 addr->mem, am.new_op1, am.new_op2,
2939 set_am_attributes(new_node, &am);
2941 SET_IA32_ORIG_NODE(new_node, node);
/* Re-route memory users if an operand Load was folded into the AM. */
2943 new_node = fix_mem_proj(new_node, &am);
2949 * returns true if it is assured, that the upper bits of a node are "clean"
2950 * which means for a 16 or 8 bit value, that the upper bits in the register
2951 * are 0 for unsigned and a copy of the last significant bit for signed
 * modes. Used to decide whether a narrow compare can be widened to 32 bit.
2954 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2956 assert(ia32_mode_needs_gp_reg(mode));
/* >= 32 bit values fill the whole register; nothing to check. */
2957 if (get_mode_size_bits(mode) >= 32)
2960 if (is_Proj(transformed_node))
2961 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2963 switch (get_ia32_irn_opcode(transformed_node)) {
2964 case iro_ia32_Conv_I2I:
2965 case iro_ia32_Conv_I2I8Bit: {
/* A conversion cleans the upper bits iff it extends with the same
 * signedness from an equal-or-narrower mode. */
2966 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2967 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2969 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2976 if (mode_is_signed(mode)) {
2977 return false; /* TODO handle signed modes */
/* Shr by a large-enough immediate guarantees zeroed upper bits. */
2979 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2980 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2981 const ia32_immediate_attr_t *attr
2982 = get_ia32_immediate_attr_const(right);
2983 if (attr->symconst == 0 &&
2984 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2988 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2992 /* TODO too conservative if shift amount is constant */
2993 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean input suffices (0 & x == 0). */
2996 if (!mode_is_signed(mode)) {
2998 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2999 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
3001 /* TODO if one is known to be zero extended, then || is sufficient */
/* Generic binary case: both operands must be clean. */
3006 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
3007 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
3009 case iro_ia32_Const:
3010 case iro_ia32_Immediate: {
3011 const ia32_immediate_attr_t *attr =
3012 get_ia32_immediate_attr_const(transformed_node);
3013 if (mode_is_signed(mode)) {
/* Signed: all bits above the sign bit must equal the sign bit. */
3014 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
3015 return shifted == 0 || shifted == -1;
3017 unsigned long shifted = (unsigned long)attr->offset;
3018 shifted >>= get_mode_size_bits(mode);
3019 return shifted == 0;
3029 * Generate code for a Cmp.
 * Floats go to Ucomi (SSE2) or Fucom (x87); integers use Test for the
 * (x & y) ==/!= 0 pattern and Cmp otherwise, widening narrow compares
 * to 32 bit when the upper register bits are known clean.
3031 static ir_node *gen_Cmp(ir_node *node)
3033 dbg_info *dbgi = get_irn_dbg_info(node);
3034 ir_node *block = get_nodes_block(node);
3035 ir_node *new_block = be_transform_node(block);
3036 ir_node *left = get_Cmp_left(node);
3037 ir_node *right = get_Cmp_right(node);
3038 ir_mode *cmp_mode = get_irn_mode(left);
3040 ia32_address_mode_t am;
3041 ia32_address_t *addr = &am.addr;
3043 if (mode_is_float(cmp_mode)) {
3044 if (ia32_cg_config.use_sse2) {
3045 return create_Ucomi(node);
3047 return create_Fucom(node);
3051 assert(ia32_mode_needs_gp_reg(cmp_mode));
3053 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3054 if (is_Const_0(right) &&
3056 get_irn_n_edges(left) == 1) {
3057 /* Test(and_left, and_right) */
3058 ir_node *and_left = get_And_left(left);
3059 ir_node *and_right = get_And_right(left);
3061 /* matze: code here used mode instead of cmd_mode, I think it is always
3062 * the same as cmp_mode, but I leave this here to see if this is really
 * true. */
3065 assert(get_irn_mode(and_left) == cmp_mode);
3067 match_arguments(&am, block, and_left, and_right, NULL,
3069 match_am | match_8bit_am | match_16bit_am |
3070 match_am_and_immediates | match_immediate);
3072 /* use 32bit compare mode if possible since the opcode is smaller */
3073 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3074 upper_bits_clean(am.new_op2, cmp_mode)) {
3075 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3078 if (get_mode_size_bits(cmp_mode) == 8) {
3079 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3080 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3082 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
3083 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3086 /* Cmp(left, right) */
3087 match_arguments(&am, block, left, right, NULL,
3088 match_commutative | match_am | match_8bit_am |
3089 match_16bit_am | match_am_and_immediates |
3091 /* use 32bit compare mode if possible since the opcode is smaller */
3092 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3093 upper_bits_clean(am.new_op2, cmp_mode)) {
3094 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3097 if (get_mode_size_bits(cmp_mode) == 8) {
3098 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3099 addr->index, addr->mem, am.new_op1,
3100 am.new_op2, am.ins_permuted);
3102 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3103 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3106 set_am_attributes(new_node, &am);
3107 set_ia32_ls_mode(new_node, cmp_mode);
3109 SET_IA32_ORIG_NODE(new_node, node);
3111 new_node = fix_mem_proj(new_node, &am);
/*
 * Create an ia32 CMovcc for a Mux on GP values.
 *
 * @param node      the Mux node
 * @param flags     the untransformed flags-producing node (for matching)
 * @param new_flags the transformed flags input of the CMov
 * @param cc        condition code selecting the true value
 */
3116 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3117 ia32_condition_code_t cc)
3119 dbg_info *dbgi = get_irn_dbg_info(node);
3120 ir_node *block = get_nodes_block(node);
3121 ir_node *new_block = be_transform_node(block);
3122 ir_node *val_true = get_Mux_true(node);
3123 ir_node *val_false = get_Mux_false(node);
3125 ia32_address_mode_t am;
3126 ia32_address_t *addr;
3128 assert(ia32_cg_config.use_cmov);
3129 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3133 match_arguments(&am, block, val_false, val_true, flags,
3134 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* If the matcher swapped the operands, the condition must be inverted
 * to keep the Mux semantics. */
3136 if (am.ins_permuted)
3137 cc = ia32_negate_condition_code(cc);
3139 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3140 addr->mem, am.new_op1, am.new_op2, new_flags,
3142 set_am_attributes(new_node, &am);
3144 SET_IA32_ORIG_NODE(new_node, node);
3146 new_node = fix_mem_proj(new_node, &am);
3152 * Creates a ia32 Setcc instruction and, if the requested result mode is
 * wider than the 8-bit Setcc output, zero-extends it with a Conv_I2I8Bit.
3154 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3155 ir_node *flags, ia32_condition_code_t cc,
3158 ir_mode *mode = get_irn_mode(orig_node);
3161 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3162 SET_IA32_ORIG_NODE(new_node, orig_node);
3164 /* we might need to conv the result up */
3165 if (get_mode_size_bits(mode) > 8) {
/* mode_Bu => unsigned byte source, i.e. zero extension. */
3166 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3167 nomem, new_node, mode_Bu);
3168 SET_IA32_ORIG_NODE(new_node, orig_node);
3175 * Create instruction for an unsigned Difference or Zero:
 * doz(a, b) = a > b ? a - b : 0, computed branch-free as
 * (a - b) & ~sbb(0,0,carry)  -- the Sbb0/Not mask is all-ones exactly
 * when the Sub did not borrow.
3177 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3179 ir_mode *mode = get_irn_mode(psi);
3189 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3190 match_mode_neutral | match_am | match_immediate | match_two_users);
3192 block = get_nodes_block(new_node);
/* gen_binop may return the Sub itself or a result Proj; we need the
 * Sub node to also project its flags output. */
3194 if (is_Proj(new_node)) {
3195 sub = get_Proj_pred(new_node);
3198 set_irn_mode(sub, mode_T);
3199 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3201 assert(is_ia32_Sub(sub));
3202 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3204 dbgi = get_irn_dbg_info(psi);
3205 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3206 notn = new_bd_ia32_Not(dbgi, block, sbb);
3208 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3209 set_ia32_commutative(new_node);
3214 * Create an const array of two float consts.
 *
3216 * @param c0 the first constant
3217 * @param c1 the second constant
3218 * @param new_mode IN/OUT for the mode of the constants, if NULL
3219 * smallest possible mode will be used
 *
 * @return a private, constant global entity holding { c0, c1 }
3221 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3224 ir_mode *mode = *new_mode;
3226 ir_initializer_t *initializer;
3227 ir_tarval *tv0 = get_Const_tarval(c0);
3228 ir_tarval *tv1 = get_Const_tarval(c1);
3231 /* detect the best mode for the constants */
3232 mode = get_tarval_mode(tv0);
/* Prefer the smallest IEEE mode that loses no precision: try float
 * first, then double. */
3234 if (mode != mode_F) {
3235 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3236 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3238 tv0 = tarval_convert_to(tv0, mode);
3239 tv1 = tarval_convert_to(tv1, mode);
3240 } else if (mode != mode_D) {
3241 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3242 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3244 tv0 = tarval_convert_to(tv0, mode);
3245 tv1 = tarval_convert_to(tv1, mode);
3252 tp = ia32_create_float_type(mode, 4);
3253 tp = ia32_create_float_array(tp);
3255 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3257 set_entity_ld_ident(ent, get_entity_ident(ent));
3258 set_entity_visibility(ent, ir_visibility_private);
3259 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3261 initializer = create_initializer_compound(2);
3263 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3264 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3266 set_entity_initializer(ent, initializer);
3273 * Possible transformations for creating a Setcc.
 * NOTE(review): the enum members (SETCC_TR_ADD, SETCC_TR_LEA, ...) and the
 * struct's step array are elided in this view.
3275 enum setcc_transform_insn {
/* A recipe found by find_const_transform(): the condition code to set
 * plus a short sequence of instruction steps producing t/f. */
3288 typedef struct setcc_transform {
3290 ia32_condition_code_t cc;
3292 enum setcc_transform_insn transform;
3296 } setcc_transform_t;
3299 * Setcc can only handle 0 and 1 result.
3300 * Find a transformation that creates 0 and 1 from
 * the given tarvals t (true value) and f (false value), emitting a
 * step list (ADD/LEA/SHL/NEG/NOT/AND/SET/SBB) into @p res.
3303 static void find_const_transform(ia32_condition_code_t cc,
3304 ir_tarval *t, ir_tarval *f,
3305 setcc_transform_t *res)
/* Normalize so that t is the non-zero / larger value; swapping t and f
 * is compensated by negating the condition code. */
3311 if (tarval_is_null(t)) {
3315 cc = ia32_negate_condition_code(cc);
3316 } else if (tarval_cmp(t, f) == ir_relation_less) {
3317 // now, t is the bigger one
3321 cc = ia32_negate_condition_code(cc);
/* Non-zero f: generate from (t - f) and add f back as a final step. */
3325 if (! tarval_is_null(f)) {
3326 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3329 res->steps[step].transform = SETCC_TR_ADD;
3331 if (t == tarval_bad)
3332 panic("constant subtract failed");
3333 if (! tarval_is_long(f))
3334 panic("tarval is not long");
3336 res->steps[step].val = get_tarval_long(f);
3338 f = tarval_sub(f, f, NULL);
3339 assert(tarval_is_null(f));
/* t == 1: a plain Setcc suffices. */
3342 if (tarval_is_one(t)) {
3343 res->steps[step].transform = SETCC_TR_SET;
3344 res->num_steps = ++step;
/* t == -1: Setcc then negate (0/1 -> 0/-1). */
3348 if (tarval_is_minus_one(t)) {
3349 res->steps[step].transform = SETCC_TR_NEG;
3351 res->steps[step].transform = SETCC_TR_SET;
3352 res->num_steps = ++step;
3355 if (tarval_is_long(t)) {
3356 long v = get_tarval_long(t);
3358 res->steps[step].val = 0;
/* Small multipliers map to LEA scales; LEAxx is base+index*scale,
 * plain LEA/SHL when only the scaled index is needed. A preceding
 * ADD step is folded into the LEA's displacement where possible. */
3361 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3363 res->steps[step].transform = SETCC_TR_LEAxx;
3364 res->steps[step].scale = 3; /* (a << 3) + a */
3367 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3369 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3370 res->steps[step].scale = 3; /* (a << 3) */
3373 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3375 res->steps[step].transform = SETCC_TR_LEAxx;
3376 res->steps[step].scale = 2; /* (a << 2) + a */
3379 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3381 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3382 res->steps[step].scale = 2; /* (a << 2) */
3385 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3387 res->steps[step].transform = SETCC_TR_LEAxx;
3388 res->steps[step].scale = 1; /* (a << 1) + a */
3391 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3393 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3394 res->steps[step].scale = 1; /* (a << 1) */
3397 res->num_steps = step;
/* General case: mask with AND if t is not a single bit, else NEG,
 * then shift the Setcc result up to the bit position. */
3400 if (! tarval_is_single_bit(t)) {
3401 res->steps[step].transform = SETCC_TR_AND;
3402 res->steps[step].val = v;
3404 res->steps[step].transform = SETCC_TR_NEG;
3406 int val = get_tarval_lowest_bit(t);
3409 res->steps[step].transform = SETCC_TR_SHL;
3410 res->steps[step].scale = val;
3414 res->steps[step].transform = SETCC_TR_SET;
3415 res->num_steps = ++step;
3418 panic("tarval is not long");
3422 * Transforms a Mux node into some code sequence.
 * Handles, in order: float abs, SSE min/max patterns, float constant
 * pairs via a flag-indexed constant array load, unsigned doz, integer
 * constant pairs via Setcc recipes, and finally a generic CMov.
3424 * @return The transformed node.
3426 static ir_node *gen_Mux(ir_node *node)
3428 dbg_info *dbgi = get_irn_dbg_info(node);
3429 ir_node *block = get_nodes_block(node);
3430 ir_node *new_block = be_transform_node(block);
3431 ir_node *mux_true = get_Mux_true(node);
3432 ir_node *mux_false = get_Mux_false(node);
3433 ir_node *sel = get_Mux_sel(node);
3434 ir_mode *mode = get_irn_mode(node);
3438 ia32_condition_code_t cc;
3440 assert(get_irn_mode(sel) == mode_b);
/* abs()-shaped Mux: only implemented for floats here. */
3442 is_abs = ir_mux_is_abs(sel, mux_true, mux_false);
3444 if (ia32_mode_needs_gp_reg(mode)) {
3445 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3448 ir_node *op = ir_get_abs_op(sel, mux_true, mux_false);
3449 return create_float_abs(dbgi, block, op, is_abs < 0, node);
3453 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3454 if (mode_is_float(mode)) {
3455 ir_node *cmp_left = get_Cmp_left(sel);
3456 ir_node *cmp_right = get_Cmp_right(sel);
3457 ir_relation relation = get_Cmp_relation(sel);
3459 if (ia32_cg_config.use_sse2) {
3460 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3461 if (cmp_left == mux_true && cmp_right == mux_false) {
3462 /* Mux(a <= b, a, b) => MIN */
3463 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3464 match_commutative | match_am | match_two_users);
3465 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3466 /* Mux(a <= b, b, a) => MAX */
3467 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3468 match_commutative | match_am | match_two_users);
3470 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3471 if (cmp_left == mux_true && cmp_right == mux_false) {
3472 /* Mux(a >= b, a, b) => MAX */
3473 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3474 match_commutative | match_am | match_two_users);
3475 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3476 /* Mux(a >= b, b, a) => MIN */
3477 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3478 match_commutative | match_am | match_two_users);
/* Mux of two float constants: place { false, true } in a constant
 * array and load the entry selected by the Setcc result. */
3483 if (is_Const(mux_true) && is_Const(mux_false)) {
3484 ia32_address_mode_t am;
3489 flags = get_flags_node(sel, &cc);
3490 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3492 if (ia32_cg_config.use_sse2) {
3493 /* cannot load from different mode on SSE */
3496 /* x87 can load any mode */
3500 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* Scale the 0/1 selector by the element size; LEA chains emulate
 * the scales the AM encoding cannot express directly. */
3502 switch (get_mode_size_bytes(new_mode)) {
3512 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3513 set_ia32_am_scale(new_node, 2);
3518 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3519 set_ia32_am_scale(new_node, 1);
3522 /* arg, shift 16 NOT supported */
3524 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3527 panic("Unsupported constant size");
3530 am.ls_mode = new_mode;
3531 am.addr.base = get_symconst_base();
3532 am.addr.index = new_node;
3533 am.addr.mem = nomem;
3535 am.addr.scale = scale;
3536 am.addr.use_frame = 0;
3537 am.addr.tls_segment = false;
3538 am.addr.frame_entity = NULL;
3539 am.addr.symconst_sign = 0;
3540 am.mem_proj = am.addr.mem;
3541 am.op_type = ia32_AddrModeS;
3544 am.pinned = op_pin_state_floats;
3546 am.ins_permuted = false;
3548 if (ia32_cg_config.use_sse2)
3549 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3551 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3552 set_am_attributes(load, &am);
3554 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3556 panic("cannot transform floating point Mux");
/* --- integer Mux from here on --- */
3559 assert(ia32_mode_needs_gp_reg(mode));
3562 ir_node *cmp_left = get_Cmp_left(sel);
3563 ir_node *cmp_right = get_Cmp_right(sel);
3564 ir_relation relation = get_Cmp_relation(sel);
3565 ir_node *val_true = mux_true;
3566 ir_node *val_false = mux_false;
/* Normalize so the zero constant sits in val_false. */
3568 if (is_Const(val_true) && is_Const_null(val_true)) {
3569 ir_node *tmp = val_false;
3570 val_false = val_true;
3572 relation = get_negated_relation(relation);
/* Mux(a >/>= b, a - b, 0) and its mirror match the doz pattern. */
3574 if (is_Const_0(val_false) && is_Sub(val_true)) {
3575 if ((relation & ir_relation_greater)
3576 && get_Sub_left(val_true) == cmp_left
3577 && get_Sub_right(val_true) == cmp_right) {
3578 return create_doz(node, cmp_left, cmp_right);
3580 if ((relation & ir_relation_less)
3581 && get_Sub_left(val_true) == cmp_right
3582 && get_Sub_right(val_true) == cmp_left) {
3583 return create_doz(node, cmp_right, cmp_left);
3588 flags = get_flags_node(sel, &cc);
3590 if (is_Const(mux_true) && is_Const(mux_false)) {
3591 /* both are const, good */
3592 ir_tarval *tv_true = get_Const_tarval(mux_true);
3593 ir_tarval *tv_false = get_Const_tarval(mux_false);
3594 setcc_transform_t res;
3597 find_const_transform(cc, tv_true, tv_false, &res);
/* Replay the recipe steps in reverse (last step = Setcc/Sbb base). */
3599 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3602 switch (res.steps[step].transform) {
/* ADD via LEA keeps flags intact. */
3604 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3605 add_ia32_am_offs_int(new_node, res.steps[step].val);
3607 case SETCC_TR_ADDxx:
3608 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3611 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3612 set_ia32_am_scale(new_node, res.steps[step].scale);
3613 set_ia32_am_offs_int(new_node, res.steps[step].val);
3615 case SETCC_TR_LEAxx:
3616 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3617 set_ia32_am_scale(new_node, res.steps[step].scale);
3618 set_ia32_am_offs_int(new_node, res.steps[step].val);
3621 imm = ia32_immediate_from_long(res.steps[step].scale);
3622 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3625 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3628 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3631 imm = ia32_immediate_from_long(res.steps[step].val);
3632 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3635 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3638 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3641 panic("unknown setcc transform");
/* General values: fall back to a conditional move. */
3645 new_node = create_CMov(node, sel, flags, cc);
3653 * Create a conversion from x87 state register to general purpose:
 * fist the value into a spill slot on the frame, then Load it back
 * as an integer.
3655 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3657 ir_node *block = be_transform_node(get_nodes_block(node));
3658 ir_node *op = get_Conv_op(node);
3659 ir_node *new_op = be_transform_node(op);
3660 ir_graph *irg = current_ir_graph;
3661 dbg_info *dbgi = get_irn_dbg_info(node);
3662 ir_mode *mode = get_irn_mode(node);
3663 ir_node *frame = get_irg_frame(irg);
3664 ir_node *fist, *load, *mem;
3666 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3667 set_irn_pinned(fist, op_pin_state_floats);
3668 set_ia32_use_frame(fist);
3669 set_ia32_op_type(fist, ia32_AddrModeD);
/* vfist and vfisttp share the same memory Proj number, so the Proj
 * works for either result of gen_vfist(). */
3671 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3672 mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3674 assert(get_mode_size_bits(mode) <= 32);
3675 /* exception we can only store signed 32 bit integers, so for unsigned
3676 we store a 64bit (signed) integer and load the lower bits */
3677 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3678 set_ia32_ls_mode(fist, mode_Ls);
3680 set_ia32_ls_mode(fist, mode_Is);
3682 SET_IA32_ORIG_NODE(fist, node);
3685 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3687 set_irn_pinned(load, op_pin_state_floats);
3688 set_ia32_use_frame(load);
3689 set_ia32_op_type(load, ia32_AddrModeS);
3690 set_ia32_ls_mode(load, mode_Is);
/* The spill slot must be sized for the mode the fist wrote. */
3691 if (get_ia32_ls_mode(fist) == mode_Ls) {
3692 ia32_attr_t *attr = get_ia32_attr(load);
3693 attr->data.need_64bit_stackent = 1;
3695 ia32_attr_t *attr = get_ia32_attr(load);
3696 attr->data.need_32bit_stackent = 1;
3698 SET_IA32_ORIG_NODE(load, node);
3700 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3704 * Creates a x87 strict Conv by placing a Store and a Load
/**
 * Create a strict x87 Conv by spilling the value to the frame with the
 * target mode (vfst) and reloading it (vfld), which rounds the value to
 * the precision of @p tgt_mode.
 *
 * @param tgt_mode  the mode the value is rounded to
 * @param node      the (already transformed) value to convert
 * @return a Proj delivering the rounded value
 */
3706 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3708 ir_node *block = get_nodes_block(node);
3709 ir_graph *irg = get_Block_irg(block);
3710 dbg_info *dbgi = get_irn_dbg_info(node);
3711 ir_node *frame = get_irg_frame(irg);
3713 ir_node *store, *load;
/* store the value with the target precision ... */
3716 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3717 set_ia32_use_frame(store);
3718 set_ia32_op_type(store, ia32_AddrModeD);
3719 SET_IA32_ORIG_NODE(store, node);
3721 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
/* ... and load it again, now rounded to tgt_mode */
3723 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3724 set_ia32_use_frame(load);
3725 set_ia32_op_type(load, ia32_AddrModeS);
3726 SET_IA32_ORIG_NODE(load, node);
3728 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/**
 * Construct an integer-to-integer Conv node, picking the 8bit variant
 * for 8bit modes (it has different register constraints) and the
 * generic one otherwise.
 */
3732 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3733 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3735 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3737 func = get_mode_size_bits(mode) == 8 ?
3738 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3739 return func(dbgi, block, base, index, mem, val, mode);
3743 * Create a conversion from general purpose to x87 register
/**
 * Convert a GP integer value to an x87 FPU register: store it into a
 * frame slot and fild it from there. Tries source address mode first so
 * the fild can read the operand directly from memory.
 *
 * @param node      the Conv node to transform
 * @param src_mode  mode of the integer source operand
 * @return a Proj delivering the vfp result
 */
3745 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3747 ir_node *src_block = get_nodes_block(node);
3748 ir_node *block = be_transform_node(src_block);
3749 ir_graph *irg = get_Block_irg(block);
3750 dbg_info *dbgi = get_irn_dbg_info(node);
3751 ir_node *op = get_Conv_op(node);
3752 ir_node *new_op = NULL;
3754 ir_mode *store_mode;
3760 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3761 if (possible_int_mode_for_fp(src_mode)) {
3762 ia32_address_mode_t am;
3764 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3765 if (am.op_type == ia32_AddrModeS) {
3766 ia32_address_t *addr = &am.addr;
/* source AM matched: fild directly from the operand's memory location */
3768 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3769 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3771 set_am_attributes(fild, &am);
3772 SET_IA32_ORIG_NODE(fild, node);
3774 fix_mem_proj(fild, &am);
/* fallback: no source AM possible, go through a frame slot */
3779 if (new_op == NULL) {
3780 new_op = be_transform_node(op);
3783 mode = get_irn_mode(op);
3785 /* first convert to 32 bit signed if necessary */
3786 if (get_mode_size_bits(src_mode) < 32) {
3787 if (!upper_bits_clean(new_op, src_mode)) {
3788 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3789 SET_IA32_ORIG_NODE(new_op, node);
3794 assert(get_mode_size_bits(mode) == 32);
/* spill the integer value to the frame */
3797 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3799 set_ia32_use_frame(store);
3800 set_ia32_op_type(store, ia32_AddrModeD);
3801 set_ia32_ls_mode(store, mode_Iu);
3803 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3805 /* exception for 32bit unsigned: fild only reads signed values, so do a
 * 64bit spill (upper half zeroed) + 64bit fild */
3806 if (!mode_is_signed(mode)) {
3809 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* store 0 into the upper 4 bytes of the 64bit slot */
3811 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3812 noreg_GP, nomem, zero_const);
3813 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3815 set_ia32_use_frame(zero_store);
3816 set_ia32_op_type(zero_store, ia32_AddrModeD);
3817 add_ia32_am_offs_int(zero_store, 4);
3818 set_ia32_ls_mode(zero_store, mode_Iu);
3820 in[0] = zero_store_mem;
/* the fild must see both stores: join them with a Sync */
3823 store_mem = new_rd_Sync(dbgi, block, 2, in);
3824 store_mode = mode_Ls;
3826 store_mode = mode_Is;
/* load the spilled value into the x87 stack */
3830 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3832 set_ia32_use_frame(fild);
3833 set_ia32_op_type(fild, ia32_AddrModeS);
3834 set_ia32_ls_mode(fild, store_mode);
3836 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3842 * Create a conversion from one integer mode into another one
/**
 * Create a conversion from one integer mode into another one. The
 * conversion is done in the smaller of the two modes; an already-clean
 * operand makes the conv unnecessary.
 *
 * @param src_mode  mode of the source value
 * @param tgt_mode  mode of the Conv result
 * @param dbgi      debug info for the new nodes
 * @param block     the (untransformed) block of the Conv
 * @param op        the (untransformed) operand
 * @return the transformed node
 */
3844 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3845 dbg_info *dbgi, ir_node *block, ir_node *op,
3848 ir_node *new_block = be_transform_node(block);
3850 ir_mode *smaller_mode;
3851 ia32_address_mode_t am;
3852 ia32_address_t *addr = &am.addr;
/* only the smaller of the two modes matters for the conversion */
3855 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3856 smaller_mode = src_mode;
3858 smaller_mode = tgt_mode;
3861 #ifdef DEBUG_libfirm
/* a conv directly after a constant should have been folded already */
3863 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3868 match_arguments(&am, block, NULL, op, NULL,
3869 match_am | match_8bit_am | match_16bit_am);
3871 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3872 /* unnecessary conv. in theory it shouldn't have been AM */
3873 assert(is_ia32_NoReg_GP(addr->base));
3874 assert(is_ia32_NoReg_GP(addr->index));
3875 assert(is_NoMem(addr->mem));
3876 assert(am.addr.offset == 0);
3877 assert(am.addr.symconst_ent == NULL);
3881 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3882 addr->mem, am.new_op2, smaller_mode);
3883 set_am_attributes(new_node, &am);
3884 /* match_arguments assumes that out-mode = in-mode, this isn't true here
3886 set_ia32_ls_mode(new_node, smaller_mode);
3887 SET_IA32_ORIG_NODE(new_node, node);
3888 new_node = fix_mem_proj(new_node, &am);
3893 * Transforms a Conv node.
3895 * @return The created ia32 Conv node
/**
 * Transforms a Conv node, dispatching on the float/int combination of
 * source and target mode and on the SSE2/x87 configuration.
 *
 * @return The created ia32 Conv node
 */
3897 static ir_node *gen_Conv(ir_node *node)
3899 ir_node *block = get_nodes_block(node);
3900 ir_node *new_block = be_transform_node(block);
3901 ir_node *op = get_Conv_op(node);
3902 ir_node *new_op = NULL;
3903 dbg_info *dbgi = get_irn_dbg_info(node);
3904 ir_mode *src_mode = get_irn_mode(op);
3905 ir_mode *tgt_mode = get_irn_mode(node);
3906 int src_bits = get_mode_size_bits(src_mode);
3907 int tgt_bits = get_mode_size_bits(tgt_mode);
3908 ir_node *res = NULL;
/* 64bit integers must have been lowered before the backend runs */
3910 assert(!mode_is_int(src_mode) || src_bits <= 32);
3911 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3913 /* modeB -> X should already be lowered by the lower_mode_b pass */
3914 if (src_mode == mode_b) {
3915 panic("ConvB not lowered %+F", node);
/* no-op conversion: same source and target mode */
3918 if (src_mode == tgt_mode) {
3919 if (get_Conv_strict(node)) {
3920 if (ia32_cg_config.use_sse2) {
3921 /* when we are in SSE mode, we can kill all strict no-op conversion */
3922 return be_transform_node(op);
3925 /* this should be optimized already, but who knows... */
3926 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3927 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3928 return be_transform_node(op);
3932 if (mode_is_float(src_mode)) {
3933 new_op = be_transform_node(op);
3934 /* we convert from float ... */
3935 if (mode_is_float(tgt_mode)) {
/* ... to float */
3937 if (ia32_cg_config.use_sse2) {
3938 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3939 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3941 set_ia32_ls_mode(res, tgt_mode);
3943 if (get_Conv_strict(node)) {
3944 /* if fp_no_float_fold is not set then we assume that we
3945 * don't have any float operations in a non
3946 * mode_float_arithmetic mode and can skip strict upconvs */
3947 if (src_bits < tgt_bits) {
3948 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* x87 strict downconv: round via a store/load pair */
3951 res = gen_x87_strict_conv(tgt_mode, new_op);
3952 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
/* x87 registers are mode_E anyway, non-strict convs are no-ops */
3956 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* ... to int */
3961 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3962 if (ia32_cg_config.use_sse2) {
3963 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3965 set_ia32_ls_mode(res, src_mode);
3967 return gen_x87_fp_to_gp(node);
3971 /* we convert from int ... */
3972 if (mode_is_float(tgt_mode)) {
/* ... to float */
3974 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3975 if (ia32_cg_config.use_sse2) {
3976 new_op = be_transform_node(op);
3977 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3979 set_ia32_ls_mode(res, tgt_mode);
3981 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3982 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3983 res = gen_x87_gp_to_fp(node, src_mode);
3985 /* we need a strict-Conv, if the int mode has more bits than the
3987 if (float_mantissa < int_mantissa) {
3988 res = gen_x87_strict_conv(tgt_mode, res);
3989 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3993 } else if (tgt_mode == mode_b) {
3994 /* mode_b lowering already took care that we only have 0/1 values */
3995 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3996 src_mode, tgt_mode));
3997 return be_transform_node(op);
/* ... to int: same width is a no-op, otherwise build an I2I conv */
4000 if (src_bits == tgt_bits) {
4001 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
4002 src_mode, tgt_mode));
4003 return be_transform_node(op);
4006 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Try to create an ia32 Immediate for @p node; fall back to regular
 * transformation if the node cannot be expressed as an immediate.
 *
 * @param immediate_constraint_type  asm-style constraint letter limiting
 *                                   the allowed immediate forms
 */
4014 static ir_node *create_immediate_or_transform(ir_node *node,
4015 char immediate_constraint_type)
4017 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
4018 if (new_node == NULL) {
4019 new_node = be_transform_node(node);
4025 * Transforms a FrameAddr into an ia32 Add.
/**
 * Transforms a be_FrameAddr into an ia32 Lea over the frame pointer
 * with the frame entity attached (offset resolved later).
 */
4027 static ir_node *gen_be_FrameAddr(ir_node *node)
4029 ir_node *block = be_transform_node(get_nodes_block(node));
4030 ir_node *op = be_get_FrameAddr_frame(node);
4031 ir_node *new_op = be_transform_node(op);
4032 dbg_info *dbgi = get_irn_dbg_info(node);
4035 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
4036 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
4037 set_ia32_use_frame(new_node);
4039 SET_IA32_ORIG_NODE(new_node, node);
4045 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/**
 * Transform a be_Return. In case SSE is used we need to copy the float
 * result from XMM0 to the x87 FPU TOS (via a frame slot) before
 * returning, because the ABI returns floats in st(0).
 */
4047 static ir_node *gen_be_Return(ir_node *node)
4049 ir_graph *irg = current_ir_graph;
4050 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
4051 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
4052 ir_node *new_ret_val = be_transform_node(ret_val);
4053 ir_node *new_ret_mem = be_transform_node(ret_mem);
4054 ir_entity *ent = get_irg_entity(irg);
4055 ir_type *tp = get_entity_type(ent);
4056 dbg_info *dbgi = get_irn_dbg_info(node);
4057 ir_node *block = be_transform_node(get_nodes_block(node));
/* the xmm->x87 copy is only needed for SSE float returns; everything
 * else is a plain duplicate of the Return */
4071 assert(ret_val != NULL);
4072 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4073 return be_duplicate_node(node);
4076 res_type = get_method_res_type(tp, 0);
4078 if (! is_Primitive_type(res_type)) {
4079 return be_duplicate_node(node);
4082 mode = get_type_mode(res_type);
4083 if (! mode_is_float(mode)) {
4084 return be_duplicate_node(node);
4087 assert(get_method_n_ress(tp) == 1);
4089 frame = get_irg_frame(irg);
4091 /* store xmm0 onto stack */
4092 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4093 new_ret_mem, new_ret_val);
4094 set_ia32_ls_mode(sse_store, mode);
4095 set_ia32_op_type(sse_store, ia32_AddrModeD);
4096 set_ia32_use_frame(sse_store);
4097 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4099 /* load into x87 register */
4100 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4101 set_ia32_op_type(fld, ia32_AddrModeS);
4102 set_ia32_use_frame(fld);
4104 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4105 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4107 /* create a new return with the x87 value/memory substituted in */
4108 arity = get_irn_arity(node);
4109 in = ALLOCAN(ir_node*, arity);
4110 pop = be_Return_get_pop(node);
4111 for (i = 0; i < arity; ++i) {
4112 ir_node *op = get_irn_n(node, i);
4113 if (op == ret_val) {
4115 } else if (op == ret_mem) {
4118 in[i] = be_transform_node(op);
4121 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4122 copy_node_attr(irg, node, new_node);
4128 * Transform a be_AddSP into an ia32_SubSP.
/**
 * Transform a be_AddSP into an ia32_SubSP. (The stack grows downwards,
 * so reserving stack space means subtracting from ESP.)
 */
4130 static ir_node *gen_be_AddSP(ir_node *node)
4132 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4133 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4135 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4136 match_am | match_immediate);
4137 assert(is_ia32_SubSP(new_node));
/* the stack-pointer result must stay in ESP */
4138 arch_irn_set_register(new_node, pn_ia32_SubSP_stack,
4139 &ia32_registers[REG_ESP]);
4144 * Transform a be_SubSP into an ia32_AddSP
/**
 * Transform a be_SubSP into an ia32_AddSP. (The stack grows downwards,
 * so releasing stack space means adding to ESP.)
 */
4146 static ir_node *gen_be_SubSP(ir_node *node)
4148 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4149 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4151 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4152 match_am | match_immediate);
4153 assert(is_ia32_AddSP(new_node));
/* the stack-pointer result must stay in ESP */
4154 arch_irn_set_register(new_node, pn_ia32_AddSP_stack,
4155 &ia32_registers[REG_ESP]);
4160 * Change some phi modes
/**
 * Transform a Phi: pick the ia32 register class matching its mode and
 * duplicate the node. The Phi arguments are fixed up later because Phis
 * may form loops.
 */
4162 static ir_node *gen_Phi(ir_node *node)
4164 const arch_register_req_t *req;
4165 ir_node *block = be_transform_node(get_nodes_block(node));
4166 ir_graph *irg = current_ir_graph;
4167 dbg_info *dbgi = get_irn_dbg_info(node);
4168 ir_mode *mode = get_irn_mode(node);
4171 if (ia32_mode_needs_gp_reg(mode)) {
4172 /* we shouldn't have any 64bit stuff around anymore */
4173 assert(get_mode_size_bits(mode) <= 32);
4174 /* all integer operations are on 32bit registers now */
4176 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4177 } else if (mode_is_float(mode)) {
4178 if (ia32_cg_config.use_sse2) {
4180 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4183 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4186 req = arch_no_register_req;
4189 /* phi nodes allow loops, so we use the old arguments for now
4190 * and fix this later */
4191 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4192 get_irn_in(node) + 1);
4193 copy_node_attr(irg, node, phi);
4194 be_duplicate_deps(node, phi);
4196 arch_set_out_register_req(phi, 0, req);
/* enqueue the predecessors so they get transformed too */
4198 be_enqueue_preds(node);
/**
 * Transform an unconditional Jmp into an ia32 Jmp.
 */
4203 static ir_node *gen_Jmp(ir_node *node)
4205 ir_node *block = get_nodes_block(node);
4206 ir_node *new_block = be_transform_node(block);
4207 dbg_info *dbgi = get_irn_dbg_info(node);
4210 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4211 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform an indirect jump (IJmp) into an ia32 IJmp, allowing the
 * target to come from memory (address mode) or an immediate.
 */
4219 static ir_node *gen_IJmp(ir_node *node)
4221 ir_node *block = get_nodes_block(node);
4222 ir_node *new_block = be_transform_node(block);
4223 dbg_info *dbgi = get_irn_dbg_info(node);
4224 ir_node *op = get_IJmp_target(node);
4226 ia32_address_mode_t am;
4227 ia32_address_t *addr = &am.addr;
/* the jump target must be a pointer */
4229 assert(get_irn_mode(op) == mode_P);
4231 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4233 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4234 addr->mem, am.new_op2);
4235 set_am_attributes(new_node, &am);
4236 SET_IA32_ORIG_NODE(new_node, node);
4238 new_node = fix_mem_proj(new_node, &am);
/**
 * Transform a lowered l_Add (from 64bit lowering) into a real ia32 Add
 * in mode_T, so the flags result can be consumed by the matching Adc.
 */
4243 static ir_node *gen_ia32_l_Add(ir_node *node)
4245 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4246 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4247 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4248 match_commutative | match_am | match_immediate |
4249 match_mode_neutral);
/* gen_binop may have wrapped the Add in a result Proj; unwrap it so we
 * can switch the Add itself to mode_T */
4251 if (is_Proj(lowered)) {
4252 lowered = get_Proj_pred(lowered);
4254 assert(is_ia32_Add(lowered));
4255 set_irn_mode(lowered, mode_T);
/**
 * Transform a lowered l_Adc (add with carry) into an ia32 Adc consuming
 * the flags of the preceding Add.
 */
4263 static ir_node *gen_ia32_l_Adc(ir_node *node)
4263 return gen_binop_flags(node, new_bd_ia32_Adc,
4264 match_commutative | match_am | match_immediate |
4265 match_mode_neutral);
4269 * Transforms a l_MulS into a "real" MulS node.
4271 * @return the created ia32 Mul node
/**
 * Transforms a l_Mul into a "real" (unsigned widening) Mul node.
 *
 * @return the created ia32 Mul node
 */
4273 static ir_node *gen_ia32_l_Mul(ir_node *node)
4275 ir_node *left = get_binop_left(node);
4276 ir_node *right = get_binop_right(node);
4278 return gen_binop(node, left, right, new_bd_ia32_Mul,
4279 match_commutative | match_am | match_mode_neutral);
4283 * Transforms a l_IMulS into a "real" IMul1OPS node.
4285 * @return the created ia32 IMul1OP node
/**
 * Transforms a l_IMul into a "real" IMul1OP node (one-operand signed
 * widening multiply).
 *
 * @return the created ia32 IMul1OP node
 */
4287 static ir_node *gen_ia32_l_IMul(ir_node *node)
4289 ir_node *left = get_binop_left(node);
4290 ir_node *right = get_binop_right(node);
4292 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4293 match_commutative | match_am | match_mode_neutral);
/**
 * Transform a lowered l_Sub into a real ia32 Sub in mode_T, so the
 * flags result can be consumed by the matching Sbb.
 */
4296 static ir_node *gen_ia32_l_Sub(ir_node *node)
4298 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4299 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4300 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4301 match_am | match_immediate | match_mode_neutral);
/* unwrap a possible result Proj before switching the Sub to mode_T */
4303 if (is_Proj(lowered)) {
4304 lowered = get_Proj_pred(lowered);
4306 assert(is_ia32_Sub(lowered));
4307 set_irn_mode(lowered, mode_T);
/**
 * Transform a lowered l_Sbb (subtract with borrow) into an ia32 Sbb
 * consuming the flags of the preceding Sub.
 */
4313 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4315 return gen_binop_flags(node, new_bd_ia32_Sbb,
4316 match_am | match_immediate | match_mode_neutral);
/**
 * Transform a lowered 64bit-int-to-float conversion (l_LLtoFloat):
 * store both 32bit halves to a frame slot, fild the 64bit value, and
 * for unsigned sources add a bias constant to correct the sign.
 */
4319 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4321 ir_node *src_block = get_nodes_block(node);
4322 ir_node *block = be_transform_node(src_block);
4323 ir_graph *irg = current_ir_graph;
4324 dbg_info *dbgi = get_irn_dbg_info(node);
4325 ir_node *frame = get_irg_frame(irg);
4326 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4327 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4328 ir_node *new_val_low = be_transform_node(val_low);
4329 ir_node *new_val_high = be_transform_node(val_high);
4331 ir_node *sync, *fild, *res;
4333 ir_node *store_high;
4337 if (ia32_cg_config.use_sse2) {
4338 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* write the 64bit value as two 32bit stores into the frame slot */
4342 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4344 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4346 SET_IA32_ORIG_NODE(store_low, node);
4347 SET_IA32_ORIG_NODE(store_high, node);
4349 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4350 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4352 set_ia32_use_frame(store_low);
4353 set_ia32_use_frame(store_high);
4354 set_ia32_op_type(store_low, ia32_AddrModeD);
4355 set_ia32_op_type(store_high, ia32_AddrModeD);
4356 set_ia32_ls_mode(store_low, mode_Iu);
4357 set_ia32_ls_mode(store_high, mode_Is);
4358 add_ia32_am_offs_int(store_high, 4);
/* the fild must see both stores: join them with a Sync */
4362 sync = new_rd_Sync(dbgi, block, 2, in);
4365 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4367 set_ia32_use_frame(fild);
4368 set_ia32_op_type(fild, ia32_AddrModeS);
4369 set_ia32_ls_mode(fild, mode_Ls);
4371 SET_IA32_ORIG_NODE(fild, node);
4373 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* fild interpreted the value as signed; for an unsigned source with the
 * sign bit set, add the ULL bias constant to correct the result. The
 * sign bit (high word >> 31) indexes a 2-entry constant table. */
4375 if (! mode_is_signed(get_irn_mode(val_high))) {
4376 ia32_address_mode_t am;
4378 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4381 am.addr.base = get_symconst_base();
4382 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4383 am.addr.mem = nomem;
4386 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4387 am.addr.tls_segment = false;
4388 am.addr.use_frame = 0;
4389 am.addr.frame_entity = NULL;
4390 am.addr.symconst_sign = 0;
4391 am.ls_mode = mode_F;
4392 am.mem_proj = nomem;
4393 am.op_type = ia32_AddrModeS;
4395 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4396 am.pinned = op_pin_state_floats;
4398 am.ins_permuted = false;
4400 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4401 am.new_op1, am.new_op2, get_fpcw());
4402 set_am_attributes(fadd, &am);
4404 set_irn_mode(fadd, mode_T);
4405 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform a lowered float-to-64bit-int conversion (l_FloattoLL):
 * fist(t) the value as a 64bit integer into a frame slot; the two 32bit
 * halves are read back by gen_Proj_l_FloattoLL.
 *
 * @return the memory Proj of the fist
 */
4410 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4412 ir_node *src_block = get_nodes_block(node);
4413 ir_node *block = be_transform_node(src_block);
4414 ir_graph *irg = get_Block_irg(block);
4415 dbg_info *dbgi = get_irn_dbg_info(node);
4416 ir_node *frame = get_irg_frame(irg);
4417 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4418 ir_node *new_val = be_transform_node(val);
4421 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4422 SET_IA32_ORIG_NODE(fist, node);
4423 set_ia32_use_frame(fist);
4424 set_ia32_op_type(fist, ia32_AddrModeD);
4425 set_ia32_ls_mode(fist, mode_Ls);
/* gen_vfist may build either a vfist or a vfisttp; both must use the
 * same memory Proj number */
4427 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4428 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/**
 * Transform a Proj of an l_FloattoLL: load the requested 32bit half of
 * the 64bit integer that the fist stored into the frame slot.
 */
4431 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4433 ir_node *block = be_transform_node(get_nodes_block(node));
4434 ir_graph *irg = get_Block_irg(block);
4435 ir_node *pred = get_Proj_pred(node);
4436 ir_node *new_pred = be_transform_node(pred);
4437 ir_node *frame = get_irg_frame(irg);
4438 dbg_info *dbgi = get_irn_dbg_info(node);
4439 long pn = get_Proj_proj(node);
4444 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4445 SET_IA32_ORIG_NODE(load, node);
4446 set_ia32_use_frame(load);
4447 set_ia32_op_type(load, ia32_AddrModeS);
4448 set_ia32_ls_mode(load, mode_Iu);
4449 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4450 * 32 bit from it with this particular load */
4451 attr = get_ia32_attr(load);
4452 attr->data.need_64bit_stackent = 1;
/* the high half lives 4 bytes into the 64bit slot */
4454 if (pn == pn_ia32_l_FloattoLL_res_high) {
4455 add_ia32_am_offs_int(load, 4);
4457 assert(pn == pn_ia32_l_FloattoLL_res_low);
4460 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4466 * Transform the Projs of an AddSP.
/**
 * Transform the Projs of an AddSP. The be_AddSP was turned into an
 * ia32_SubSP (stack grows downwards), so the Proj numbers are remapped
 * to the SubSP's outputs.
 */
4468 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4470 ir_node *pred = get_Proj_pred(node);
4471 ir_node *new_pred = be_transform_node(pred);
4472 dbg_info *dbgi = get_irn_dbg_info(node);
4473 long proj = get_Proj_proj(node);
4475 if (proj == pn_be_AddSP_sp) {
4476 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4477 pn_ia32_SubSP_stack);
4478 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4480 } else if (proj == pn_be_AddSP_res) {
4481 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4482 pn_ia32_SubSP_addr);
4483 } else if (proj == pn_be_AddSP_M) {
4484 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4487 panic("No idea how to transform proj->AddSP");
4491 * Transform the Projs of a SubSP.
/**
 * Transform the Projs of a SubSP. The be_SubSP was turned into an
 * ia32_AddSP (stack grows downwards), so the Proj numbers are remapped
 * to the AddSP's outputs.
 */
4493 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4495 ir_node *pred = get_Proj_pred(node);
4496 ir_node *new_pred = be_transform_node(pred);
4497 dbg_info *dbgi = get_irn_dbg_info(node);
4498 long proj = get_Proj_proj(node);
4500 if (proj == pn_be_SubSP_sp) {
4501 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4502 pn_ia32_AddSP_stack);
4503 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4505 } else if (proj == pn_be_SubSP_M) {
4506 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4509 panic("No idea how to transform proj->SubSP");
4513 * Transform and renumber the Projs from a Load.
/**
 * Transform and renumber the Projs from a Load, depending on which ia32
 * node the Load was transformed into (Load, Conv_I2I with source AM,
 * xLoad, vfld).
 */
4515 static ir_node *gen_Proj_Load(ir_node *node)
4518 ir_node *pred = get_Proj_pred(node);
4519 dbg_info *dbgi = get_irn_dbg_info(node);
4520 long proj = get_Proj_proj(node);
4522 /* loads might be part of source address mode matches, so we don't
4523 * transform the ProjMs yet (with the exception of loads whose result is
/* a Load with several users may still get folded into another node's
 * address mode; defer transforming its ProjM */
4526 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4529 /* this is needed, because sometimes we have loops that are only
4530 reachable through the ProjM */
4531 be_enqueue_preds(node);
4532 /* do it in 2 steps, to silence firm verifier */
4533 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4534 set_Proj_proj(res, pn_ia32_mem);
4538 /* renumber the proj */
4539 new_pred = be_transform_node(pred);
4540 if (is_ia32_Load(new_pred)) {
4541 switch ((pn_Load)proj) {
4543 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4545 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4546 case pn_Load_X_except:
4547 /* This Load might raise an exception. Mark it. */
4548 set_ia32_exc_label(new_pred, 1);
4549 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4550 case pn_Load_X_regular:
4551 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* the Load was folded into a Conv with source address mode */
4553 } else if (is_ia32_Conv_I2I(new_pred) ||
4554 is_ia32_Conv_I2I8Bit(new_pred)) {
4555 set_irn_mode(new_pred, mode_T);
4556 switch ((pn_Load)proj) {
4558 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4560 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4561 case pn_Load_X_except:
4562 /* This Load might raise an exception. Mark it. */
4563 set_ia32_exc_label(new_pred, 1);
4564 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4565 case pn_Load_X_regular:
4566 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4568 } else if (is_ia32_xLoad(new_pred)) {
4569 switch ((pn_Load)proj) {
4571 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4573 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4574 case pn_Load_X_except:
4575 /* This Load might raise an exception. Mark it. */
4576 set_ia32_exc_label(new_pred, 1);
4577 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4578 case pn_Load_X_regular:
4579 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4581 } else if (is_ia32_vfld(new_pred)) {
4582 switch ((pn_Load)proj) {
4584 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4586 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4587 case pn_Load_X_except:
4588 /* This Load might raise an exception. Mark it. */
4589 set_ia32_exc_label(new_pred, 1);
4590 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4591 case pn_Load_X_regular:
4592 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4595 /* can happen for ProjMs when source address mode happened for the
4598 /* however it should not be the result proj, as that would mean the
4599 load had multiple users and should not have been used for
4601 if (proj != pn_Load_M) {
4602 panic("internal error: transformed node not a Load");
4604 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4607 panic("No idea how to transform Proj(Load) %+F", node);
/**
 * Transform and renumber the Projs from a Store, depending on which
 * ia32 node the Store was transformed into (Store, vfist, vfisttp,
 * vfst, xStore, a Sync from gen_float_const_Store, or a destination-AM
 * node).
 */
4610 static ir_node *gen_Proj_Store(ir_node *node)
4612 ir_node *pred = get_Proj_pred(node);
4613 ir_node *new_pred = be_transform_node(pred);
4614 dbg_info *dbgi = get_irn_dbg_info(node);
4615 long pn = get_Proj_proj(node);
4617 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4618 switch ((pn_Store)pn) {
4620 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4621 case pn_Store_X_except:
4622 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4623 case pn_Store_X_regular:
4624 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4626 } else if (is_ia32_vfist(new_pred)) {
4627 switch ((pn_Store)pn) {
4629 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4630 case pn_Store_X_except:
4631 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4632 case pn_Store_X_regular:
4633 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4635 } else if (is_ia32_vfisttp(new_pred)) {
4636 switch ((pn_Store)pn) {
4638 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4639 case pn_Store_X_except:
4640 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4641 case pn_Store_X_regular:
4642 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4644 } else if (is_ia32_vfst(new_pred)) {
4645 switch ((pn_Store)pn) {
4647 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4648 case pn_Store_X_except:
4649 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4650 case pn_Store_X_regular:
4651 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4653 } else if (is_ia32_xStore(new_pred)) {
4654 switch ((pn_Store)pn) {
4656 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4657 case pn_Store_X_except:
4658 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4659 case pn_Store_X_regular:
4660 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4662 } else if (is_Sync(new_pred)) {
4663 /* hack for the case that gen_float_const_Store produced a Sync */
4664 if (pn == pn_Store_M) {
4667 panic("exception control flow for gen_float_const_Store not implemented yet");
4668 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4669 /* destination address mode */
4670 if (pn == pn_Store_M) {
4673 panic("exception control flow for destination AM not implemented yet");
4676 panic("No idea how to transform Proj(Store) %+F", node);
4680 * Transform and renumber the Projs from a Div or Mod instruction.
/**
 * Transform and renumber the Projs from a Div, depending on whether the
 * Div was transformed into an integer Div/IDiv, an SSE xDiv, or an x87
 * vfdiv.
 */
4682 static ir_node *gen_Proj_Div(ir_node *node)
4684 ir_node *pred = get_Proj_pred(node);
4685 ir_node *new_pred = be_transform_node(pred);
4686 dbg_info *dbgi = get_irn_dbg_info(node);
4687 long proj = get_Proj_proj(node);
/* the signed and unsigned variants must share Proj numbers for the
 * shared cases below */
4689 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4690 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4692 switch ((pn_Div)proj) {
4694 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4695 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4696 } else if (is_ia32_xDiv(new_pred)) {
4697 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4698 } else if (is_ia32_vfdiv(new_pred)) {
4699 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4701 panic("Div transformed to unexpected thing %+F", new_pred);
4704 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4705 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4706 } else if (is_ia32_xDiv(new_pred)) {
4707 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4708 } else if (is_ia32_vfdiv(new_pred)) {
4709 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4711 panic("Div transformed to unexpected thing %+F", new_pred);
4713 case pn_Div_X_except:
4714 set_ia32_exc_label(new_pred, 1);
4715 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4716 case pn_Div_X_regular:
4717 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4720 panic("No idea how to transform proj->Div");
4724 * Transform and renumber the Projs from a Div or Mod instruction.
/**
 * Transform and renumber the Projs from a Mod. A Mod is always an
 * integer Div/IDiv on ia32; we pick the mod_res output.
 */
4726 static ir_node *gen_Proj_Mod(ir_node *node)
4728 ir_node *pred = get_Proj_pred(node);
4729 ir_node *new_pred = be_transform_node(pred);
4730 dbg_info *dbgi = get_irn_dbg_info(node);
4731 long proj = get_Proj_proj(node);
4733 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* signed and unsigned variants must share Proj numbers */
4734 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4735 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4737 switch ((pn_Mod)proj) {
4739 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4741 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4742 case pn_Mod_X_except:
4743 set_ia32_exc_label(new_pred, 1);
4744 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4745 case pn_Mod_X_regular:
4746 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4748 panic("No idea how to transform proj->Mod");
4752 * Transform and renumber the Projs from a CopyB.
/**
 * Transform and renumber the Projs from a CopyB, depending on whether
 * it became a CopyB_i (known size) or a generic CopyB.
 */
4754 static ir_node *gen_Proj_CopyB(ir_node *node)
4756 ir_node *pred = get_Proj_pred(node);
4757 ir_node *new_pred = be_transform_node(pred);
4758 dbg_info *dbgi = get_irn_dbg_info(node);
4759 long proj = get_Proj_proj(node);
4761 switch ((pn_CopyB)proj) {
4763 if (is_ia32_CopyB_i(new_pred)) {
4764 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4765 } else if (is_ia32_CopyB(new_pred)) {
4766 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4769 case pn_CopyB_X_regular:
4770 if (is_ia32_CopyB_i(new_pred)) {
4771 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4772 } else if (is_ia32_CopyB(new_pred)) {
4773 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4776 case pn_CopyB_X_except:
4777 if (is_ia32_CopyB_i(new_pred)) {
4778 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4779 } else if (is_ia32_CopyB(new_pred)) {
4780 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4785 panic("No idea how to transform proj->CopyB");
/*
 * Transform a be_Call into an ia32_Call.
 * Matches an address mode for the call target (allowing direct immediates),
 * wires up the register-carried GP parameters (eax/ecx/edx), the stack
 * pointer and the fpcw input, and — in SSE2 mode — records the call for the
 * float-result post-processing pass (see postprocess_fp_call_results).
 * NOTE(review): several declarations/lines are elided in this listing
 * (e.g. the declarations of i, fpcw, mem, call).
 */
4788 static ir_node *gen_be_Call(ir_node *node)
4790 dbg_info *const dbgi = get_irn_dbg_info(node);
4791 ir_node *const src_block = get_nodes_block(node);
4792 ir_node *const block = be_transform_node(src_block);
4793 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4794 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4795 ir_node *const sp = be_transform_node(src_sp);
4796 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4797 ia32_address_mode_t am;
4798 ia32_address_t *const addr = &am.addr;
/* register parameters start out as "no register used" placeholders */
4803 ir_node * eax = noreg_GP;
4804 ir_node * ecx = noreg_GP;
4805 ir_node * edx = noreg_GP;
4806 unsigned const pop = be_Call_get_pop(node);
4807 ir_type *const call_tp = be_Call_get_type(node);
4808 int old_no_pic_adjust;
4809 int throws_exception = ir_throws_exception(node);
4811 /* Run the x87 simulator if the call returns a float value */
4812 if (get_method_n_ress(call_tp) > 0) {
4813 ir_type *const res_type = get_method_res_type(call_tp, 0);
4814 ir_mode *const res_mode = get_type_mode(res_type);
4816 if (res_mode != NULL && mode_is_float(res_mode)) {
4817 ir_graph *irg = current_ir_graph;
4818 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4819 irg_data->do_x87_sim = 1;
4823 /* We do not want be_Call direct calls */
4824 assert(be_Call_get_entity(node) == NULL);
4826 /* special case for PIC trampoline calls */
4827 old_no_pic_adjust = ia32_no_pic_adjust;
4828 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4830 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4831 match_am | match_immediate);
/* restore the global flag saved above */
4833 ia32_no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw; walk the remaining inputs backwards and sort
 * the register parameters into their fixed registers */
4835 i = get_irn_arity(node) - 1;
4836 fpcw = be_transform_node(get_irn_n(node, i--));
4837 for (; i >= n_be_Call_first_arg; --i) {
4838 arch_register_req_t const *const req = arch_get_register_req(node, i);
4839 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4841 assert(req->type == arch_register_req_type_limited);
4842 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* each of eax/ecx/edx may be used at most once as a parameter */
4844 switch (*req->limited) {
4845 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4846 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4847 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4848 default: panic("Invalid GP register for register parameter");
4852 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4853 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4854 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4855 ir_set_throws_exception(call, throws_exception);
4856 set_am_attributes(call, &am);
4857 call = fix_mem_proj(call, &am);
4859 if (get_irn_pinned(node) == op_pin_state_pinned)
4860 set_irn_pinned(call, op_pin_state_pinned);
4862 SET_IA32_ORIG_NODE(call, node);
4864 if (ia32_cg_config.use_sse2) {
4865 /* remember this call for post-processing */
4866 ARR_APP1(ir_node *, call_list, call);
4867 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
/* Lower the trap builtin to the ia32 UD2 instruction (guaranteed
 * invalid-opcode exception), threaded on the builtin's memory input. */
4874 * Transform Builtin trap
4876 static ir_node *gen_trap(ir_node *node)
4878 dbg_info *dbgi = get_irn_dbg_info(node);
4879 ir_node *block = be_transform_node(get_nodes_block(node));
4880 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4882 return new_bd_ia32_UD2(dbgi, block, mem);
/* Lower the debugbreak builtin to the ia32 Breakpoint node (int3),
 * threaded on the builtin's memory input. */
4886 * Transform Builtin debugbreak
4888 static ir_node *gen_debugbreak(ir_node *node)
4890 dbg_info *dbgi = get_irn_dbg_info(node);
4891 ir_node *block = be_transform_node(get_nodes_block(node));
4892 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4894 return new_bd_ia32_Breakpoint(dbgi, block, mem);
/*
 * Lower the return_address builtin: param 0 is a Const giving the number
 * of frames to walk up, param 1 the current frame pointer.  For non-zero
 * levels a ClimbFrame pseudo-node walks the frame chain (the elided lines
 * presumably guard this on value > 0 — TODO confirm), then the return
 * address is loaded frame-relative via the backend-provided entity.
 */
4898 * Transform Builtin return_address
4900 static ir_node *gen_return_address(ir_node *node)
4902 ir_node *param = get_Builtin_param(node, 0);
4903 ir_node *frame = get_Builtin_param(node, 1);
4904 dbg_info *dbgi = get_irn_dbg_info(node);
4905 ir_tarval *tv = get_Const_tarval(param);
4906 ir_graph *irg = get_irn_irg(node);
4907 unsigned long value = get_tarval_long(tv);
4909 ir_node *block = be_transform_node(get_nodes_block(node));
4910 ir_node *ptr = be_transform_node(frame);
/* ProduceVal gives uninitialized scratch values consumed by ClimbFrame */
4914 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4915 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4916 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4919 /* load the return address from this frame */
4920 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4922 set_irn_pinned(load, get_irn_pinned(node));
4923 set_ia32_op_type(load, ia32_AddrModeS);
4924 set_ia32_ls_mode(load, mode_Iu);
4926 set_ia32_am_offs_int(load, 0);
4927 set_ia32_use_frame(load);
4928 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
/* a floating load may be rematerialized; all load flavours share the
 * same result proj number (checked by the assert) */
4930 if (get_irn_pinned(node) == op_pin_state_floats) {
4931 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4932 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4933 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4934 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4937 SET_IA32_ORIG_NODE(load, node);
4938 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/*
 * Lower the frame_address builtin: analogous to gen_return_address above,
 * but loads the saved frame address via the frame-address entity.
 */
4944 * Transform Builtin frame_address
4946 static ir_node *gen_frame_address(ir_node *node)
4948 ir_node *param = get_Builtin_param(node, 0);
4949 ir_node *frame = get_Builtin_param(node, 1);
4950 dbg_info *dbgi = get_irn_dbg_info(node);
4951 ir_tarval *tv = get_Const_tarval(param);
4952 ir_graph *irg = get_irn_irg(node);
4953 unsigned long value = get_tarval_long(tv);
4955 ir_node *block = be_transform_node(get_nodes_block(node));
4956 ir_node *ptr = be_transform_node(frame);
/* walk up 'value' frames via the ClimbFrame pseudo-node */
4959 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4960 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4961 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4964 /* load the frame address from this frame */
4965 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4967 set_irn_pinned(load, get_irn_pinned(node));
4968 set_ia32_op_type(load, ia32_AddrModeS);
4969 set_ia32_ls_mode(load, mode_Iu);
/* the backend may or may not provide a frame-address entity; the elided
 * branch apparently handles the NULL case — TODO confirm */
4971 ent = ia32_get_frame_address_entity(irg);
4973 set_ia32_am_offs_int(load, 0);
4974 set_ia32_use_frame(load);
4975 set_ia32_frame_ent(load, ent);
4977 /* will fail anyway, but gcc does this: */
4978 set_ia32_am_offs_int(load, 0);
4981 if (get_irn_pinned(node) == op_pin_state_floats) {
4982 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4983 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4984 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4985 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4988 SET_IA32_ORIG_NODE(load, node);
4989 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/*
 * Lower the prefetch builtin.  Param 0 is the address, param 1 the
 * read/write flag, param 2 (SSE only) the locality hint.  Without SSE or
 * 3DNow! prefetch support the builtin degenerates to its memory input.
 */
4993 * Transform Builtin prefetch
4995 static ir_node *gen_prefetch(ir_node *node)
4998 ir_node *ptr, *block, *mem, *base, *idx;
4999 ir_node *param, *new_node;
5002 ia32_address_t addr;
5004 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
5005 /* no prefetch at all, route memory */
5006 return be_transform_node(get_Builtin_mem(node));
5009 param = get_Builtin_param(node, 1);
5010 tv = get_Const_tarval(param);
5011 rw = get_tarval_long(tv);
5013 /* construct load address */
5014 memset(&addr, 0, sizeof(addr));
5015 ptr = get_Builtin_param(node, 0);
5016 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* NULL base/index handling is elided in this listing; only the
 * transformed-node assignments are visible */
5023 base = be_transform_node(base);
5029 idx = be_transform_node(idx);
5032 dbgi = get_irn_dbg_info(node);
5033 block = be_transform_node(get_nodes_block(node));
5034 mem = be_transform_node(get_Builtin_mem(node));
5036 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
5037 /* we have 3DNow!, this was already checked above */
5038 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
5039 } else if (ia32_cg_config.use_sse_prefetch) {
5040 /* note: rw == 1 is IGNORED in that case */
5041 param = get_Builtin_param(node, 2);
5042 tv = get_Const_tarval(param);
5043 locality = get_tarval_long(tv);
5045 /* SSE style prefetch */
/* locality hint selects PREFETCHNTA/T2/T1/T0 (case labels elided) */
5048 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
5051 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
5054 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
5057 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
5061 assert(ia32_cg_config.use_3dnow_prefetch);
5062 /* 3DNow! style prefetch */
5063 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
5066 set_irn_pinned(new_node, get_irn_pinned(node));
5067 set_ia32_op_type(new_node, ia32_AddrModeS);
5068 set_ia32_ls_mode(new_node, mode_Bu);
5069 set_address(new_node, &addr);
5071 SET_IA32_ORIG_NODE(new_node, node);
5073 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
/*
 * Helper for bsf/bsr-like builtins: build the given destructive binop on
 * the builtin's first parameter, trying to fold an address mode for it.
 */
5077 * Transform bsf like node
5079 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5081 ir_node *param = get_Builtin_param(node, 0);
5082 dbg_info *dbgi = get_irn_dbg_info(node);
5084 ir_node *block = get_nodes_block(node);
5085 ir_node *new_block = be_transform_node(block);
5087 ia32_address_mode_t am;
5088 ia32_address_t *addr = &am.addr;
5091 match_arguments(&am, block, NULL, param, NULL, match_am);
5093 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5094 set_am_attributes(cnt, &am);
5095 set_ia32_ls_mode(cnt, get_irn_mode(param));
5097 SET_IA32_ORIG_NODE(cnt, node);
5098 return fix_mem_proj(cnt, &am);
/*
 * Lower the ffs builtin using BSF.  BSF leaves the result undefined for a
 * zero input, so the zero flag is materialized via Setcc, sign-extended to
 * all-ones with Neg, OR-ed over the BSF result, and finally 1 is added via
 * an Lea so ffs(0) == 0 and ffs(x) is 1-based otherwise.
 * NOTE(review): the trailing return statement is elided in this listing.
 */
5102 * Transform builtin ffs.
5104 static ir_node *gen_ffs(ir_node *node)
5106 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5107 ir_node *real = skip_Proj(bsf);
5108 dbg_info *dbgi = get_irn_dbg_info(real);
5109 ir_node *block = get_nodes_block(real);
5110 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* ensure the Bsf node is in mode_T so we can take its flags proj */
5113 if (get_irn_mode(real) != mode_T) {
5114 set_irn_mode(real, mode_T);
5115 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5118 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
5121 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5122 SET_IA32_ORIG_NODE(set, node);
/* widen the 8-bit Setcc result to a full register */
5125 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5126 SET_IA32_ORIG_NODE(conv, node);
5129 neg = new_bd_ia32_Neg(dbgi, block, conv);
5132 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5133 set_ia32_commutative(orn);
/* Lea with offset 1 implements the final +1 without clobbering flags */
5136 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5137 add_ia32_am_offs_int(add, 1);
/*
 * Lower the clz builtin: BSR yields the index of the highest set bit;
 * XOR-ing with 31 converts that into the leading-zero count (for a
 * 32-bit operand).
 */
5142 * Transform builtin clz.
5144 static ir_node *gen_clz(ir_node *node)
5146 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5147 ir_node *real = skip_Proj(bsr);
5148 dbg_info *dbgi = get_irn_dbg_info(real);
5149 ir_node *block = get_nodes_block(real);
5150 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5152 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
/* Lower the ctz builtin: BSF directly yields the trailing-zero count
 * (undefined for a zero input, as with the hardware instruction). */
5156 * Transform builtin ctz.
5158 static ir_node *gen_ctz(ir_node *node)
5160 return gen_unop_AM(node, new_bd_ia32_Bsf);
/*
 * Lower the parity builtin.  The x86 parity flag only reflects the lowest
 * result byte, so the 32-bit value is first folded down: xor the upper
 * half onto the lower, then fold the two remaining bytes (XorHighLow),
 * and finally materialize the not-parity condition with Setcc and widen
 * it to a full register.
 */
5164 * Transform builtin parity.
5166 static ir_node *gen_parity(ir_node *node)
5168 dbg_info *dbgi = get_irn_dbg_info(node);
5169 ir_node *block = get_nodes_block(node);
5170 ir_node *new_block = be_transform_node(block);
5171 ir_node *param = get_Builtin_param(node, 0);
5172 ir_node *new_param = be_transform_node(param);
5175 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5176 * so we have to do complicated xoring first.
5177 * (we should also better lower this before the backend so we still have a
5178 * chance for CSE, constant folding and other goodies for some of these
5181 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5182 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5183 ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5185 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
/* switch XorHighLow to mode_T so its flags output can be projected */
5188 set_irn_mode(xor2, mode_T);
5189 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
5192 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5193 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the 8-bit Setcc result */
5196 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5197 nomem, new_node, mode_Bu);
5198 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Lower the popcount builtin.  With POPCNT hardware support (SSE4.2 /
 * SSE4a) a single Popcnt instruction with folded address mode is emitted;
 * otherwise the classic mask-shift-add bit-counting network from
 * Hacker's Delight is built out of And/Shr/Lea nodes.
 */
5203 * Transform builtin popcount
5205 static ir_node *gen_popcount(ir_node *node)
5207 ir_node *param = get_Builtin_param(node, 0);
5208 dbg_info *dbgi = get_irn_dbg_info(node);
5210 ir_node *block = get_nodes_block(node);
5211 ir_node *new_block = be_transform_node(block);
5214 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5216 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5217 if (ia32_cg_config.use_popcnt) {
5218 ia32_address_mode_t am;
5219 ia32_address_t *addr = &am.addr;
5222 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5224 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5225 set_am_attributes(cnt, &am);
5226 set_ia32_ls_mode(cnt, get_irn_mode(param));
5228 SET_IA32_ORIG_NODE(cnt, node);
5229 return fix_mem_proj(cnt, &am);
5232 new_param = be_transform_node(param);
5234 /* do the standard popcount algo */
5235 /* TODO: This is stupid, we should transform this before the backend,
5236 * to get CSE, localopts, etc. for the operations
5237 * TODO: This is also not the optimal algorithm (it is just the starting
5238 * example in hackers delight, they optimize it more on the following page)
5239 * But I'm too lazy to fix this now, as the code should get lowered before
5240 * the backend anyway.
5243 /* m1 = x & 0x55555555 */
5244 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5245 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5248 simm = ia32_create_Immediate(NULL, 0, 1);
5249 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5251 /* m2 = s1 & 0x55555555 */
5252 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (Lea used as a flag-free add) */
5255 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5257 /* m4 = m3 & 0x33333333 */
5258 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5259 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5262 simm = ia32_create_Immediate(NULL, 0, 2);
5263 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5265 /* m5 = s2 & 0x33333333 */
5266 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 */
5269 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5271 /* m7 = m6 & 0x0F0F0F0F */
5272 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5273 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5276 simm = ia32_create_Immediate(NULL, 0, 4);
5277 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5279 /* m8 = s3 & 0x0F0F0F0F */
5280 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 */
5283 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5285 /* m10 = m9 & 0x00FF00FF */
5286 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5287 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5290 simm = ia32_create_Immediate(NULL, 0, 8);
5291 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5293 /* m11 = s4 & 0x00FF00FF */
5294 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5296 /* m12 = m10 + m11 */
5297 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5299 /* m13 = m12 & 0x0000FFFF */
5300 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5301 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5303 /* s5 = m12 >> 16 */
5304 simm = ia32_create_Immediate(NULL, 0, 16);
5305 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5307 /* res = m13 + s5 */
5308 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
/*
 * Lower the byte-swap builtin.  32-bit: use BSWAP on i486+, otherwise
 * emulate it with shifts, masks and Lea-adds.  16-bit: BSWAP16 is always
 * available.  Any other operand size is rejected.
 * NOTE(review): the size switch's case labels are elided in this listing.
 */
5312 * Transform builtin byte swap.
5314 static ir_node *gen_bswap(ir_node *node)
5316 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5317 dbg_info *dbgi = get_irn_dbg_info(node);
5319 ir_node *block = get_nodes_block(node);
5320 ir_node *new_block = be_transform_node(block);
5321 ir_mode *mode = get_irn_mode(param);
5322 unsigned size = get_mode_size_bits(mode);
5323 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5327 if (ia32_cg_config.use_i486) {
5328 /* swap available */
5329 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* pre-i486 fallback: assemble the swapped word byte by byte:
 * (x << 24) + ((x << 8) & 0xFF00) + ((x >> 8) & 0xFF0000) + (x >> 24) */
5331 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5332 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5334 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5335 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5337 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5339 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5340 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5342 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5343 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5346 /* swap16 always available */
5347 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5350 panic("Invalid bswap size (%d)", size);
/*
 * Lower the outport builtin to an ia32 Outport node.  Param 0 is the port
 * (may become an immediate), param 1 the value; the value's mode sets the
 * instruction's load/store mode.
 * NOTE(review): the return statement is elided in this listing.
 */
5355 * Transform builtin outport.
5357 static ir_node *gen_outport(ir_node *node)
5359 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5360 ir_node *oldv = get_Builtin_param(node, 1);
5361 ir_mode *mode = get_irn_mode(oldv);
5362 ir_node *value = be_transform_node(oldv);
5363 ir_node *block = be_transform_node(get_nodes_block(node));
5364 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5365 dbg_info *dbgi = get_irn_dbg_info(node);
5367 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5368 set_ia32_ls_mode(res, mode);
/*
 * Lower the inport builtin to an ia32 Inport node.  The result mode is
 * taken from the builtin's method type; param 0 is the port (may become
 * an immediate).
 * NOTE(review): the trailing statements after the "missing Result Proj"
 * comment are elided in this listing.
 */
5373 * Transform builtin inport.
5375 static ir_node *gen_inport(ir_node *node)
5377 ir_type *tp = get_Builtin_type(node);
5378 ir_type *rstp = get_method_res_type(tp, 0);
5379 ir_mode *mode = get_type_mode(rstp);
5380 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5381 ir_node *block = be_transform_node(get_nodes_block(node));
5382 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5383 dbg_info *dbgi = get_irn_dbg_info(node);
5385 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5386 set_ia32_ls_mode(res, mode);
5388 /* check for missing Result Proj */
/*
 * Lower the inner_trampoline builtin: write a small code stub at the
 * target address consisting of "mov ecx, <env>" (opcode 0xB9 + 32-bit
 * immediate) followed by "jmp rel32 <callee>" (opcode 0xE9 + relative
 * displacement).  The displacement is callee - (trampoline + 10), since
 * the stub is 10 bytes long.  Returns a Tuple of (memory, trampoline ptr).
 * NOTE(review): the statements advancing addr.offset/addr.mem between the
 * stores are elided in this listing.
 */
5395 * Transform a builtin inner trampoline
5397 static ir_node *gen_inner_trampoline(ir_node *node)
5399 ir_node *ptr = get_Builtin_param(node, 0);
5400 ir_node *callee = get_Builtin_param(node, 1);
5401 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5402 ir_node *mem = get_Builtin_mem(node);
5403 ir_node *block = get_nodes_block(node);
5404 ir_node *new_block = be_transform_node(block);
5406 ir_node *trampoline;
5408 dbg_info *dbgi = get_irn_dbg_info(node);
5409 ia32_address_t addr;
5411 /* construct store address */
5412 memset(&addr, 0, sizeof(addr));
5413 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5415 if (addr.base == NULL) {
5416 addr.base = noreg_GP;
5418 addr.base = be_transform_node(addr.base);
5421 if (addr.index == NULL) {
5422 addr.index = noreg_GP;
5424 addr.index = be_transform_node(addr.index);
5426 addr.mem = be_transform_node(mem);
5428 /* mov ecx, <env> */
5429 val = ia32_create_Immediate(NULL, 0, 0xB9);
5430 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5431 addr.index, addr.mem, val);
5432 set_irn_pinned(store, get_irn_pinned(node));
5433 set_ia32_op_type(store, ia32_AddrModeD);
5434 set_ia32_ls_mode(store, mode_Bu);
5435 set_address(store, &addr);
/* store the 32-bit environment pointer as the mov immediate */
5439 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5440 addr.index, addr.mem, env);
5441 set_irn_pinned(store, get_irn_pinned(node));
5442 set_ia32_op_type(store, ia32_AddrModeD);
5443 set_ia32_ls_mode(store, mode_Iu);
5444 set_address(store, &addr);
5448 /* jmp rel <callee> */
5449 val = ia32_create_Immediate(NULL, 0, 0xE9);
5450 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5451 addr.index, addr.mem, val);
5452 set_irn_pinned(store, get_irn_pinned(node));
5453 set_ia32_op_type(store, ia32_AddrModeD);
5454 set_ia32_ls_mode(store, mode_Bu);
5455 set_address(store, &addr);
5459 trampoline = be_transform_node(ptr);
5461 /* the callee is typically an immediate */
5462 if (is_SymConst(callee)) {
/* callee - 10 folded directly into the Const's offset */
5463 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5465 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5466 add_ia32_am_offs_int(rel, -10);
/* rel = (callee - 10) - trampoline: the jmp's relative displacement */
5468 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5470 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5471 addr.index, addr.mem, rel);
5472 set_irn_pinned(store, get_irn_pinned(node));
5473 set_ia32_op_type(store, ia32_AddrModeD);
5474 set_ia32_ls_mode(store, mode_Iu);
5475 set_address(store, &addr);
5480 return new_r_Tuple(new_block, 2, in);
/*
 * Dispatch a Builtin node to its specific lowering function based on the
 * builtin kind; panics for any kind this backend does not implement.
 * NOTE(review): several case labels are elided in this listing.
 */
5484 * Transform Builtin node.
5486 static ir_node *gen_Builtin(ir_node *node)
5488 ir_builtin_kind kind = get_Builtin_kind(node);
5492 return gen_trap(node);
5493 case ir_bk_debugbreak:
5494 return gen_debugbreak(node);
5495 case ir_bk_return_address:
5496 return gen_return_address(node);
5497 case ir_bk_frame_address:
5498 return gen_frame_address(node);
5499 case ir_bk_prefetch:
5500 return gen_prefetch(node);
5502 return gen_ffs(node);
5504 return gen_clz(node);
5506 return gen_ctz(node);
5508 return gen_parity(node);
5509 case ir_bk_popcount:
5510 return gen_popcount(node);
5512 return gen_bswap(node);
5514 return gen_outport(node);
5516 return gen_inport(node);
5517 case ir_bk_inner_trampoline:
5518 return gen_inner_trampoline(node);
5520 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/*
 * Transform a Proj of a Builtin: renumber/redirect the proj according to
 * what the builtin was lowered to (plain result, memory-only node, the
 * Inport node's result/memory projs, or the inner-trampoline Tuple).
 * NOTE(review): several case labels and return statements are elided in
 * this listing.
 */
5524 * Transform Proj(Builtin) node.
5526 static ir_node *gen_Proj_Builtin(ir_node *proj)
5528 ir_node *node = get_Proj_pred(proj);
5529 ir_node *new_node = be_transform_node(node);
5530 ir_builtin_kind kind = get_Builtin_kind(node);
5533 case ir_bk_return_address:
5534 case ir_bk_frame_address:
5539 case ir_bk_popcount:
/* these builtins lower to a single value; only the result proj exists */
5541 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5544 case ir_bk_debugbreak:
5545 case ir_bk_prefetch:
/* these builtins lower to a memory-only node */
5547 assert(get_Proj_proj(proj) == pn_Builtin_M);
5550 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5551 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5553 assert(get_Proj_proj(proj) == pn_Builtin_M);
5554 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5556 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returns a Tuple: pred 0 = memory, pred 1 = ptr */
5557 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5558 return get_Tuple_pred(new_node, 1);
5560 assert(get_Proj_proj(proj) == pn_Builtin_M);
5561 return get_Tuple_pred(new_node, 0);
5564 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Duplicate a be_IncSP unchanged, but mark it as flag-modifying since the
 * underlying add/sub on esp clobbers the flags register. */
5567 static ir_node *gen_be_IncSP(ir_node *node)
5569 ir_node *res = be_duplicate_node(node);
5570 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
/*
 * Transform a Proj of a be_Call: map the generic be_Call proj numbers
 * onto ia32_Call proj numbers.  For data results the matching output is
 * searched by comparing the (limited) register requirement of the old
 * proj against the new call's output requirements; fixed outputs such as
 * the stack pointer and fpcw get their registers assigned explicitly.
 * NOTE(review): several statements (mode fix-ups, loop exit, default
 * case) are elided in this listing.
 */
5576 * Transform the Projs from a be_Call.
5578 static ir_node *gen_Proj_be_Call(ir_node *node)
5580 ir_node *call = get_Proj_pred(node);
5581 ir_node *new_call = be_transform_node(call);
5582 dbg_info *dbgi = get_irn_dbg_info(node);
5583 long proj = get_Proj_proj(node);
5584 ir_mode *mode = get_irn_mode(node);
5587 if (proj == pn_be_Call_M) {
5588 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5590 /* transform call modes */
5591 if (mode_is_data(mode)) {
5592 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5596 /* Map from be_Call to ia32_Call proj number */
5597 if (proj == pn_be_Call_sp) {
5598 proj = pn_ia32_Call_stack;
5599 } else if (proj == pn_be_Call_M) {
5600 proj = pn_ia32_Call_M;
5601 } else if (proj == pn_be_Call_X_except) {
5602 proj = pn_ia32_Call_X_except;
5603 } else if (proj == pn_be_Call_X_regular) {
5604 proj = pn_ia32_Call_X_regular;
/* ordinary result: find the new output with the same limited register */
5606 arch_register_req_t const *const req = arch_get_register_req_out(node);
5607 int const n_outs = arch_irn_get_n_outs(new_call);
5610 assert(proj >= pn_be_Call_first_res);
5611 assert(req->type & arch_register_req_type_limited);
5613 for (i = 0; i < n_outs; ++i) {
5614 arch_register_req_t const *const new_req
5615 = arch_get_out_register_req(new_call, i);
5617 if (!(new_req->type & arch_register_req_type_limited) ||
5618 new_req->cls != req->cls ||
5619 *new_req->limited != *req->limited)
5628 res = new_rd_Proj(dbgi, new_call, mode, proj);
5630 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5632 case pn_ia32_Call_stack:
5633 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5636 case pn_ia32_Call_fpcw:
5637 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
/* A Proj of a Cmp should never reach the transform phase: mode_b values
 * are expected to have been lowered earlier, so this always panics. */
5645 * Transform the Projs from a Cmp.
5647 static ir_node *gen_Proj_Cmp(ir_node *node)
5649 /* this probably means not all mode_b nodes were lowered... */
5650 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/*
 * Transform a Proj of an ASM node: the memory proj is renumbered to the
 * last output of the transformed ASM; int/reference and float projs keep
 * their position (the elided lines presumably adjust pos and/or mode —
 * TODO confirm); any other mode is rejected.
 */
5654 static ir_node *gen_Proj_ASM(ir_node *node)
5656 ir_mode *mode = get_irn_mode(node);
5657 ir_node *pred = get_Proj_pred(node);
5658 ir_node *new_pred = be_transform_node(pred);
5659 long pos = get_Proj_proj(node);
5661 if (mode == mode_M) {
5662 pos = arch_irn_get_n_outs(new_pred)-1;
5663 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5665 } else if (mode_is_float(mode)) {
5668 panic("unexpected proj mode at ASM");
5671 return new_r_Proj(new_pred, mode, pos);
/*
 * Generic Proj transformer: dispatch on the predecessor's opcode to the
 * specific gen_Proj_* helper.  Start projs, l_FloattoLL projs and projs
 * of untransformed (non-ia32) nodes get special handling; everything
 * else is duplicated.
 * NOTE(review): the opcode case labels inside the switch are elided in
 * this listing.
 */
5675 * Transform and potentially renumber Proj nodes.
5677 static ir_node *gen_Proj(ir_node *node)
5679 ir_node *pred = get_Proj_pred(node);
5682 switch (get_irn_opcode(pred)) {
5684 return gen_Proj_Load(node);
5686 return gen_Proj_Store(node);
5688 return gen_Proj_ASM(node);
5690 return gen_Proj_Builtin(node);
5692 return gen_Proj_Div(node);
5694 return gen_Proj_Mod(node);
5696 return gen_Proj_CopyB(node);
5698 return gen_Proj_be_SubSP(node);
5700 return gen_Proj_be_AddSP(node);
5702 return gen_Proj_be_Call(node);
5704 return gen_Proj_Cmp(node);
5706 proj = get_Proj_proj(node);
5708 case pn_Start_X_initial_exec: {
5709 ir_node *block = get_nodes_block(pred);
5710 ir_node *new_block = be_transform_node(block);
5711 dbg_info *dbgi = get_irn_dbg_info(node);
5712 /* we exchange the ProjX with a jump */
5713 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5721 if (is_ia32_l_FloattoLL(pred)) {
5722 return gen_Proj_l_FloattoLL(node);
5724 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5728 ir_mode *mode = get_irn_mode(node);
5729 if (ia32_mode_needs_gp_reg(mode)) {
/* force GP-register projs to mode_Iu, keeping the original node number */
5730 ir_node *new_pred = be_transform_node(pred);
5731 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5732 get_Proj_proj(node));
5733 new_proj->node_nr = node->node_nr;
5738 return be_duplicate_node(node);
/*
 * Install all ia32 transform callbacks in the generic op table.  Ops not
 * listed here keep whatever default be_start_transform_setup() installs.
 */
5742 * Enters all transform functions into the generic pointer
5744 static void register_transformers(void)
5746 /* first clear the generic function pointer for all ops */
5747 be_start_transform_setup();
5749 be_set_transform_function(op_Add, gen_Add);
5750 be_set_transform_function(op_And, gen_And);
5751 be_set_transform_function(op_ASM, ia32_gen_ASM);
5752 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5753 be_set_transform_function(op_be_Call, gen_be_Call);
5754 be_set_transform_function(op_be_Copy, gen_be_Copy);
5755 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5756 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5757 be_set_transform_function(op_be_Return, gen_be_Return);
5758 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5759 be_set_transform_function(op_Builtin, gen_Builtin);
5760 be_set_transform_function(op_Cmp, gen_Cmp);
5761 be_set_transform_function(op_Cond, gen_Cond);
5762 be_set_transform_function(op_Const, gen_Const);
5763 be_set_transform_function(op_Conv, gen_Conv);
5764 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5765 be_set_transform_function(op_Div, gen_Div);
5766 be_set_transform_function(op_Eor, gen_Eor);
5767 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5768 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5769 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5770 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5771 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5772 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5773 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5774 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5775 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
/* nodes that are already backend-specific are copied unchanged */
5776 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5777 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5778 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5779 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5780 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5781 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5782 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5783 be_set_transform_function(op_IJmp, gen_IJmp);
5784 be_set_transform_function(op_Jmp, gen_Jmp);
5785 be_set_transform_function(op_Load, gen_Load);
5786 be_set_transform_function(op_Minus, gen_Minus);
5787 be_set_transform_function(op_Mod, gen_Mod);
5788 be_set_transform_function(op_Mul, gen_Mul);
5789 be_set_transform_function(op_Mulh, gen_Mulh);
5790 be_set_transform_function(op_Mux, gen_Mux);
5791 be_set_transform_function(op_Not, gen_Not);
5792 be_set_transform_function(op_Or, gen_Or);
5793 be_set_transform_function(op_Phi, gen_Phi);
5794 be_set_transform_function(op_Proj, gen_Proj);
5795 be_set_transform_function(op_Rotl, gen_Rotl);
5796 be_set_transform_function(op_Shl, gen_Shl);
5797 be_set_transform_function(op_Shr, gen_Shr);
5798 be_set_transform_function(op_Shrs, gen_Shrs);
5799 be_set_transform_function(op_Store, gen_Store);
5800 be_set_transform_function(op_Sub, gen_Sub);
5801 be_set_transform_function(op_SymConst, gen_SymConst);
5802 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
/*
 * Pre-transform the per-graph singleton nodes (noreg placeholders, GetEIP,
 * fpu truncation mode) and cache the nomem/noreg_GP nodes used throughout
 * the transformers above.
 */
5806 * Pre-transform all unknown and noreg nodes.
5808 static void ia32_pretransform_node(void)
5810 ir_graph *irg = current_ir_graph;
5811 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5813 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5814 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5815 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5816 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5817 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
/* cached globals used by the transform functions */
5819 nomem = get_irg_no_mem(irg);
5820 noreg_GP = ia32_new_NoReg_gp(irg);
/*
 * SSE-mode fixup for calls with float results: the x87 ABI returns floats
 * in st(0), so for every recorded call each float result user is patched.
 * An xStore user is rewritten into a direct vfst; all other users get the
 * value spilled to the frame with vfst and reloaded into an XMM register
 * with xLoad, with the call's memory proj rerouted through the new load.
 * NOTE(review): some declarations and control statements (e.g. 'continue's
 * after the early-out comments, the new_res initialization) are elided in
 * this listing.
 */
5824 * Post-process all calls if we are in SSE mode.
5825 * The ABI requires that the results are in st0, copy them
5826 * to a xmm register.
5828 static void postprocess_fp_call_results(void)
5832 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5833 ir_node *call = call_list[i];
5834 ir_type *mtp = call_types[i];
5837 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5838 ir_type *res_tp = get_method_res_type(mtp, j);
5839 ir_node *res, *new_res;
5840 const ir_edge_t *edge, *next;
5843 if (! is_atomic_type(res_tp)) {
5844 /* no floating point return */
5847 res_mode = get_type_mode(res_tp);
5848 if (! mode_is_float(res_mode)) {
5849 /* no floating point return */
5853 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5856 /* now patch the users */
5857 foreach_out_edge_safe(res, edge, next) {
5858 ir_node *succ = get_edge_src_irn(edge);
/* Keep nodes must stay attached to the x87 result */
5861 if (be_is_Keep(succ))
5864 if (is_ia32_xStore(succ)) {
5865 /* an xStore can be patched into an vfst */
5866 dbg_info *db = get_irn_dbg_info(succ);
5867 ir_node *block = get_nodes_block(succ);
5868 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5869 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5870 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5871 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5872 ir_mode *mode = get_ia32_ls_mode(succ);
5874 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5875 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5876 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5877 if (is_ia32_use_frame(succ))
5878 set_ia32_use_frame(st);
5879 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5880 set_irn_pinned(st, get_irn_pinned(succ));
5881 set_ia32_op_type(st, ia32_AddrModeD);
/* xStore and vfst share proj numbers, so users need no renumbering */
5883 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5884 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5885 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
5888 } else if (new_res == NULL) {
/* first ordinary user: build the st(0)->frame->XMM transfer once */
5889 dbg_info *db = get_irn_dbg_info(call);
5890 ir_node *block = get_nodes_block(call);
5891 ir_node *frame = get_irg_frame(current_ir_graph);
5892 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5893 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5894 ir_node *vfst, *xld, *new_mem;
5897 /* store st(0) on stack */
5898 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5900 set_ia32_op_type(vfst, ia32_AddrModeD);
5901 set_ia32_use_frame(vfst);
5903 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5905 /* load into SSE register */
5906 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5908 set_ia32_op_type(xld, ia32_AddrModeS);
5909 set_ia32_use_frame(xld);
5911 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5912 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5914 if (old_mem != NULL) {
5915 edges_reroute(old_mem, new_mem);
/* any later user of the old result proj is redirected to the XMM value */
5919 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5925 /* do the transformation */
/* Entry point of the ia32 transform phase: lowers the generic firm graph
 * `irg` into ia32 backend nodes.  Sets up heights/address-mode analysis,
 * runs the generic be_transform driver with the ia32 pre-transformer, and
 * in SSE2 mode fixes up x87 float call results afterwards.
 * NOTE(review): a few lines (including the closing brace and, presumably,
 * the call that disables CSE before the transform) are elided in this
 * view — confirm against the full file. */
5926 void ia32_transform_graph(ir_graph *irg)
5930	register_transformers();
/* Reset per-graph transform state. */
5931	initial_fpcw = NULL;
5932	ia32_no_pic_adjust = 0;
/* Remember the incoming x87 control word so later code can restore it. */
5934	old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* Dominance-height analysis, timed separately for statistics. */
5936	be_timer_push(T_HEIGHTS);
5937	ia32_heights = heights_new(irg);
5938	be_timer_pop(T_HEIGHTS);
5939	ia32_calculate_non_address_mode_nodes(irg);
5941	/* the transform phase is not safe for CSE (yet) because several nodes get
5942	 * attributes set after their creation */
/* Save the CSE flag; it is restored below (the disabling call itself is on
 * an elided line — verify). */
5943	cse_last = get_opt_cse();
/* Flexible arrays collecting float-returning calls for SSE post-processing. */
5946	call_list = NEW_ARR_F(ir_node *, 0);
5947	call_types = NEW_ARR_F(ir_type *, 0);
5948	be_transform_graph(irg, ia32_pretransform_node);
/* In SSE mode the ABI still returns floats in st0: copy them to xmm regs. */
5950	if (ia32_cg_config.use_sse2)
5951	postprocess_fp_call_results();
5952	DEL_ARR_F(call_types);
5953	DEL_ARR_F(call_list);
5955	set_opt_cse(cse_last);
/* Tear down the analyses built above. */
5957	ia32_free_non_address_mode_nodes();
5958	heights_free(ia32_heights);
5959	ia32_heights = NULL;
5962 void ia32_init_transform(void)
5964 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");