2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode_t.h"
52 #include "../besched.h"
54 #include "../beutil.h"
55 #include "../beirg_t.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
95 static ir_node *initial_fpcw = NULL;
/* Constructor-callback signatures for the generated ia32 node builders.
 * The generic gen_binop()/gen_unop()/gen_shift_binop() helpers below take
 * one of these and call it with the matched address-mode operands.
 * NOTE(review): trailing parameters of some typedefs lie outside this
 * excerpt (sampled source) — confirm against the full file. */
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* forward declarations of helpers defined later in this file */
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* its enough to have those once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero is a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero is a node represents the -1 constant. */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
155 static bool is_simple_x87_Const(ir_node *node)
157 tarval *tv = get_Const_tarval(node);
/* 0.0 and 1.0 can be materialised directly on x87 (fldz/fld1 — see the
 * vfldz/vfld1 constructions in gen_Const below) */
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
166 * returns true if constant can be created with a simple float command
168 static bool is_simple_sse_Const(ir_node *node)
170 tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
/* zero is always cheap to build (xZero in gen_Const) */
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
/* assemble the low 32 bit of the double from the tarval's bytes
 * (little-endian order) */
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
198 * Transforms a Const.
200 static ir_node *gen_Const(ir_node *node)
202 ir_node *old_block = get_nodes_block(node);
203 ir_node *block = be_transform_node(old_block);
204 dbg_info *dbgi = get_irn_dbg_info(node);
205 ir_mode *mode = get_irn_mode(node);
207 assert(is_Const(node));
209 if (mode_is_float(mode)) {
/* --- float constants, SSE2 variant --- */
214 if (ia32_cg_config.use_sse2) {
215 tarval *tv = get_Const_tarval(node);
216 if (tarval_is_null(tv)) {
/* 0.0: cheapest form, an xZero (xor) node */
217 load = new_bd_ia32_xZero(dbgi, block);
218 set_ia32_ls_mode(load, mode);
220 #ifdef CONSTRUCT_SSE_CONST
221 } else if (tarval_is_one(tv)) {
/* 1.0: shift all-ones left then right to leave exactly the
 * exponent bit pattern of 1.0 (26/2 for float, 55/2 for double) */
222 int cnst = mode == mode_F ? 26 : 55;
223 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
224 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
225 ir_node *pslld, *psrld;
227 load = new_bd_ia32_xAllOnes(dbgi, block);
228 set_ia32_ls_mode(load, mode);
229 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
230 set_ia32_ls_mode(pslld, mode);
231 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
232 set_ia32_ls_mode(psrld, mode);
234 #endif /* CONSTRUCT_SSE_CONST */
235 } else if (mode == mode_F) {
236 /* we can place any 32bit constant by using a movd gp, sse */
237 unsigned val = get_tarval_sub_bits(tv, 0) |
238 (get_tarval_sub_bits(tv, 1) << 8) |
239 (get_tarval_sub_bits(tv, 2) << 16) |
240 (get_tarval_sub_bits(tv, 3) << 24);
241 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, val);
242 load = new_bd_ia32_xMovd(dbgi, block, cnst);
243 set_ia32_ls_mode(load, mode);
246 #ifdef CONSTRUCT_SSE_CONST
247 if (mode == mode_D) {
/* check whether the low 32 bit of the double are zero */
248 unsigned val = get_tarval_sub_bits(tv, 0) |
249 (get_tarval_sub_bits(tv, 1) << 8) |
250 (get_tarval_sub_bits(tv, 2) << 16) |
251 (get_tarval_sub_bits(tv, 3) << 24);
253 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
254 ir_node *cnst, *psllq;
256 /* fine, lower 32bit are zero, produce 32bit value */
257 val = get_tarval_sub_bits(tv, 4) |
258 (get_tarval_sub_bits(tv, 5) << 8) |
259 (get_tarval_sub_bits(tv, 6) << 16) |
260 (get_tarval_sub_bits(tv, 7) << 24);
261 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, val);
262 load = new_bd_ia32_xMovd(dbgi, block, cnst);
263 set_ia32_ls_mode(load, mode);
/* shift the upper half into place */
264 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
265 set_ia32_ls_mode(psllq, mode);
270 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the constant from a global entity */
271 floatent = create_float_const_entity(node);
273 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
274 set_ia32_op_type(load, ia32_AddrModeS);
275 set_ia32_am_sc(load, floatent);
/* constant loads can always be repeated, so mark rematerializable */
276 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
277 res = new_r_Proj(current_ir_graph, block, load, mode_xmm, pn_ia32_xLoad_res);
/* --- float constants, x87 variant --- */
280 if (is_Const_null(node)) {
281 load = new_bd_ia32_vfldz(dbgi, block);
283 set_ia32_ls_mode(load, mode);
284 } else if (is_Const_one(node)) {
285 load = new_bd_ia32_vfld1(dbgi, block);
287 set_ia32_ls_mode(load, mode);
291 floatent = create_float_const_entity(node);
292 /* create_float_const_ent is smart and sometimes creates
294 ls_mode = get_type_mode(get_entity_type(floatent));
296 load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
298 set_ia32_op_type(load, ia32_AddrModeS);
299 set_ia32_am_sc(load, floatent);
300 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
301 res = new_r_Proj(current_ir_graph, block, load, mode_vfp, pn_ia32_vfld_res);
304 #ifdef CONSTRUCT_SSE_CONST
306 #endif /* CONSTRUCT_SSE_CONST */
307 SET_IA32_ORIG_NODE(load, node);
309 be_dep_on_frame(load);
311 } else { /* non-float mode */
313 tarval *tv = get_Const_tarval(node);
/* normalize to 32bit unsigned before extracting the long value */
316 tv = tarval_convert_to(tv, mode_Iu);
318 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
320 panic("couldn't convert constant tarval (%+F)", node);
322 val = get_tarval_long(tv);
324 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, val);
325 SET_IA32_ORIG_NODE(cnst, node);
327 be_dep_on_frame(cnst);
333 * Transforms a SymConst.
335 static ir_node *gen_SymConst(ir_node *node)
337 ir_node *old_block = get_nodes_block(node);
338 ir_node *block = be_transform_node(old_block);
339 dbg_info *dbgi = get_irn_dbg_info(node);
340 ir_mode *mode = get_irn_mode(node);
343 if (mode_is_float(mode)) {
/* float symconst: turn into a load from the entity's address */
344 if (ia32_cg_config.use_sse2)
345 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
347 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
348 set_ia32_am_sc(cnst, get_SymConst_entity(node));
349 set_ia32_use_frame(cnst);
/* integer case: only entity addresses are supported */
353 if (get_SymConst_kind(node) != symconst_addr_ent) {
354 panic("backend only support symconst_addr_ent (at %+F)", node);
356 entity = get_SymConst_entity(node);
/* the entity address becomes an immediate of an ia32 Const */
357 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0);
360 SET_IA32_ORIG_NODE(cnst, node);
362 be_dep_on_frame(cnst);
367 * Create a float type for the given mode and cache it.
369 * @param mode the mode for the float type (might be integer mode for SSE2 types)
370 * @param align alignment
372 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
/* one static cache per supported mode, indexed by alignment
 * (NOTE(review): caches have 16 slots — assumes align < 16, confirm) */
378 if (mode == mode_Iu) {
379 static ir_type *int_Iu[16] = {NULL, };
381 if (int_Iu[align] == NULL) {
382 snprintf(buf, sizeof(buf), "int_Iu_%u", align);
383 int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
384 /* set the specified alignment */
385 set_type_alignment_bytes(tp, align);
387 return int_Iu[align];
388 } else if (mode == mode_Lu) {
389 static ir_type *int_Lu[16] = {NULL, };
391 if (int_Lu[align] == NULL) {
392 snprintf(buf, sizeof(buf), "int_Lu_%u", align);
393 int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
394 /* set the specified alignment */
395 set_type_alignment_bytes(tp, align);
397 return int_Lu[align];
398 } else if (mode == mode_F) {
399 static ir_type *float_F[16] = {NULL, };
401 if (float_F[align] == NULL) {
402 snprintf(buf, sizeof(buf), "float_F_%u", align);
403 float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
404 /* set the specified alignment */
405 set_type_alignment_bytes(tp, align);
407 return float_F[align];
408 } else if (mode == mode_D) {
409 static ir_type *float_D[16] = {NULL, };
411 if (float_D[align] == NULL) {
412 snprintf(buf, sizeof(buf), "float_D_%u", align);
413 float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
414 /* set the specified alignment */
415 set_type_alignment_bytes(tp, align);
417 return float_D[align];
/* fallback: extended precision (mode_E) */
419 static ir_type *float_E[16] = {NULL, };
421 if (float_E[align] == NULL) {
422 snprintf(buf, sizeof(buf), "float_E_%u", align);
423 float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
424 /* set the specified alignment */
425 set_type_alignment_bytes(tp, align);
427 return float_E[align];
432 * Create a float[2] array type for the given atomic type.
434 * @param tp the atomic type
436 static ir_type *ia32_create_float_array(ir_type *tp) {
438 ir_mode *mode = get_type_mode(tp);
439 unsigned align = get_type_alignment_bytes(tp);
/* cache one array type per element mode, indexed by alignment */
444 if (mode == mode_F) {
445 static ir_type *float_F[16] = {NULL, };
447 if (float_F[align] != NULL)
448 return float_F[align];
449 snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
450 arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
451 } else if (mode == mode_D) {
452 static ir_type *float_D[16] = {NULL, };
454 if (float_D[align] != NULL)
455 return float_D[align];
456 snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
457 arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
459 static ir_type *float_E[16] = {NULL, };
461 if (float_E[align] != NULL)
462 return float_E[align];
463 snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
464 arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
/* a 2-element array: size is twice the element size, layout fixed */
466 set_type_alignment_bytes(arr, align);
467 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
468 set_type_state(arr, layout_fixed);
472 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
473 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
475 static const struct {
476 const char *ent_name;
477 const char *cnst_str;
/* table of the known constants: name, value string, mode selector,
 * alignment — indexed by ia32_known_const_t */
480 } names [ia32_known_const_max] = {
481 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
482 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
483 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
484 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
485 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
487 static ir_entity *ent_cache[ia32_known_const_max];
489 const char *ent_name, *cnst_str;
495 ent_name = names[kct].ent_name;
/* create the entity lazily and cache it for subsequent calls */
496 if (! ent_cache[kct]) {
497 cnst_str = names[kct].cnst_str;
499 switch (names[kct].mode) {
500 case 0: mode = mode_Iu; break;
501 case 1: mode = mode_Lu; break;
502 default: mode = mode_F; break;
504 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
505 tp = ia32_create_float_type(mode, names[kct].align);
/* the ULL bias is a 2-element array {0, bias}, not a scalar */
507 if (kct == ia32_ULLBIAS)
508 tp = ia32_create_float_array(tp);
509 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
511 set_entity_ld_ident(ent, get_entity_ident(ent));
512 set_entity_visibility(ent, visibility_local);
513 set_entity_variability(ent, variability_constant);
514 set_entity_allocation(ent, allocation_static);
516 if (kct == ia32_ULLBIAS) {
517 ir_initializer_t *initializer = create_initializer_compound(2);
519 set_initializer_compound_value(initializer, 0,
520 create_initializer_tarval(get_tarval_null(mode)));
521 set_initializer_compound_value(initializer, 1,
522 create_initializer_tarval(tv));
524 set_entity_initializer(ent, initializer);
526 set_entity_initializer(ent, create_initializer_tarval(tv));
529 /* cache the entry */
530 ent_cache[kct] = ent;
533 return ent_cache[kct];
537 * return true if the node is a Proj(Load) and could be used in source address
538 * mode for another node. Will return only true if the @p other node is not
539 * dependent on the memory of the Load (for binary operations use the other
540 * input here, for unary operations use NULL).
542 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
543 ir_node *other, ir_node *other2, match_flags_t flags)
548 /* float constants are always available */
549 if (is_Const(node)) {
550 ir_mode *mode = get_irn_mode(node);
551 if (mode_is_float(mode)) {
/* ...but only if they cannot be built with a cheap instruction
 * sequence anyway (see is_simple_*_Const above) */
552 if (ia32_cg_config.use_sse2) {
553 if (is_simple_sse_Const(node))
556 if (is_simple_x87_Const(node))
/* don't fold a constant used more than once into AM */
559 if (get_irn_n_edges(node) > 1)
/* otherwise the node must be a Proj(Load, pn_Load_res) */
567 load = get_Proj_pred(node);
568 pn = get_Proj_proj(node);
569 if (!is_Load(load) || pn != pn_Load_res)
/* the Load must live in the same block as the consumer */
571 if (get_nodes_block(load) != block)
573 /* we only use address mode if we're the only user of the load */
574 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
576 /* in some edge cases with address mode we might reach the load normally
577 * and through some AM sequence, if it is already materialized then we
578 * can't create an AM node from it */
579 if (be_is_transformed(node))
582 /* don't do AM if other node inputs depend on the load (via mem-proj) */
583 if (other != NULL && prevents_AM(block, load, other))
586 if (other2 != NULL && prevents_AM(block, load, other2))
/* Result of match_arguments(): describes how a node's operands were
 * matched into ia32 addressing/operand form. Further fields (addr,
 * new_op1/new_op2, ls_mode, pinned, mem_proj) are outside this excerpt. */
592 typedef struct ia32_address_mode_t ia32_address_mode_t;
593 struct ia32_address_mode_t {
598 ia32_op_type_t op_type; /* ia32_Normal or ia32_AddrModeS */
602 unsigned commutative : 1; /* operation may swap its operands */
603 unsigned ins_permuted : 1; /* operands were swapped during matching */
606 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
608 /* construct load address */
609 memset(addr, 0, sizeof(addr[0]));
610 ia32_create_address_mode(addr, ptr, 0);
612 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
613 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
614 addr->mem = be_transform_node(mem);
/* Fill am->addr (and load-related fields of am) for source address mode
 * on @p node, which is either a float Const or a Proj(Load). */
617 static void build_address(ia32_address_mode_t *am, ir_node *node,
618 ia32_create_am_flags_t flags)
620 ia32_address_t *addr = &am->addr;
/* float constants are materialised as loads from a global entity */
626 if (is_Const(node)) {
627 ir_entity *entity = create_float_const_entity(node);
628 addr->base = noreg_GP;
629 addr->index = noreg_GP;
631 addr->symconst_ent = entity;
633 am->ls_mode = get_type_mode(get_entity_type(entity));
634 am->pinned = op_pin_state_floats;
/* otherwise: node is a Proj(Load); fold the load into the AM */
638 load = get_Proj_pred(node);
639 ptr = get_Load_ptr(load);
640 mem = get_Load_mem(load);
641 new_mem = be_transform_node(mem);
642 am->pinned = get_irn_pinned(load);
643 am->ls_mode = get_Load_mode(load);
644 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
647 /* construct load address */
648 ia32_create_address_mode(addr, ptr, flags);
650 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
651 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy all address-mode attributes from @p addr onto the ia32 node. */
655 static void set_address(ir_node *node, const ia32_address_t *addr)
657 set_ia32_am_scale(node, addr->scale);
658 set_ia32_am_sc(node, addr->symconst_ent);
659 set_ia32_am_offs_int(node, addr->offset);
660 if (addr->symconst_sign)
661 set_ia32_am_sc_sign(node);
/* NOTE(review): a guard (likely `if (addr->use_frame)`) between the
 * lines below is not visible in this excerpt — confirm in full file */
663 set_ia32_use_frame(node);
664 set_ia32_frame_ent(node, addr->frame_entity);
668 * Apply attributes of a given address mode to a node.
670 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
672 set_address(node, &am->addr);
674 set_ia32_op_type(node, am->op_type);
675 set_ia32_ls_mode(node, am->ls_mode);
676 if (am->pinned == op_pin_state_pinned) {
677 /* beware: some nodes are already pinned and did not allow to change the state */
678 if (get_irn_pinned(node) != op_pin_state_pinned)
679 set_irn_pinned(node, op_pin_state_pinned);
/* propagate commutativity so later phases may swap the operands */
682 set_ia32_commutative(node);
686 * Check, if a given node is a Down-Conv, ie. a integer Conv
687 * from a mode with a mode with more bits to a mode with lesser bits.
688 * Moreover, we return only true if the node has not more than 1 user.
690 * @param node the node
691 * @return non-zero if node is a Down-Conv
693 static int is_downconv(const ir_node *node)
701 /* we only want to skip the conv when we're the only user
702 * (not optimal but for now...)
704 if (get_irn_n_edges(node) > 1)
707 src_mode = get_irn_mode(get_Conv_op(node));
708 dest_mode = get_irn_mode(node);
/* both modes must be GP-register modes and the target not wider */
710 ia32_mode_needs_gp_reg(src_mode) &&
711 ia32_mode_needs_gp_reg(dest_mode) &&
712 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
715 /* Skip all Down-Conv's on a given node and return the resulting node. */
716 ir_node *ia32_skip_downconv(ir_node *node)
718 while (is_downconv(node))
719 node = get_Conv_op(node);
/* Widen @p node to a 32bit mode via an I2I conversion; sign-ness of the
 * source mode decides the target mode (selection lines not visible here). */
724 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
726 ir_mode *mode = get_irn_mode(node);
/* choose a signed or unsigned 32bit target mode accordingly */
731 if (mode_is_signed(mode)) {
736 block = get_nodes_block(node);
737 dbgi = get_irn_dbg_info(node);
739 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
743 * matches operands of a node into ia32 addressing/operand modes. This covers
744 * usage of source address mode, immediates, operations with non 32-bit modes,
746 * The resulting data is filled into the @p am struct. block is the block
747 * of the node whose arguments are matched. op1, op2 are the first and second
748 * input that are matched (op1 may be NULL). other_op is another unrelated
749 * input that is not matched! but which is needed sometimes to check if AM
750 * for op1/op2 is legal.
751 * @p flags describes the supported modes of the operation in detail.
753 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
754 ir_node *op1, ir_node *op2, ir_node *other_op,
757 ia32_address_t *addr = &am->addr;
758 ir_mode *mode = get_irn_mode(op2);
759 int mode_bits = get_mode_size_bits(mode);
760 ir_node *new_op1, *new_op2;
762 unsigned commutative;
763 int use_am_and_immediates;
766 memset(am, 0, sizeof(am[0]));
/* decode the individual match flags once up front */
768 commutative = (flags & match_commutative) != 0;
769 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
770 use_am = (flags & match_am) != 0;
771 use_immediate = (flags & match_immediate) != 0;
772 assert(!use_am_and_immediates || use_immediate);
775 assert(!commutative || op1 != NULL);
776 assert(use_am || !(flags & match_8bit_am));
777 assert(use_am || !(flags & match_16bit_am));
/* small modes are only allowed in AM when explicitly supported */
779 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
780 (mode_bits == 16 && !(flags & match_16bit_am))) {
784 /* we can simply skip downconvs for mode neutral nodes: the upper bits
785 * can be random for these operations */
786 if (flags & match_mode_neutral) {
787 op2 = ia32_skip_downconv(op2);
789 op1 = ia32_skip_downconv(op1);
793 /* match immediates. firm nodes are normalized: constants are always on the
796 if (!(flags & match_try_am) && use_immediate) {
797 new_op2 = try_create_Immediate(op2, 0);
/* try source address mode on op2 first ... */
800 if (new_op2 == NULL &&
801 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
802 build_address(am, op2, 0);
803 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
804 if (mode_is_float(mode)) {
805 new_op2 = ia32_new_NoReg_vfp(env_cg);
809 am->op_type = ia32_AddrModeS;
/* ... then on op1 (only legal for commutative operations) */
810 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
812 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
814 build_address(am, op1, 0);
816 if (mode_is_float(mode)) {
817 noreg = ia32_new_NoReg_vfp(env_cg);
/* op1 went into the AM slot, so the remaining register operand
 * comes from op2 — record that the inputs were swapped */
822 if (new_op2 != NULL) {
825 new_op1 = be_transform_node(op2);
827 am->ins_permuted = 1;
829 am->op_type = ia32_AddrModeS;
/* no AM matched: plain register/register (or reg/imm) operation */
831 am->op_type = ia32_Normal;
833 if (flags & match_try_am) {
839 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
841 new_op2 = be_transform_node(op2);
843 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
/* fill unused address components with defaults */
845 if (addr->base == NULL)
846 addr->base = noreg_GP;
847 if (addr->index == NULL)
848 addr->index = noreg_GP;
849 if (addr->mem == NULL)
852 am->new_op1 = new_op1;
853 am->new_op2 = new_op2;
854 am->commutative = commutative;
858 * "Fixes" a node that uses address mode by turning it into mode_T
859 * and returning a pn_ia32_res Proj.
861 * @param node the node
862 * @param am its address mode
864 * @return a Proj(pn_ia32_res) if a memory address mode is used,
867 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do when no load was folded into the node */
872 if (am->mem_proj == NULL)
875 /* we have to create a mode_T so the old MemProj can attach to us */
876 mode = get_irn_mode(node);
877 load = get_Proj_pred(am->mem_proj);
/* redirect users of the old load to the new AM node */
879 be_set_transformed_node(load, node);
881 if (mode != mode_T) {
882 set_irn_mode(node, mode_T);
883 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
890 * Construct a standard binary operation, set AM and immediate if required.
892 * @param node The original node for which the binop is created
893 * @param op1 The first operand
894 * @param op2 The second operand
895 * @param func The node constructor function
896 * @return The constructed ia32 node.
898 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
899 construct_binop_func *func, match_flags_t flags)
902 ir_node *block, *new_block, *new_node;
903 ia32_address_mode_t am;
904 ia32_address_t *addr = &am.addr;
906 block = get_nodes_block(node);
/* match operands into AM/immediate form first */
907 match_arguments(&am, block, op1, op2, NULL, flags);
909 dbgi = get_irn_dbg_info(node);
910 new_block = be_transform_node(block);
911 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
912 am.new_op1, am.new_op2);
913 set_am_attributes(new_node, &am);
914 /* we can't use source address mode anymore when using immediates */
915 if (!(flags & match_am_and_immediates) &&
916 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
917 set_ia32_am_support(new_node, ia32_am_none);
918 SET_IA32_ORIG_NODE(new_node, node);
/* attach the old Load's MemProj to the new node if one was folded */
920 new_node = fix_mem_proj(new_node, &am);
926 * Generic names for the inputs of an ia32 binary op.
929 n_ia32_l_binop_left, /**< ia32 left input */
930 n_ia32_l_binop_right, /**< ia32 right input */
931 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* the generic indices must line up with the generated Adc/Sbb inputs so
 * gen_binop_flags() can use them for either node */
933 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
934 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
935 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
936 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
937 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
938 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
941 * Construct a binary operation which also consumes the eflags.
943 * @param node The node to transform
944 * @param func The node constructor function
945 * @param flags The match flags
946 * @return The constructor ia32 node
948 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
951 ir_node *src_block = get_nodes_block(node);
952 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
953 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
954 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
956 ir_node *block, *new_node, *new_eflags;
957 ia32_address_mode_t am;
958 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded load */
960 match_arguments(&am, src_block, op1, op2, eflags, flags);
962 dbgi = get_irn_dbg_info(node);
963 block = be_transform_node(src_block);
964 new_eflags = be_transform_node(eflags);
965 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
966 am.new_op1, am.new_op2, new_eflags);
967 set_am_attributes(new_node, &am);
968 /* we can't use source address mode anymore when using immediates */
969 if (!(flags & match_am_and_immediates) &&
970 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
971 set_ia32_am_support(new_node, ia32_am_none);
972 SET_IA32_ORIG_NODE(new_node, node);
974 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed) initial x87 FPU control word node. */
979 static ir_node *get_fpcw(void)
/* already fetched once — reuse the cached node */
982 if (initial_fpcw != NULL)
985 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
986 &ia32_fp_cw_regs[REG_FPCW]);
987 initial_fpcw = be_transform_node(fpcw);
993 * Construct a standard binary operation, set AM and immediate if required.
995 * @param op1 The first operand
996 * @param op2 The second operand
997 * @param func The node constructor function
998 * @return The constructed ia32 node.
1000 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1001 construct_binop_float_func *func)
1003 ir_mode *mode = get_irn_mode(node);
1005 ir_node *block, *new_block, *new_node;
1006 ia32_address_mode_t am;
1007 ia32_address_t *addr = &am.addr;
1008 ia32_x87_attr_t *attr;
1009 /* All operations are considered commutative, because there are reverse
1011 match_flags_t flags = match_commutative;
1013 /* cannot use address mode with long double on x87 */
1014 if (get_mode_size_bits(mode) <= 64)
1017 block = get_nodes_block(node);
1018 match_arguments(&am, block, op1, op2, NULL, flags);
1020 dbgi = get_irn_dbg_info(node);
1021 new_block = be_transform_node(block);
/* x87 binops additionally consume the FPU control word */
1022 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1023 am.new_op1, am.new_op2, get_fpcw());
1024 set_am_attributes(new_node, &am);
/* remember whether operands were swapped so emit can pick the
 * reverse instruction form */
1026 attr = get_ia32_x87_attr(new_node);
1027 attr->attr.data.ins_permuted = am.ins_permuted;
1029 SET_IA32_ORIG_NODE(new_node, node);
1031 new_node = fix_mem_proj(new_node, &am);
1037 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1039 * @param op1 The first operand
1040 * @param op2 The second operand
1041 * @param func The node constructor function
1042 * @return The constructed ia32 node.
1044 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1045 construct_shift_func *func,
1046 match_flags_t flags)
1049 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1051 assert(! mode_is_float(get_irn_mode(node)));
1052 assert(flags & match_immediate);
1053 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* widen/narrow the value operand as the flags demand */
1055 if (flags & match_mode_neutral) {
1056 op1 = ia32_skip_downconv(op1);
1057 new_op1 = be_transform_node(op1);
1058 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1059 new_op1 = create_upconv(op1, node);
1061 new_op1 = be_transform_node(op1);
1064 /* the shift amount can be any mode that is bigger than 5 bits, since all
1065 * other bits are ignored anyway */
1066 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1067 ir_node *const op = get_Conv_op(op2);
1068 if (mode_is_float(get_irn_mode(op)))
1071 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
/* the shift amount may become an immediate */
1073 new_op2 = create_immediate_or_transform(op2, 0);
1075 dbgi = get_irn_dbg_info(node);
1076 block = get_nodes_block(node);
1077 new_block = be_transform_node(block);
1078 new_node = func(dbgi, new_block, new_op1, new_op2);
1079 SET_IA32_ORIG_NODE(new_node, node);
1081 /* lowered shift instruction may have a dependency operand, handle it here */
1082 if (get_irn_arity(node) == 3) {
1083 /* we have a dependency */
1084 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1085 add_irn_dep(new_node, new_dep);
1093 * Construct a standard unary operation, set AM and immediate if required.
1095 * @param op The operand
1096 * @param func The node constructor function
1097 * @return The constructed ia32 node.
1099 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1100 match_flags_t flags)
1103 ir_node *block, *new_block, *new_op, *new_node;
/* only mode-neutrality is meaningful for unary operations */
1105 assert(flags == 0 || flags == match_mode_neutral);
1106 if (flags & match_mode_neutral) {
1107 op = ia32_skip_downconv(op);
1110 new_op = be_transform_node(op);
1111 dbgi = get_irn_dbg_info(node);
1112 block = get_nodes_block(node);
1113 new_block = be_transform_node(block);
1114 new_node = func(dbgi, new_block, new_op);
1116 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node computing the address described by @p addr
 * (base/index defaulting handled in lines not visible in this excerpt). */
1121 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1122 ia32_address_t *addr)
1124 ir_node *base, *index, *res;
1130 base = be_transform_node(base);
1133 index = addr->index;
1134 if (index == NULL) {
1137 index = be_transform_node(index);
1140 res = new_bd_ia32_Lea(dbgi, block, base, index);
/* copy scale/offset/symconst/frame info onto the Lea */
1141 set_address(res, addr);
1147 * Returns non-zero if a given address mode has a symbolic or
1148 * numerical offset != 0.
1150 static int am_has_immediates(const ia32_address_t *addr)
1152 return addr->offset != 0 || addr->symconst_ent != NULL
1153 || addr->frame_entity || addr->use_frame;
1157 * Creates an ia32 Add.
1159 * @return the created ia32 Add node
1161 static ir_node *gen_Add(ir_node *node)
1163 ir_mode *mode = get_irn_mode(node);
1164 ir_node *op1 = get_Add_left(node);
1165 ir_node *op2 = get_Add_right(node);
1167 ir_node *block, *new_block, *new_node, *add_immediate_op;
1168 ia32_address_t addr;
1169 ia32_address_mode_t am;
/* float add: SSE2 when available, x87 otherwise */
1171 if (mode_is_float(mode)) {
1172 if (ia32_cg_config.use_sse2)
1173 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1174 match_commutative | match_am);
1176 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1179 ia32_mark_non_am(node);
1181 op2 = ia32_skip_downconv(op2);
1182 op1 = ia32_skip_downconv(op1);
/* integer add strategy:
1186 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1187 * 1. Add with immediate -> Lea
1188 * 2. Add with possible source address mode -> Add
1189 * 3. Otherwise -> Lea
 */
1191 memset(&addr, 0, sizeof(addr));
1192 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1193 add_immediate_op = NULL;
1195 dbgi = get_irn_dbg_info(node);
1196 block = get_nodes_block(node);
1197 new_block = be_transform_node(block);
/* case 0: whole Add folded into an immediate -> plain Const */
1200 if (addr.base == NULL && addr.index == NULL) {
1201 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1202 addr.symconst_sign, addr.offset);
1203 be_dep_on_frame(new_node);
1204 SET_IA32_ORIG_NODE(new_node, node);
1207 /* add with immediate? */
1208 if (addr.index == NULL) {
1209 add_immediate_op = addr.base;
1210 } else if (addr.base == NULL && addr.scale == 0) {
1211 add_immediate_op = addr.index;
1214 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself */
1215 if (!am_has_immediates(&addr)) {
1216 #ifdef DEBUG_libfirm
1217 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1220 return be_transform_node(add_immediate_op);
/* case 1: reg + immediate -> Lea */
1223 new_node = create_lea_from_address(dbgi, new_block, &addr);
1224 SET_IA32_ORIG_NODE(new_node, node);
1228 /* test if we can use source address mode */
1229 match_arguments(&am, block, op1, op2, NULL, match_commutative
1230 | match_mode_neutral | match_am | match_immediate | match_try_am);
1232 /* construct an Add with source address mode */
1233 if (am.op_type == ia32_AddrModeS) {
1234 ia32_address_t *am_addr = &am.addr;
1235 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1236 am_addr->index, am_addr->mem, am.new_op1,
1238 set_am_attributes(new_node, &am);
1239 SET_IA32_ORIG_NODE(new_node, node);
1241 new_node = fix_mem_proj(new_node, &am);
1246 /* otherwise construct a lea */
1247 new_node = create_lea_from_address(dbgi, new_block, &addr);
1248 SET_IA32_ORIG_NODE(new_node, node);
1253  * Creates an ia32 Mul.
1255  * @return the created ia32 Mul node
1257 static ir_node *gen_Mul(ir_node *node)
1259 	ir_node *op1  = get_Mul_left(node);
1260 	ir_node *op2  = get_Mul_right(node);
1261 	ir_mode *mode = get_irn_mode(node);
/* float multiplies: SSE2 xMul or x87 vfmul */
1263 	if (mode_is_float(mode)) {
1264 		if (ia32_cg_config.use_sse2)
1265 			return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1266 			                 match_commutative | match_am);
1268 		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul, allowing AM + immediates on both sides */
1270 	return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1271 	                 match_commutative | match_am | match_mode_neutral |
1272 	                 match_immediate | match_am_and_immediates);
1276  * Creates an ia32 Mulh.
1277  * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1278  * this result while Mul returns the lower 32 bit.
1280  * @return the created ia32 Mulh node
1282 static ir_node *gen_Mulh(ir_node *node)
1284 	ir_node *block     = get_nodes_block(node);
1285 	ir_node *new_block = be_transform_node(block);
1286 	dbg_info *dbgi     = get_irn_dbg_info(node);
1287 	ir_node *op1       = get_Mulh_left(node);
1288 	ir_node *op2       = get_Mulh_right(node);
1289 	ir_mode *mode      = get_irn_mode(node);
1291 	ir_node *proj_res_high;
/* signed -> one-operand IMUL, unsigned -> MUL; take the high-word Proj */
1293 	if (mode_is_signed(mode)) {
1294 		new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1295 		proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1296 		                       mode_Iu, pn_ia32_IMul1OP_res_high);
1298 		new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1299 		proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1300 		                       mode_Iu, pn_ia32_Mul_res_high);
1302 	return proj_res_high;
1306  * Creates an ia32 And.
1308  * @return The created ia32 And node
1310 static ir_node *gen_And(ir_node *node)
1312 	ir_node *op1 = get_And_left(node);
1313 	ir_node *op2 = get_And_right(node);
1314 	assert(! mode_is_float(get_irn_mode(node)));
1316 	/* is it a zero extension? */
/* And x, 0xFF / 0xFFFF is a zero extension from 8/16 bit — emit a Conv
 * (movzx) instead of an and instruction */
1317 	if (is_Const(op2)) {
1318 		tarval *tv = get_Const_tarval(op2);
1319 		long    v  = get_tarval_long(tv);
1321 		if (v == 0xFF || v == 0xFFFF) {
1322 			dbg_info *dbgi   = get_irn_dbg_info(node);
1323 			ir_node  *block  = get_nodes_block(node);
1330 				assert(v == 0xFFFF);
1333 			res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And with AM/immediate matching */
1338 	return gen_binop(node, op1, op2, new_bd_ia32_And,
1339 			match_commutative | match_mode_neutral | match_am | match_immediate);
1345  * Creates an ia32 Or.
1347  * @return The created ia32 Or node
1349 static ir_node *gen_Or(ir_node *node)
1351 	ir_node *op1 = get_Or_left(node);
1352 	ir_node *op2 = get_Or_right(node);
/* Or is integer-only at this point; floats never reach here */
1354 	assert (! mode_is_float(get_irn_mode(node)));
1355 	return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1356 			| match_mode_neutral | match_am | match_immediate);
1362  * Creates an ia32 Eor.
1364  * @return The created ia32 Eor node
1366 static ir_node *gen_Eor(ir_node *node)
1368 	ir_node *op1 = get_Eor_left(node);
1369 	ir_node *op2 = get_Eor_right(node);
/* firm's Eor maps to the x86 Xor instruction */
1371 	assert(! mode_is_float(get_irn_mode(node)));
1372 	return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1373 			| match_mode_neutral | match_am | match_immediate);
1378  * Creates an ia32 Sub.
1380  * @return The created ia32 Sub node
1382 static ir_node *gen_Sub(ir_node *node)
1384 	ir_node  *op1  = get_Sub_left(node);
1385 	ir_node  *op2  = get_Sub_right(node);
1386 	ir_mode  *mode = get_irn_mode(node);
1388 	if (mode_is_float(mode)) {
1389 		if (ia32_cg_config.use_sse2)
1390 			return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1392 		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x, C should have been normalized to Add x, -C by the middleend */
1395 	if (is_Const(op2)) {
1396 		ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: Sub is not commutative, so no match_commutative here */
1400 	return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1401 			| match_am | match_immediate);
/**
 * Computes the memory input for a node that consumes a load via address
 * mode.  Combines the node's original memory (src_mem) with the memory of
 * the consumed address (am_mem), taking care not to create a memory cycle:
 * a Proj of the very load being folded must not appear in its own memory
 * input.
 */
1404 static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
1405                                  ir_node  *const src_val,
1406                                  ir_node  *const src_mem,
1407                                  ir_node  *const am_mem)
1409 	if (is_NoMem(am_mem)) {
1410 		return be_transform_node(src_mem);
1411 	} else if (is_Proj(src_val) &&
1413 	           get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1414 		/* avoid memory loop */
1416 	} else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* rebuild the Sync, dropping predecessors that would close a loop over
 * the folded load, and append am_mem */
1417 		ir_node  *const ptr_pred = get_Proj_pred(src_val);
1418 		int       const arity    = get_Sync_n_preds(src_mem);
1423 		NEW_ARR_A(ir_node*, ins, arity + 1);
1425 		/* NOTE: This sometimes produces dead-code because the old sync in
1426 		 * src_mem might not be used anymore, we should detect this case
1427 		 * and kill the sync... */
1428 		for (i = arity - 1; i >= 0; --i) {
1429 			ir_node *const pred = get_Sync_pred(src_mem, i);
1431 			/* avoid memory loop */
1432 			if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1435 			ins[n++] = be_transform_node(pred);
1440 		return new_r_Sync(irg, block, n, ins);
/* default: join the two memories with a fresh 2-input Sync */
1444 		ins[0] = be_transform_node(src_mem);
1446 		return new_r_Sync(irg, block, 2, ins);
1451  * Create a 32bit to 64bit signed extension.
1453  * @param dbgi   debug info
1454  * @param block  the block where node nodes should be placed
1455  * @param val    the value to extend
1456  * @param orig   the original node
1458 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1459                                  ir_node *val, const ir_node *orig)
/* either cltd/cdq (short encoding, clobbers eax/edx pair) or an explicit
 * arithmetic shift right by 31, depending on the target config */
1464 	if (ia32_cg_config.use_short_sex_eax) {
1465 		ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1466 		be_dep_on_frame(pval);
1467 		res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1469 		ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1470 		res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1472 	SET_IA32_ORIG_NODE(res, orig);
1477  * Generates an ia32 DivMod with additional infrastructure for the
1478  * register allocator if needed.
/* Shared worker for Div, Mod and DivMod: all three map to the same x86
 * div/idiv instruction, which produces quotient and remainder at once. */
1480 static ir_node *create_Div(ir_node *node)
1482 	dbg_info *dbgi      = get_irn_dbg_info(node);
1483 	ir_node  *block     = get_nodes_block(node);
1484 	ir_node  *new_block = be_transform_node(block);
1491 	ir_node  *sign_extension;
1492 	ia32_address_mode_t  am;
1493 	ia32_address_t      *addr = &am.addr;
1495 	/* the upper bits have random contents for smaller modes */
/* dispatch on the firm opcode to fetch operands, memory and result mode */
1496 	switch (get_irn_opcode(node)) {
1498 		op1     = get_Div_left(node);
1499 		op2     = get_Div_right(node);
1500 		mem     = get_Div_mem(node);
1501 		mode    = get_Div_resmode(node);
1504 		op1     = get_Mod_left(node);
1505 		op2     = get_Mod_right(node);
1506 		mem     = get_Mod_mem(node);
1507 		mode    = get_Mod_resmode(node);
1510 		op1     = get_DivMod_left(node);
1511 		op2     = get_DivMod_right(node);
1512 		mem     = get_DivMod_mem(node);
1513 		mode    = get_DivMod_resmode(node);
1516 		panic("invalid divmod node %+F", node);
1519 	match_arguments(&am, block, op1, op2, NULL, match_am);
1521 	/* Beware: We don't need a Sync, if the memory predecessor of the Div node
1522 	   is the memory of the consumed address. We can have only the second op as address
1523 	   in Div nodes, so check only op2. */
1524 	new_mem = transform_AM_mem(current_ir_graph, block, op2, mem, addr->mem);
/* idiv needs edx:eax sign-extended; div needs edx zeroed */
1526 	if (mode_is_signed(mode)) {
1527 		sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1528 		new_node       = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1529 				addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1531 		sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0);
1532 		be_dep_on_frame(sign_extension);
1534 		new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1535 		                           addr->index, new_mem, am.new_op2,
1536 		                           am.new_op1, sign_extension);
1539 	set_irn_pinned(new_node, get_irn_pinned(node));
1541 	set_am_attributes(new_node, &am);
1542 	SET_IA32_ORIG_NODE(new_node, node);
1544 	new_node = fix_mem_proj(new_node, &am);
1550  * Generates an ia32 Mod.
/* thin wrapper: the remainder Proj is selected by the caller */
1552 static ir_node *gen_Mod(ir_node *node)
1554 	return create_Div(node);
1558  * Generates an ia32 Div.
/* thin wrapper: the quotient Proj is selected by the caller */
1560 static ir_node *gen_Div(ir_node *node)
1562 	return create_Div(node);
1566  * Generates an ia32 DivMod.
/* thin wrapper: both result Projs are selected by the caller */
1568 static ir_node *gen_DivMod(ir_node *node)
1570 	return create_Div(node);
1576  * Creates an ia32 floating Div.
1578  * @return The created ia32 xDiv node
1580 static ir_node *gen_Quot(ir_node *node)
1582 	ir_node  *op1 = get_Quot_left(node);
1583 	ir_node  *op2 = get_Quot_right(node);
/* SSE2 divss/divsd, otherwise x87 fdiv */
1585 	if (ia32_cg_config.use_sse2) {
1586 		return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1588 		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1594  * Creates an ia32 Shl.
1596  * @return The created ia32 Shl node
1598 static ir_node *gen_Shl(ir_node *node)
1600 	ir_node *left  = get_Shl_left(node);
1601 	ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral (low bits unaffected by upper garbage) */
1603 	return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1604 	                       match_mode_neutral | match_immediate);
1608  * Creates an ia32 Shr.
1610  * @return The created ia32 Shr node
1612 static ir_node *gen_Shr(ir_node *node)
1614 	ir_node *left  = get_Shr_left(node);
1615 	ir_node *right = get_Shr_right(node);
/* logical right shift needs correct upper bits, so no match_mode_neutral */
1617 	return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1623  * Creates an ia32 Sar.
1625  * @return The created ia32 Shrs node
1627 static ir_node *gen_Shrs(ir_node *node)
1629 	ir_node *left  = get_Shrs_left(node);
1630 	ir_node *right = get_Shrs_right(node);
/* Shrs x, 31 extracts the sign bit — reuse the 32->64 sign-extension
 * helper (the const-value check sits in lines not visible here) */
1632 	if (is_Const(right)) {
1633 		tarval *tv  = get_Const_tarval(right);
1634 		long    val = get_tarval_long(tv);
1636 			/* this is a sign extension */
1637 			dbg_info *dbgi   = get_irn_dbg_info(node);
1638 			ir_node  *block  = be_transform_node(get_nodes_block(node));
1639 			ir_node  *new_op = be_transform_node(left);
1641 			return create_sex_32_64(dbgi, block, new_op, node);
1645 	/* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C in {16, 24} is a sign extension from
 * 16/8 bit — emit a Conv (movsx) instead of the shift pair */
1646 	if (is_Const(right) && is_Shl(left)) {
1647 		ir_node *shl_left  = get_Shl_left(left);
1648 		ir_node *shl_right = get_Shl_right(left);
1649 		if (is_Const(shl_right)) {
1650 			tarval *tv1 = get_Const_tarval(right);
1651 			tarval *tv2 = get_Const_tarval(shl_right);
1652 			if (tv1 == tv2 && tarval_is_long(tv1)) {
1653 				long val = get_tarval_long(tv1);
1654 				if (val == 16 || val == 24) {
1655 					dbg_info *dbgi   = get_irn_dbg_info(node);
1656 					ir_node  *block  = get_nodes_block(node);
1666 					res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1675 	return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1681  * Creates an ia32 Rol.
1683  * @param op1   The first operator
1684  * @param op2   The second operator
1685  * @return The created ia32 RotL node
1687 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1689 	return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1695  * Creates an ia32 Ror.
1696  * NOTE: There is no RotR with immediate because this would always be a RotL
1697  *       "imm-mode_size_bits" which can be pre-calculated.
1699  * @param op1   The first operator
1700  * @param op2   The second operator
1701  * @return The created ia32 RotR node
1703 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1705 	return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1711  * Creates an ia32 RotR or RotL (depending on the found pattern).
1713  * @return The created ia32 RotL or RotR node
1715 static ir_node *gen_Rotl(ir_node *node)
1717 	ir_node *rotate = NULL;
1718 	ir_node *op1    = get_Rotl_left(node);
1719 	ir_node *op2    = get_Rotl_right(node);
1721 	/* Firm has only RotL, so we are looking for a right (op2)
1722 	   operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1723 	   that means we can create a RotR instead of an Add and a RotL */
1727 		ir_node *left  = get_Add_left(add);
1728 		ir_node *right = get_Add_right(add);
1729 		if (is_Const(right)) {
1730 			tarval  *tv   = get_Const_tarval(right);
1731 			ir_mode *mode = get_irn_mode(node);
1732 			long     bits = get_mode_size_bits(mode);
/* matched RotL(x, bits - e) -> RotR(x, e) */
1734 			if (is_Minus(left) &&
1735 			    tarval_is_long(tv) &&
1736 			    get_tarval_long(tv) == bits &&
1739 				DB((dbg, LEVEL_1, "RotL into RotR ... "));
1740 				rotate = gen_Ror(node, op1, get_Minus_op(left));
/* no pattern matched: fall back to a plain RotL */
1745 	if (rotate == NULL) {
1746 		rotate = gen_Rol(node, op1, op2);
1755  * Transforms a Minus node.
1757  * @return The created ia32 Minus node
1759 static ir_node *gen_Minus(ir_node *node)
1761 	ir_node   *op    = get_Minus_op(node);
1762 	ir_node   *block = be_transform_node(get_nodes_block(node));
1763 	dbg_info  *dbgi  = get_irn_dbg_info(node);
1764 	ir_mode   *mode  = get_irn_mode(node);
1769 	if (mode_is_float(mode)) {
1770 		ir_node *new_op = be_transform_node(op);
1771 		if (ia32_cg_config.use_sse2) {
1772 			/* TODO: non-optimal... if we have many xXors, then we should
1773 			 * rather create a load for the const and use that instead of
1774 			 * several AM nodes... */
/* SSE negation = xor with a sign-bit mask constant loaded via AM */
1775 			ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1777 			new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1778 			                            nomem, new_op, noreg_xmm);
1780 			size = get_mode_size_bits(mode);
1781 			ent  = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1783 			set_ia32_am_sc(new_node, ent);
1784 			set_ia32_op_type(new_node, ia32_AddrModeS);
1785 			set_ia32_ls_mode(new_node, mode);
/* x87: dedicated change-sign instruction */
1787 			new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negation: plain neg */
1790 		new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1793 	SET_IA32_ORIG_NODE(new_node, node);
1799  * Transforms a Not node.
1801  * @return The created ia32 Not node
1803 static ir_node *gen_Not(ir_node *node)
1805 	ir_node *op = get_Not_op(node);
1807 	assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1808 	assert (! mode_is_float(get_irn_mode(node)));
1810 	return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1816  * Transforms an Abs node.
1818  * @return The created ia32 Abs node
1820 static ir_node *gen_Abs(ir_node *node)
1822 	ir_node   *block     = get_nodes_block(node);
1823 	ir_node   *new_block = be_transform_node(block);
1824 	ir_node   *op        = get_Abs_op(node);
1825 	dbg_info  *dbgi      = get_irn_dbg_info(node);
1826 	ir_mode   *mode      = get_irn_mode(node);
1832 	if (mode_is_float(mode)) {
1833 		new_op = be_transform_node(op);
1835 		if (ia32_cg_config.use_sse2) {
/* SSE abs = and with a mask that clears the sign bit */
1836 			ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1837 			new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1838 			                            nomem, new_op, noreg_fp);
1840 			size = get_mode_size_bits(mode);
1841 			ent  = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1843 			set_ia32_am_sc(new_node, ent);
1845 			SET_IA32_ORIG_NODE(new_node, node);
1847 			set_ia32_op_type(new_node, ia32_AddrModeS);
1848 			set_ia32_ls_mode(new_node, mode);
/* x87: dedicated fabs instruction */
1850 			new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1851 			SET_IA32_ORIG_NODE(new_node, node);
/* integer abs via the branchless idiom:
 *   s = x >> 31 (all sign bits); abs(x) = (x ^ s) - s */
1854 		ir_node *xor, *sign_extension;
1856 		if (get_mode_size_bits(mode) == 32) {
1857 			new_op = be_transform_node(op);
1859 			new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1862 		sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1864 		xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1865 		                      nomem, new_op, sign_extension);
1866 		SET_IA32_ORIG_NODE(xor, node);
1868 		new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1869 		                           nomem, xor, sign_extension);
1870 		SET_IA32_ORIG_NODE(new_node, node);
1877  * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* bt sets the carry flag to bit n of x; callers branch on Jc/Jnc */
1879 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1881 	dbg_info *dbgi      = get_irn_dbg_info(cmp);
1882 	ir_node  *block     = get_nodes_block(cmp);
1883 	ir_node  *new_block = be_transform_node(block);
1884 	ir_node  *op1       = be_transform_node(x);
1885 	ir_node  *op2       = be_transform_node(n);
1887 	return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1891  * Transform a node returning a "flag" result.
1893  * @param node     the node to transform
1894  * @param pnc_out  the compare mode to use
/* Produces a flags-producing ia32 node (Cmp/Test/Bt) for a mode_b value
 * and reports via *pnc_out which condition the consumer must test. */
1896 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1903 	/* we have a Cmp as input */
1904 	if (is_Proj(node)) {
1905 		ir_node *pred = get_Proj_pred(node);
1907 			pn_Cmp pnc = get_Proj_proj(node);
/* (x & (1 << n)) ==/!= 0 can be tested with a single bt instruction;
 * both operand orders of the And are checked below */
1908 			if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1909 				ir_node *l = get_Cmp_left(pred);
1910 				ir_node *r = get_Cmp_right(pred);
1912 					ir_node *la = get_And_left(l);
1913 					ir_node *ra = get_And_right(l);
1915 						ir_node *c = get_Shl_left(la);
1916 						if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1917 							/* (1 << n) & ra) */
1918 							ir_node *n = get_Shl_right(la);
1919 							flags    = gen_bt(pred, ra, n);
1920 							/* we must generate a Jc/Jnc jump */
/* bt reports the bit in the carry flag; Lt/Ge (unsigned) select Jc/Jnc */
1921 							pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1924 							*pnc_out = ia32_pn_Cmp_unsigned | pnc;
1929 						ir_node *c = get_Shl_left(ra);
1930 						if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1931 							/* la & (1 << n)) */
1932 							ir_node *n = get_Shl_right(ra);
1933 							flags    = gen_bt(pred, la, n);
1934 							/* we must generate a Jc/Jnc jump */
1935 							pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1938 							*pnc_out = ia32_pn_Cmp_unsigned | pnc;
1944 			flags = be_transform_node(pred);
1950 	/* a mode_b value, we have to compare it against 0 */
1951 	dbgi      = get_irn_dbg_info(node);
1952 	new_block = be_transform_node(get_nodes_block(node));
1953 	new_op    = be_transform_node(node);
1954 	flags     = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1955 	                             new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1956 	*pnc_out  = pn_Cmp_Lg;
1961  * Transforms a Load.
1963  * @return the created ia32 Load node
1965 static ir_node *gen_Load(ir_node *node)
1967 	ir_node  *old_block = get_nodes_block(node);
1968 	ir_node  *block   = be_transform_node(old_block);
1969 	ir_node  *ptr     = get_Load_ptr(node);
1970 	ir_node  *mem     = get_Load_mem(node);
1971 	ir_node  *new_mem = be_transform_node(mem);
1974 	dbg_info *dbgi    = get_irn_dbg_info(node);
1975 	ir_mode  *mode    = get_Load_mode(node);
1978 	ia32_address_t addr;
1980 	/* construct load address */
1981 	memset(&addr, 0, sizeof(addr));
1982 	ia32_create_address_mode(&addr, ptr, 0);
1989 		base = be_transform_node(base);
1992 	if (index == NULL) {
1995 		index = be_transform_node(index);
/* pick the load flavor: SSE2 xLoad, x87 vfld, or GP Load; small integer
 * modes are loaded with a widening Conv (movzx/movsx) directly */
1998 	if (mode_is_float(mode)) {
1999 		if (ia32_cg_config.use_sse2) {
2000 			new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2002 			res_mode = mode_xmm;
2004 			new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2006 			res_mode = mode_vfp;
2009 		assert(mode != mode_b);
2011 		/* create a conv node with address mode for smaller modes */
2012 		if (get_mode_size_bits(mode) < 32) {
2013 			new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2014 			                                new_mem, noreg_GP, mode);
2016 			new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2021 	set_irn_pinned(new_node, get_irn_pinned(node));
2022 	set_ia32_op_type(new_node, ia32_AddrModeS);
2023 	set_ia32_ls_mode(new_node, mode);
2024 	set_address(new_node, &addr);
/* a floating (unpinned) load may be rematerialized by the spiller */
2026 	if (get_irn_pinned(node) == op_pin_state_floats) {
2027 		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2028 				&& pn_ia32_vfld_res == pn_ia32_Load_res
2029 				&& pn_ia32_Load_res == pn_ia32_res);
2030 		arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2033 	SET_IA32_ORIG_NODE(new_node, node);
2035 	be_dep_on_frame(new_node);
/**
 * Checks whether a Store(op(Load)) pattern may be folded into a
 * destination-address-mode instruction (read-modify-write on memory).
 *
 * @param block  the block of the prospective store
 * @param node   the value operand (expected: Proj of a Load)
 * @param mem    the store's memory input
 * @param ptr    the store's address
 * @param other  the other operand of the binop, or NULL for unops
 * @return non-zero if destination AM is safe to use
 */
2039 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2040                        ir_node *ptr, ir_node *other)
2047 	/* we only use address mode if we're the only user of the load */
2048 	if (get_irn_n_edges(node) > 1)
2051 	load = get_Proj_pred(node);
2054 	if (get_nodes_block(load) != block)
2057 	/* store should have the same pointer as the load */
2058 	if (get_Load_ptr(load) != ptr)
2061 	/* don't do AM if other node inputs depend on the load (via mem-proj) */
2062 	if (other != NULL &&
2063 	    get_nodes_block(other) == block &&
2064 	    heights_reachable_in_block(heights, other, load)) {
2068 	if (prevents_AM(block, load, mem))
2070 	/* Store should be attached to the load via mem */
2071 	assert(heights_reachable_in_block(heights, mem, load));
/**
 * Builds a destination-address-mode binop (e.g. add [mem], reg) for a
 * Store(op(Load(p), x), p) pattern.  Returns the new node, or falls through
 * (path not visible here) when neither operand qualifies for dest-AM.
 */
2076 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2077                               ir_node *mem, ir_node *ptr, ir_mode *mode,
2078                               construct_binop_dest_func *func,
2079                               construct_binop_dest_func *func8bit,
2080                               match_flags_t flags)
2082 	ir_node  *src_block = get_nodes_block(node);
2090 	ia32_address_mode_t  am;
2091 	ia32_address_t      *addr = &am.addr;
2092 	memset(&am, 0, sizeof(am));
2094 	assert(flags & match_immediate); /* there is no destam node without... */
2095 	commutative = (flags & match_commutative) != 0;
/* try op1 as the memory operand first; for commutative ops also op2 */
2097 	if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2098 		build_address(&am, op1, ia32_create_am_double_use);
2099 		new_op = create_immediate_or_transform(op2, 0);
2100 	} else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2101 		build_address(&am, op2, ia32_create_am_double_use);
2102 		new_op = create_immediate_or_transform(op1, 0);
/* fill unset address parts with the no-register placeholder */
2107 	if (addr->base == NULL)
2108 		addr->base = noreg_GP;
2109 	if (addr->index == NULL)
2110 		addr->index = noreg_GP;
2111 	if (addr->mem == NULL)
2114 	dbgi    = get_irn_dbg_info(node);
2115 	block   = be_transform_node(src_block);
2116 	new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
/* 8-bit ops need the dedicated 8-bit constructor (byte register classes) */
2118 	if (get_mode_size_bits(mode) == 8) {
2119 		new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2121 		new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2123 	set_address(new_node, addr);
2124 	set_ia32_op_type(new_node, ia32_AddrModeD);
2125 	set_ia32_ls_mode(new_node, mode);
2126 	SET_IA32_ORIG_NODE(new_node, node);
/* reroute the folded load's mem-proj users to the new RMW node */
2128 	be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2129 	mem_proj = be_transform_node(am.mem_proj);
2130 	be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Builds a destination-address-mode unop (e.g. not [mem], neg [mem]) for a
 * Store(op(Load(p)), p) pattern; returns NULL-equivalent early (exact
 * failure value not visible here) if the pattern does not qualify.
 */
2135 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2136                              ir_node *ptr, ir_mode *mode,
2137                              construct_unop_dest_func *func)
2139 	ir_node  *src_block = get_nodes_block(node);
2145 	ia32_address_mode_t  am;
2146 	ia32_address_t      *addr = &am.addr;
2148 	if (!use_dest_am(src_block, op, mem, ptr, NULL))
2151 	memset(&am, 0, sizeof(am));
2152 	build_address(&am, op, ia32_create_am_double_use);
2154 	dbgi     = get_irn_dbg_info(node);
2155 	block    = be_transform_node(src_block);
2156 	new_mem  = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
2157 	new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2158 	set_address(new_node, addr);
2159 	set_ia32_op_type(new_node, ia32_AddrModeD);
2160 	set_ia32_ls_mode(new_node, mode);
2161 	SET_IA32_ORIG_NODE(new_node, node);
/* reroute the folded load's mem-proj users to the new RMW node */
2163 	be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2164 	mem_proj = be_transform_node(am.mem_proj);
2165 	be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Tries to turn Store(Mux(sel, 0/1, 1/0)) into a SetMem (setcc directly to
 * memory).  Only applies to 8-bit stores of boolean-valued Muxes; returns
 * early (failure value not visible here) otherwise.
 */
2170 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2172 	ir_mode  *mode      = get_irn_mode(node);
2173 	ir_node  *mux_true  = get_Mux_true(node);
2174 	ir_node  *mux_false = get_Mux_false(node);
2184 	ia32_address_t addr;
/* setcc writes a single byte, so only 8-bit modes qualify */
2186 	if (get_mode_size_bits(mode) != 8)
/* Mux(c, 1, 0) stores the condition, Mux(c, 0, 1) its negation */
2189 	if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2191 	} else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2197 	build_address_ptr(&addr, ptr, mem);
2199 	dbgi      = get_irn_dbg_info(node);
2200 	block     = get_nodes_block(node);
2201 	new_block = be_transform_node(block);
2202 	cond      = get_Mux_sel(node);
2203 	flags     = get_flags_node(cond, &pnc);
2204 	new_mem   = be_transform_node(mem);
2205 	new_node  = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2206 	                               addr.index, addr.mem, flags, pnc, negated);
2207 	set_address(new_node, &addr);
2208 	set_ia32_op_type(new_node, ia32_AddrModeD);
2209 	set_ia32_ls_mode(new_node, mode);
2210 	SET_IA32_ORIG_NODE(new_node, node);
/**
 * Tries to transform a Store into a destination-address-mode instruction
 * (read-modify-write on memory, e.g. add [mem], x) by matching the stored
 * value's opcode.  Returns the new node, or a failure value (not visible
 * here — presumably NULL; TODO confirm) when no pattern applies.
 */
2215 static ir_node *try_create_dest_am(ir_node *node)
2217 	ir_node  *val  = get_Store_value(node);
2218 	ir_node  *mem  = get_Store_mem(node);
2219 	ir_node  *ptr  = get_Store_ptr(node);
2220 	ir_mode  *mode = get_irn_mode(val);
2221 	unsigned  bits = get_mode_size_bits(mode);
2226 	/* handle only GP modes for now... */
2227 	if (!ia32_mode_needs_gp_reg(mode))
2231 	/* store must be the only user of the val node */
2232 	if (get_irn_n_edges(val) > 1)
2234 	/* skip pointless convs */
/* a Conv to a mode at most as wide doesn't change the stored bytes */
2236 		ir_node *conv_op   = get_Conv_op(val);
2237 		ir_mode *pred_mode = get_irn_mode(conv_op);
2238 		if (!ia32_mode_needs_gp_reg(pred_mode))
2240 		if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2248 	/* value must be in the same block */
2249 	if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the computed value's opcode */
2252 	switch (get_irn_opcode(val)) {
2254 		op1      = get_Add_left(val);
2255 		op2      = get_Add_right(val);
/* add [mem], +/-1 becomes inc/dec when the target prefers them */
2256 		if (ia32_cg_config.use_incdec) {
2257 			if (is_Const_1(op2)) {
2258 				new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2260 			} else if (is_Const_Minus_1(op2)) {
2261 				new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2265 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2266 		                         new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2267 		                         match_commutative | match_immediate);
2270 		op1      = get_Sub_left(val);
2271 		op2      = get_Sub_right(val);
2272 		if (is_Const(op2)) {
2273 			ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2275 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2276 		                         new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2280 		op1      = get_And_left(val);
2281 		op2      = get_And_right(val);
2282 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2283 		                         new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2284 		                         match_commutative | match_immediate);
2287 		op1      = get_Or_left(val);
2288 		op2      = get_Or_right(val);
2289 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2290 		                         new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2291 		                         match_commutative | match_immediate);
2294 		op1      = get_Eor_left(val);
2295 		op2      = get_Eor_right(val);
2296 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2297 		                         new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2298 		                         match_commutative | match_immediate);
/* shifts/rotates have no 8-bit-specific constructor: same func twice */
2301 		op1      = get_Shl_left(val);
2302 		op2      = get_Shl_right(val);
2303 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2304 		                         new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2308 		op1      = get_Shr_left(val);
2309 		op2      = get_Shr_right(val);
2310 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2311 		                         new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2315 		op1      = get_Shrs_left(val);
2316 		op2      = get_Shrs_right(val);
2317 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2318 		                         new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2322 		op1      = get_Rotl_left(val);
2323 		op2      = get_Rotl_right(val);
2324 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2325 		                         new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2328 		/* TODO: match ROR patterns... */
2330 		new_node = try_create_SetMem(val, ptr, mem);
2333 		op1      = get_Minus_op(val);
2334 		new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2337 		/* should be lowered already */
2338 		assert(mode != mode_b);
2339 		op1      = get_Not_op(val);
2340 		new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* inherit the pinned state of the original store */
2346 	if (new_node != NULL) {
2347 		if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2348 		    get_irn_pinned(node) == op_pin_state_pinned) {
2349 			set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Returns whether an integer mode could be the target of a float-to-int
 * conversion handled by fist: signed and 16 or 32 bits wide.
 */
2356 static bool possible_int_mode_for_fp(ir_mode *mode)
2360 	if (!mode_is_signed(mode))
2362 	size = get_mode_size_bits(mode);
2363 	if (size != 16 && size != 32)
/**
 * Returns non-zero if the node is a Conv from a float mode to a
 * fist-compatible integer mode (see possible_int_mode_for_fp()).
 */
2368 static int is_float_to_int_conv(const ir_node *node)
2370 	ir_mode  *mode = get_irn_mode(node);
2374 	if (!possible_int_mode_for_fp(mode))
2379 	conv_op   = get_Conv_op(node);
2380 	conv_mode = get_irn_mode(conv_op);
2382 	if (!mode_is_float(conv_mode))
2389  * Transform a Store(floatConst) into a sequence of
/* ...32-bit integer stores of the constant's raw bytes (no FPU needed). */
2392  * @return the created ia32 Store node
2394 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2396 	ir_mode        *mode      = get_irn_mode(cns);
2397 	unsigned        size      = get_mode_size_bytes(mode);
2398 	tarval         *tv        = get_Const_tarval(cns);
2399 	ir_node        *block     = get_nodes_block(node);
2400 	ir_node        *new_block = be_transform_node(block);
2401 	ir_node        *ptr       = get_Store_ptr(node);
2402 	ir_node        *mem       = get_Store_mem(node);
2403 	dbg_info       *dbgi      = get_irn_dbg_info(node);
2407 	ia32_address_t  addr;
/* we emit whole 32-bit words, so the float size must be a multiple of 4 */
2409 	assert(size % 4 == 0);
2412 	build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32-bit word from the tarval's bytes */
2416 			get_tarval_sub_bits(tv, ofs)            |
2417 			(get_tarval_sub_bits(tv, ofs + 1) <<  8) |
2418 			(get_tarval_sub_bits(tv, ofs + 2) << 16) |
2419 			(get_tarval_sub_bits(tv, ofs + 3) << 24);
2420 		ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2422 		ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2423 			addr.index, addr.mem, imm);
2425 		set_irn_pinned(new_node, get_irn_pinned(node));
2426 		set_ia32_op_type(new_node, ia32_AddrModeD);
2427 		set_ia32_ls_mode(new_node, mode_Iu);
2428 		set_address(new_node, &addr);
2429 		SET_IA32_ORIG_NODE(new_node, node);
2432 		ins[i++] = new_node;
2437 	} while (size != 0);
/* join the partial stores' memories into one Sync */
2440 		return new_rd_Sync(dbgi, current_ir_graph, new_block, i, ins);
2447  * Generate a vfist or vfisttp instruction.
/* fisttp (SSE3) truncates without changing the FPU rounding mode but
 * always pops st(0); plain fist needs the truncate control word instead. */
2449 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2450                           ir_node *mem,  ir_node *val, ir_node **fist)
2454 	if (ia32_cg_config.use_fisttp) {
2455 		/* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2456 		if other users exists */
2457 		const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2458 		ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2459 		ir_node *value   = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
/* keep the popped value alive for the register allocator */
2460 		be_new_Keep(reg_class, irg, block, 1, &value);
2462 		new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
2465 		ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2468 		new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2474  * Transforms a general (no special case) Store.
2476  * @return the created ia32 Store node
2478 static ir_node *gen_general_Store(ir_node *node)
2480 	ir_node  *val       = get_Store_value(node);
2481 	ir_mode  *mode      = get_irn_mode(val);
2482 	ir_node  *block     = get_nodes_block(node);
2483 	ir_node  *new_block = be_transform_node(block);
2484 	ir_node  *ptr       = get_Store_ptr(node);
2485 	ir_node  *mem       = get_Store_mem(node);
2486 	dbg_info *dbgi      = get_irn_dbg_info(node);
2487 	ir_node  *new_val, *new_node, *store;
2488 	ia32_address_t addr;
2490 	/* check for destination address mode */
2491 	new_node = try_create_dest_am(node);
2492 	if (new_node != NULL)
2495 	/* construct store address */
2496 	memset(&addr, 0, sizeof(addr));
2497 	ia32_create_address_mode(&addr, ptr, 0);
2499 	if (addr.base == NULL) {
2500 		addr.base = noreg_GP;
2502 		addr.base = be_transform_node(addr.base);
2505 	if (addr.index == NULL) {
2506 		addr.index = noreg_GP;
2508 		addr.index = be_transform_node(addr.index);
2510 	addr.mem = be_transform_node(mem);
2512 	if (mode_is_float(mode)) {
2513 		/* Convs (and strict-Convs) before stores are unnecessary if the mode
/* skip float->float Convs that keep the store mode unchanged */
2515 		while (is_Conv(val) && mode == get_irn_mode(val)) {
2516 			ir_node *op = get_Conv_op(val);
2517 			if (!mode_is_float(get_irn_mode(op)))
2521 		new_val = be_transform_node(val);
2522 		if (ia32_cg_config.use_sse2) {
2523 			new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2524 			                              addr.index, addr.mem, new_val);
2526 			new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2527 			                            addr.index, addr.mem, new_val, mode);
/* float->int conversion feeding a store: fold into a fist(tp) */
2530 	} else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2531 		val = get_Conv_op(val);
2533 		/* TODO: is this optimisation still necessary at all (middleend)? */
2534 		/* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2535 		while (is_Conv(val)) {
2536 			ir_node *op = get_Conv_op(val);
2537 			if (!mode_is_float(get_irn_mode(op)))
2539 			if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2543 		new_val  = be_transform_node(val);
2544 		new_node = gen_vfist(dbgi, current_ir_graph, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* plain integer store; 8-bit needs the dedicated Store8Bit node */
2546 		new_val = create_immediate_or_transform(val, 0);
2547 		assert(mode != mode_b);
2549 		if (get_mode_size_bits(mode) == 8) {
2550 			new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2551 			                                 addr.index, addr.mem, new_val);
2553 			new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2554 			                             addr.index, addr.mem, new_val);
2559 	set_irn_pinned(store, get_irn_pinned(node));
2560 	set_ia32_op_type(store, ia32_AddrModeD);
2561 	set_ia32_ls_mode(store, mode);
2563 	set_address(store, &addr);
2564 	SET_IA32_ORIG_NODE(store, node);
2570 * Transforms a Store.
2572 * @return the created ia32 Store node
/* Transforms a Store: dispatches float constant stores to
 * gen_float_const_Store (sequence of integer stores), everything else to
 * gen_general_Store. */
2574 static ir_node *gen_Store(ir_node *node)
2576 ir_node *val = get_Store_value(node);
2577 ir_mode *mode = get_irn_mode(val);
2579 if (mode_is_float(mode) && is_Const(val)) {
2580 /* We can transform every floating const store
2581 into a sequence of integer stores.
2582 If the constant is already in a register,
2583 it would be better to use it, but we don't
2584 have this information here. */
2585 return gen_float_const_Store(node, val);
2587 return gen_general_Store(node);
2591 * Transforms a Switch.
2593 * @return the created ia32 SwitchJmp node
/* Transforms a Cond with a non-mode_b selector into an ia32 SwitchJmp.
 * Scans all case Projs to find the min/max case value, rejects jump tables
 * larger than 256000 entries, and biases the selector with a Lea when the
 * smallest case is not 0.
 * NOTE(review): embedded line numbers jump (2613->2616, ...) — some
 * statements are missing from this excerpt. */
2595 static ir_node *create_Switch(ir_node *node)
2597 dbg_info *dbgi = get_irn_dbg_info(node);
2598 ir_node *block = be_transform_node(get_nodes_block(node));
2599 ir_node *sel = get_Cond_selector(node);
2600 ir_node *new_sel = be_transform_node(sel);
2601 long switch_min = LONG_MAX;
2602 long switch_max = LONG_MIN;
2603 long default_pn = get_Cond_defaultProj(node);
2605 const ir_edge_t *edge;
2607 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2609 /* determine the smallest switch case value */
2610 foreach_out_edge(node, edge) {
2611 ir_node *proj = get_edge_src_irn(edge);
2612 long pn = get_Proj_proj(proj);
/* the default Proj does not contribute to the case value range */
2613 if (pn == default_pn)
2616 if (pn < switch_min)
2618 if (pn > switch_max)
/* refuse absurdly large jump tables */
2622 if ((unsigned long) (switch_max - switch_min) > 256000) {
2623 panic("Size of switch %+F bigger than 256000", node);
2626 if (switch_min != 0) {
2627 /* if smallest switch case is not 0 we need an additional sub */
2628 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2629 add_ia32_am_offs_int(new_sel, -switch_min);
2630 set_ia32_op_type(new_sel, ia32_AddrModeS);
2632 SET_IA32_ORIG_NODE(new_sel, node);
2635 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2636 SET_IA32_ORIG_NODE(new_node, node);
2642 * Transform a Cond node.
/* Transforms a Cond node: non-boolean selectors become a SwitchJmp,
 * boolean selectors become a Jcc fed by the flags of the compare. */
2644 static ir_node *gen_Cond(ir_node *node)
2646 ir_node *block = get_nodes_block(node);
2647 ir_node *new_block = be_transform_node(block);
2648 dbg_info *dbgi = get_irn_dbg_info(node);
2649 ir_node *sel = get_Cond_selector(node);
2650 ir_mode *sel_mode = get_irn_mode(sel);
2651 ir_node *flags = NULL;
2655 if (sel_mode != mode_b) {
2656 return create_Switch(node);
2659 /* we get flags from a Cmp */
2660 flags = get_flags_node(sel, &pnc);
2662 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2663 SET_IA32_ORIG_NODE(new_node, node);
2669 * Transform a be_Copy.
/* Transforms a be_Copy: duplicates the node and normalizes GP-register
 * modes to mode_Iu. */
2671 static ir_node *gen_be_Copy(ir_node *node)
2673 ir_node *new_node = be_duplicate_node(node);
2674 ir_mode *mode = get_irn_mode(new_node);
2676 if (ia32_mode_needs_gp_reg(mode)) {
2677 set_irn_mode(new_node, mode_Iu);
/* Creates an x87 float compare for a Cmp node. With fucomi support the
 * flags land directly in EFLAGS (vFucomi); otherwise the FPU status word is
 * read via Fnstsw (Ftst for compare-against-0 when available) and copied to
 * EFLAGS with Sahf. */
2683 static ir_node *create_Fucom(ir_node *node)
2685 dbg_info *dbgi = get_irn_dbg_info(node);
2686 ir_node *block = get_nodes_block(node);
2687 ir_node *new_block = be_transform_node(block);
2688 ir_node *left = get_Cmp_left(node);
2689 ir_node *new_left = be_transform_node(left);
2690 ir_node *right = get_Cmp_right(node);
2694 if (ia32_cg_config.use_fucomi) {
2695 new_right = be_transform_node(right);
2696 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2698 set_ia32_commutative(new_node);
2699 SET_IA32_ORIG_NODE(new_node, node);
/* ftst only works against 0.0, so check for a constant-0 right operand */
2701 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2702 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2704 new_right = be_transform_node(right);
2705 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2708 set_ia32_commutative(new_node);
2710 SET_IA32_ORIG_NODE(new_node, node);
/* transfer FPU status word (in AH) into EFLAGS */
2712 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2713 SET_IA32_ORIG_NODE(new_node, node);
/* Creates an SSE2 Ucomi compare for a Cmp node, allowing a memory operand
 * (address mode) for one side via match_arguments. */
2719 static ir_node *create_Ucomi(ir_node *node)
2721 dbg_info *dbgi = get_irn_dbg_info(node);
2722 ir_node *src_block = get_nodes_block(node);
2723 ir_node *new_block = be_transform_node(src_block);
2724 ir_node *left = get_Cmp_left(node);
2725 ir_node *right = get_Cmp_right(node);
2727 ia32_address_mode_t am;
2728 ia32_address_t *addr = &am.addr;
2730 match_arguments(&am, src_block, left, right, NULL,
2731 match_commutative | match_am);
2733 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2734 addr->mem, am.new_op1, am.new_op2,
2736 set_am_attributes(new_node, &am);
2738 SET_IA32_ORIG_NODE(new_node, node);
/* if a load was folded in, reroute its memory Proj */
2740 new_node = fix_mem_proj(new_node, &am);
2746 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2747 * to fold an and into a test node
/* helper: true iff every Proj on this Cmp is Eq or Lg, which is the
 * precondition for folding an And into a Test node (Test only produces
 * meaningful equal/not-equal results). */
2749 static bool can_fold_test_and(ir_node *node)
2751 const ir_edge_t *edge;
2753 /** we can only have eq and lg projs */
2754 foreach_out_edge(node, edge) {
2755 ir_node *proj = get_edge_src_irn(edge);
2756 pn_Cmp pnc = get_Proj_proj(proj);
2757 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2765 * returns true if it is assured, that the upper bits of a node are "clean"
2766 * which means for a 16 or 8 bit value, that the upper bits in the register
2767 * are 0 for unsigned and a copy of the last significant bit for signed
/* Returns true if the bits above the given (8/16 bit) mode are known to be
 * "clean" in the transformed node's register: zero for unsigned modes, a
 * sign-extension copy for signed modes. Used to widen 8/16-bit compares to
 * the smaller-encoded 32-bit forms.
 * NOTE(review): case labels for Shr/Sar/And and the default combine rule
 * appear with lines missing in this excerpt — verify against the original. */
2770 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2772 assert(ia32_mode_needs_gp_reg(mode));
2773 if (get_mode_size_bits(mode) >= 32)
/* look through Projs to the producing node */
2776 if (is_Proj(transformed_node))
2777 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2779 switch (get_ia32_irn_opcode(transformed_node)) {
2780 case iro_ia32_Conv_I2I:
2781 case iro_ia32_Conv_I2I8Bit: {
2782 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conv with different signedness or a larger source can't guarantee clean bits */
2783 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2785 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2792 if (mode_is_signed(mode)) {
2793 return false; /* TODO handle signed modes */
2795 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2796 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2797 const ia32_immediate_attr_t *attr
2798 = get_ia32_immediate_attr_const(right);
/* a shift by >= (32 - bits) zeroes the upper bits by itself */
2799 if (attr->symconst == 0 &&
2800 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2804 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2808 /* TODO too conservative if shift amount is constant */
2809 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: one clean unsigned operand suffices (zeros propagate) */
2812 if (!mode_is_signed(mode)) {
2814 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2815 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2817 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary node: both operands must be clean */
2822 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2823 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2825 case iro_ia32_Const:
2826 case iro_ia32_Immediate: {
2827 const ia32_immediate_attr_t *attr =
2828 get_ia32_immediate_attr_const(transformed_node);
2829 if (mode_is_signed(mode)) {
/* signed: upper bits clean iff they are all copies of the sign bit */
2830 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2831 return shifted == 0 || shifted == -1;
2833 unsigned long shifted = (unsigned long)attr->offset;
2834 shifted >>= get_mode_size_bits(mode);
2835 return shifted == 0;
2845 * Generate code for a Cmp.
/* Generates code for a Cmp: floats go to Ucomi (SSE2) or Fucom (x87);
 * integer (x & y) ==/!= 0 patterns fold into Test/Test8Bit; everything else
 * becomes Cmp/Cmp8Bit. 8/16-bit compares are widened to 32 bit when the
 * operands' upper bits are known clean (smaller opcode encoding). */
2847 static ir_node *gen_Cmp(ir_node *node)
2849 dbg_info *dbgi = get_irn_dbg_info(node);
2850 ir_node *block = get_nodes_block(node);
2851 ir_node *new_block = be_transform_node(block);
2852 ir_node *left = get_Cmp_left(node);
2853 ir_node *right = get_Cmp_right(node);
2854 ir_mode *cmp_mode = get_irn_mode(left);
2856 ia32_address_mode_t am;
2857 ia32_address_t *addr = &am.addr;
2860 if (mode_is_float(cmp_mode)) {
2861 if (ia32_cg_config.use_sse2) {
2862 return create_Ucomi(node);
2864 return create_Fucom(node);
2868 assert(ia32_mode_needs_gp_reg(cmp_mode));
2870 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2871 cmp_unsigned = !mode_is_signed(cmp_mode);
/* And must be single-use and all Projs Eq/Lg for the fold to be legal */
2872 if (is_Const_0(right) &&
2874 get_irn_n_edges(left) == 1 &&
2875 can_fold_test_and(node)) {
2876 /* Test(and_left, and_right) */
2877 ir_node *and_left = get_And_left(left);
2878 ir_node *and_right = get_And_right(left);
2880 /* matze: code here used mode instead of cmd_mode, I think it is always
2881 * the same as cmp_mode, but I leave this here to see if this is really
2884 assert(get_irn_mode(and_left) == cmp_mode);
2886 match_arguments(&am, block, and_left, and_right, NULL,
2888 match_am | match_8bit_am | match_16bit_am |
2889 match_am_and_immediates | match_immediate);
2891 /* use 32bit compare mode if possible since the opcode is smaller */
2892 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2893 upper_bits_clean(am.new_op2, cmp_mode)) {
2894 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2897 if (get_mode_size_bits(cmp_mode) == 8) {
2898 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2899 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2902 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2903 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2906 /* Cmp(left, right) */
2907 match_arguments(&am, block, left, right, NULL,
2908 match_commutative | match_am | match_8bit_am |
2909 match_16bit_am | match_am_and_immediates |
2911 /* use 32bit compare mode if possible since the opcode is smaller */
2912 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2913 upper_bits_clean(am.new_op2, cmp_mode)) {
2914 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2917 if (get_mode_size_bits(cmp_mode) == 8) {
2918 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2919 addr->index, addr->mem, am.new_op1,
2920 am.new_op2, am.ins_permuted,
2923 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2924 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2927 set_am_attributes(new_node, &am);
2928 set_ia32_ls_mode(new_node, cmp_mode);
2930 SET_IA32_ORIG_NODE(new_node, node);
2932 new_node = fix_mem_proj(new_node, &am);
/* Creates an ia32 CMov for a Mux node whose values need a GP register.
 * flags is the untransformed flags producer (used for dependency matching),
 * new_flags the transformed one; pnc selects the condition. */
2937 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2940 dbg_info *dbgi = get_irn_dbg_info(node);
2941 ir_node *block = get_nodes_block(node);
2942 ir_node *new_block = be_transform_node(block);
2943 ir_node *val_true = get_Mux_true(node);
2944 ir_node *val_false = get_Mux_false(node);
2946 ia32_address_mode_t am;
2947 ia32_address_t *addr;
2949 assert(ia32_cg_config.use_cmov);
2950 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2954 match_arguments(&am, block, val_false, val_true, flags,
2955 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2957 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2958 addr->mem, am.new_op1, am.new_op2, new_flags,
2959 am.ins_permuted, pnc);
2960 set_am_attributes(new_node, &am);
2962 SET_IA32_ORIG_NODE(new_node, node);
2964 new_node = fix_mem_proj(new_node, &am);
2970 * Creates a ia32 Setcc instruction.
/* Creates an ia32 Set (setcc) from flags and extends its 8-bit result to
 * the original node's mode with a zero-extending Conv when that mode is
 * wider than 8 bit. */
2972 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2973 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2976 ir_mode *mode = get_irn_mode(orig_node);
2979 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2980 SET_IA32_ORIG_NODE(new_node, orig_node);
2982 /* we might need to conv the result up */
2983 if (get_mode_size_bits(mode) > 8) {
2984 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
2985 nomem, new_node, mode_Bu);
2986 SET_IA32_ORIG_NODE(new_node, orig_node);
2993 * Create instruction for an unsigned Difference or Zero.
/* Creates the instruction sequence for an unsigned "difference or zero"
 * (Doz): sub a,b; sbb0 materializes 0/-1 from the carry; and masks the
 * subtraction result to 0 when it underflowed. */
2995 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
2997 ir_graph *irg = current_ir_graph;
2998 ir_mode *mode = get_irn_mode(psi);
2999 ir_node *new_node, *sub, *sbb, *eflags, *block;
3003 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3004 match_mode_neutral | match_am | match_immediate | match_two_users);
3006 block = get_nodes_block(new_node);
/* find the Sub node itself; gen_binop may already have returned a Proj */
3008 if (is_Proj(new_node)) {
3009 sub = get_Proj_pred(new_node);
3010 assert(is_ia32_Sub(sub));
/* we need both the result and the flags, so force mode_T + Projs */
3013 set_irn_mode(sub, mode_T);
3014 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
3016 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3018 dbgi = get_irn_dbg_info(psi);
3019 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3021 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3022 set_ia32_commutative(new_node);
3027 * Create an const array of two float consts.
3029 * @param c0 the first constant
3030 * @param c1 the second constant
3031 * @param new_mode IN/OUT for the mode of the constants, if NULL
3032 * smallest possible mode will be used
/* Creates a static, constant two-element float array entity holding the
 * tarvals of c0 and c1; tries to narrow the constants to mode_F (then
 * mode_D) when the conversion is lossless. *new_mode is IN/OUT: NULL means
 * "pick the smallest possible mode".
 * NOTE(review): line numbers jump around 3044/3046 — the *new_mode == NULL
 * handling is partially missing from this excerpt. */
3034 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3036 ir_mode *mode = *new_mode;
3038 ir_initializer_t *initializer;
3039 tarval *tv0 = get_Const_tarval(c0);
3040 tarval *tv1 = get_Const_tarval(c1);
3043 /* detect the best mode for the constants */
3044 mode = get_tarval_mode(tv0);
3046 if (mode != mode_F) {
3047 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3048 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3050 tv0 = tarval_convert_to(tv0, mode);
3051 tv1 = tarval_convert_to(tv1, mode);
3052 } else if (mode != mode_D) {
3053 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3054 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3056 tv0 = tarval_convert_to(tv0, mode);
3057 tv1 = tarval_convert_to(tv1, mode);
/* 4-byte aligned float array type, emitted as a local .LC constant */
3064 tp = ia32_create_float_type(mode, 4);
3065 tp = ia32_create_float_array(tp);
3067 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3069 set_entity_ld_ident(ent, get_entity_ident(ent));
3070 set_entity_visibility(ent, visibility_local);
3071 set_entity_variability(ent, variability_constant);
3072 set_entity_allocation(ent, allocation_static);
3074 initializer = create_initializer_compound(2);
3076 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3077 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3079 set_entity_initializer(ent, initializer);
3086 * Transforms a Mux node into some code sequence.
3088 * @return The transformed node.
/* Transforms a Mux node. Float Muxes try SSE min/max patterns, then a
 * Set + scaled load from a two-element constant array; integer Muxes
 * recognize the unsigned Doz pattern, 0/1 constant results (Set), and fall
 * back to CMov.
 * NOTE(review): several line-number gaps (3135->3141, 3154->3164, ...)
 * show missing statements (switch cases, scale setup) in this excerpt. */
3090 static ir_node *gen_Mux(ir_node *node)
3092 dbg_info *dbgi = get_irn_dbg_info(node);
3093 ir_node *block = get_nodes_block(node);
3094 ir_node *new_block = be_transform_node(block);
3095 ir_node *mux_true = get_Mux_true(node);
3096 ir_node *mux_false = get_Mux_false(node);
3097 ir_node *cond = get_Mux_sel(node);
3098 ir_mode *mode = get_irn_mode(node);
3103 assert(get_irn_mode(cond) == mode_b);
3105 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3106 if (mode_is_float(mode)) {
3107 ir_node *cmp = get_Proj_pred(cond);
3108 ir_node *cmp_left = get_Cmp_left(cmp);
3109 ir_node *cmp_right = get_Cmp_right(cmp);
3110 pn_Cmp pnc = get_Proj_proj(cond);
3112 if (ia32_cg_config.use_sse2) {
3113 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3114 if (cmp_left == mux_true && cmp_right == mux_false) {
3115 /* Mux(a <= b, a, b) => MIN */
3116 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3117 match_commutative | match_am | match_two_users);
3118 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3119 /* Mux(a <= b, b, a) => MAX */
3120 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3121 match_commutative | match_am | match_two_users);
3123 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3124 if (cmp_left == mux_true && cmp_right == mux_false) {
3125 /* Mux(a >= b, a, b) => MAX */
3126 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3127 match_commutative | match_am | match_two_users);
3128 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3129 /* Mux(a >= b, b, a) => MIN */
3130 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3131 match_commutative | match_am | match_two_users);
/* both results constant: Set produces 0/1, used as index into a
 * two-element constant array that is then loaded */
3135 if (is_Const(mux_true) && is_Const(mux_false)) {
3136 ia32_address_mode_t am;
3141 flags = get_flags_node(cond, &pnc);
3142 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3144 if (ia32_cg_config.use_sse2) {
3145 /* cannot load from different mode on SSE */
3148 /* x87 can load any mode */
3152 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 Set result by the element size */
3154 switch (get_mode_size_bytes(new_mode)) {
3164 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3165 set_ia32_am_scale(new_node, 2);
3170 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3171 set_ia32_am_scale(new_node, 1);
3174 /* arg, shift 16 NOT supported */
3176 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3179 panic("Unsupported constant size");
3182 am.ls_mode = new_mode;
3183 am.addr.base = noreg_GP;
3184 am.addr.index = new_node;
3185 am.addr.mem = nomem;
3187 am.addr.scale = scale;
3188 am.addr.use_frame = 0;
3189 am.addr.frame_entity = NULL;
3190 am.addr.symconst_sign = 0;
3191 am.mem_proj = am.addr.mem;
3192 am.op_type = ia32_AddrModeS;
3195 am.pinned = op_pin_state_floats;
3197 am.ins_permuted = 0;
3199 if (ia32_cg_config.use_sse2)
3200 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3202 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3203 set_am_attributes(load, &am);
3205 return new_rd_Proj(NULL, current_ir_graph, block, load, mode_vfp, pn_ia32_res);
3207 panic("cannot transform floating point Mux");
3210 assert(ia32_mode_needs_gp_reg(mode));
3212 if (is_Proj(cond)) {
3213 ir_node *cmp = get_Proj_pred(cond);
3215 ir_node *cmp_left = get_Cmp_left(cmp);
3216 ir_node *cmp_right = get_Cmp_right(cmp);
3217 pn_Cmp pnc = get_Proj_proj(cond);
3219 /* check for unsigned Doz first */
3220 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3221 is_Const_0(mux_false) && is_Sub(mux_true) &&
3222 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3223 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3224 return create_Doz(node, cmp_left, cmp_right);
3225 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3226 is_Const_0(mux_true) && is_Sub(mux_false) &&
3227 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3228 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3229 return create_Doz(node, cmp_left, cmp_right);
3234 flags = get_flags_node(cond, &pnc);
3236 if (is_Const(mux_true) && is_Const(mux_false)) {
3237 /* both are const, good */
3238 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3239 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3240 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3241 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
3243 /* Not that simple. */
3248 new_node = create_CMov(node, cond, flags, pnc);
3256 * Create a conversion from x87 state register to general purpose.
/* Creates a conversion from an x87 float register to a general purpose
 * register by spilling with fist and reloading the integer. Unsigned 32-bit
 * targets are stored as a 64-bit signed integer (fist can only store
 * signed) and only the low 32 bits are loaded back. */
3258 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3260 ir_node *block = be_transform_node(get_nodes_block(node));
3261 ir_node *op = get_Conv_op(node);
3262 ir_node *new_op = be_transform_node(op);
3263 ir_graph *irg = current_ir_graph;
3264 dbg_info *dbgi = get_irn_dbg_info(node);
3265 ir_mode *mode = get_irn_mode(node);
3266 ir_node *fist, *load, *mem;
3268 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3269 set_irn_pinned(fist, op_pin_state_floats);
3270 set_ia32_use_frame(fist);
3271 set_ia32_op_type(fist, ia32_AddrModeD);
3273 assert(get_mode_size_bits(mode) <= 32);
3274 /* exception we can only store signed 32 bit integers, so for unsigned
3275 we store a 64bit (signed) integer and load the lower bits */
3276 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3277 set_ia32_ls_mode(fist, mode_Ls);
3279 set_ia32_ls_mode(fist, mode_Is);
3281 SET_IA32_ORIG_NODE(fist, node);
3284 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3286 set_irn_pinned(load, op_pin_state_floats);
3287 set_ia32_use_frame(load);
3288 set_ia32_op_type(load, ia32_AddrModeS);
3289 set_ia32_ls_mode(load, mode_Is);
/* tell spill-slot assignment how large the stack entity must be */
3290 if (get_ia32_ls_mode(fist) == mode_Ls) {
3291 ia32_attr_t *attr = get_ia32_attr(load);
3292 attr->data.need_64bit_stackent = 1;
3294 ia32_attr_t *attr = get_ia32_attr(load);
3295 attr->data.need_32bit_stackent = 1;
3297 SET_IA32_ORIG_NODE(load, node);
3299 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3303 * Creates a x87 strict Conv by placing a Store and a Load
/* Creates an x87 strict Conv by forcing a round trip through memory:
 * a frame-based vfst in the target mode followed by a vfld, which truncates
 * the 80-bit register value to the target precision. */
3305 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3307 ir_node *block = get_nodes_block(node);
3308 ir_graph *irg = current_ir_graph;
3309 dbg_info *dbgi = get_irn_dbg_info(node);
3310 ir_node *frame = get_irg_frame(irg);
3311 ir_node *store, *load;
3314 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3315 set_ia32_use_frame(store);
3316 set_ia32_op_type(store, ia32_AddrModeD);
3317 SET_IA32_ORIG_NODE(store, node);
3319 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3320 set_ia32_use_frame(load);
3321 set_ia32_op_type(load, ia32_AddrModeS);
3322 SET_IA32_ORIG_NODE(load, node);
3324 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
/* Creates an integer-to-integer Conv node, picking the 8-bit constructor
 * variant when the target mode is 8 bits wide. */
3328 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3329 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3331 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3333 func = get_mode_size_bits(mode) == 8 ?
3334 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3335 return func(dbgi, block, base, index, mem, val, mode);
3339 * Create a conversion from general purpose to x87 register
/* Creates a conversion from a general purpose register to an x87 float
 * register: either a direct source-AM fild for signed 16/32-bit memory
 * operands, or a spill (Store) + fild through the frame. 32-bit unsigned
 * sources are widened to a 64-bit spill (a zero word is stored above). */
3341 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3343 ir_node *src_block = get_nodes_block(node);
3344 ir_node *block = be_transform_node(src_block);
3345 ir_graph *irg = current_ir_graph;
3346 dbg_info *dbgi = get_irn_dbg_info(node);
3347 ir_node *op = get_Conv_op(node);
3348 ir_node *new_op = NULL;
3350 ir_mode *store_mode;
3355 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3356 if (possible_int_mode_for_fp(src_mode)) {
3357 ia32_address_mode_t am;
3359 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3360 if (am.op_type == ia32_AddrModeS) {
3361 ia32_address_t *addr = &am.addr;
3363 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3364 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3366 set_am_attributes(fild, &am);
3367 SET_IA32_ORIG_NODE(fild, node);
3369 fix_mem_proj(fild, &am);
/* no source-AM possible: transform the operand into a register */
3374 if (new_op == NULL) {
3375 new_op = be_transform_node(op);
3378 mode = get_irn_mode(op);
3380 /* first convert to 32 bit signed if necessary */
3381 if (get_mode_size_bits(src_mode) < 32) {
3382 if (!upper_bits_clean(new_op, src_mode)) {
3383 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3384 SET_IA32_ORIG_NODE(new_op, node);
3389 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can load it */
3392 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3394 set_ia32_use_frame(store);
3395 set_ia32_op_type(store, ia32_AddrModeD);
3396 set_ia32_ls_mode(store, mode_Iu);
3398 /* exception for 32bit unsigned, do a 64bit spill+load */
3399 if (!mode_is_signed(mode)) {
3402 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* store a zero high word at offset 4 -> value reads as unsigned 64 bit */
3404 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3405 noreg_GP, nomem, zero_const);
3407 set_ia32_use_frame(zero_store);
3408 set_ia32_op_type(zero_store, ia32_AddrModeD);
3409 add_ia32_am_offs_int(zero_store, 4);
3410 set_ia32_ls_mode(zero_store, mode_Iu);
3415 store = new_rd_Sync(dbgi, irg, block, 2, in);
3416 store_mode = mode_Ls;
3418 store_mode = mode_Is;
3422 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3424 set_ia32_use_frame(fild);
3425 set_ia32_op_type(fild, ia32_AddrModeS);
3426 set_ia32_ls_mode(fild, store_mode);
3428 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3434 * Create a conversion from one integer mode into another one
/* Creates a conversion from one integer mode into another: emits a
 * Conv_I2I(8Bit) in the smaller of the two modes, or skips the conversion
 * entirely when the operand's upper bits are already clean. */
3436 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3437 dbg_info *dbgi, ir_node *block, ir_node *op,
3440 ir_node *new_block = be_transform_node(block);
3442 ir_mode *smaller_mode;
3443 ia32_address_mode_t am;
3444 ia32_address_t *addr = &am.addr;
3447 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3448 smaller_mode = src_mode;
3450 smaller_mode = tgt_mode;
3453 #ifdef DEBUG_libfirm
3455 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3460 match_arguments(&am, block, NULL, op, NULL,
3461 match_am | match_8bit_am | match_16bit_am);
3463 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3464 /* unnecessary conv. in theory it shouldn't have been AM */
3465 assert(is_ia32_NoReg_GP(addr->base));
3466 assert(is_ia32_NoReg_GP(addr->index));
3467 assert(is_NoMem(addr->mem));
3468 assert(am.addr.offset == 0);
3469 assert(am.addr.symconst_ent == NULL);
3473 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3474 addr->mem, am.new_op2, smaller_mode);
3475 set_am_attributes(new_node, &am);
3476 /* match_arguments assume that out-mode = in-mode, this isn't true here
3478 set_ia32_ls_mode(new_node, smaller_mode);
3479 SET_IA32_ORIG_NODE(new_node, node);
3480 new_node = fix_mem_proj(new_node, &am);
3485 * Transforms a Conv node.
3487 * @return The created ia32 Conv node
/* Transforms a Conv node, dispatching over all source/target mode
 * combinations: bool->int is a no-op (bools are already 0/1), float<->float
 * uses SSE Conv_FP2FP or an x87 strict conv, float->int uses Conv_FP2I or
 * gen_x87_fp_to_gp, int->float uses Conv_I2FP or gen_x87_gp_to_fp, and
 * int->int goes through create_I2I_Conv. */
3489 static ir_node *gen_Conv(ir_node *node)
3491 ir_node *block = get_nodes_block(node);
3492 ir_node *new_block = be_transform_node(block);
3493 ir_node *op = get_Conv_op(node);
3494 ir_node *new_op = NULL;
3495 dbg_info *dbgi = get_irn_dbg_info(node);
3496 ir_mode *src_mode = get_irn_mode(op);
3497 ir_mode *tgt_mode = get_irn_mode(node);
3498 int src_bits = get_mode_size_bits(src_mode);
3499 int tgt_bits = get_mode_size_bits(tgt_mode);
3500 ir_node *res = NULL;
3502 assert(!mode_is_int(src_mode) || src_bits <= 32);
3503 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3505 if (src_mode == mode_b) {
3506 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3507 /* nothing to do, we already model bools as 0/1 ints */
3508 return be_transform_node(op);
3511 if (src_mode == tgt_mode) {
3512 if (get_Conv_strict(node)) {
3513 if (ia32_cg_config.use_sse2) {
3514 /* when we are in SSE mode, we can kill all strict no-op conversion */
3515 return be_transform_node(op);
3518 /* this should be optimized already, but who knows... */
3519 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3520 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3521 return be_transform_node(op);
3525 if (mode_is_float(src_mode)) {
3526 new_op = be_transform_node(op);
3527 /* we convert from float ... */
3528 if (mode_is_float(tgt_mode)) {
3530 /* Matze: I'm a bit unsure what the following is for? seems wrong
3532 if (src_mode == mode_E && tgt_mode == mode_D
3533 && !get_Conv_strict(node)) {
3534 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3540 if (ia32_cg_config.use_sse2) {
3541 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3542 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3544 set_ia32_ls_mode(res, tgt_mode);
3546 if (get_Conv_strict(node)) {
3547 /* if fp_no_float_fold is not set then we assume that we
3548 * don't have any float operations in a non
3549 * mode_float_arithmetic mode and can skip strict upconvs */
3550 if (src_bits < tgt_bits
3551 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3552 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3555 res = gen_x87_strict_conv(tgt_mode, new_op);
3556 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3560 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* ... to int */
3565 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3566 if (ia32_cg_config.use_sse2) {
3567 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3569 set_ia32_ls_mode(res, src_mode);
3571 return gen_x87_fp_to_gp(node);
3575 /* we convert from int ... */
3576 if (mode_is_float(tgt_mode)) {
3578 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3579 if (ia32_cg_config.use_sse2) {
3580 new_op = be_transform_node(op);
3581 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3583 set_ia32_ls_mode(res, tgt_mode);
3585 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3586 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3587 res = gen_x87_gp_to_fp(node, src_mode);
3589 /* we need a strict-Conv, if the int mode has more bits than the
3591 if (float_mantissa < int_mantissa) {
3592 res = gen_x87_strict_conv(tgt_mode, res);
3593 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3597 } else if (tgt_mode == mode_b) {
3598 /* mode_b lowering already took care that we only have 0/1 values */
3599 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3600 src_mode, tgt_mode));
3601 return be_transform_node(op);
3604 if (src_bits == tgt_bits) {
3605 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3606 src_mode, tgt_mode));
3607 return be_transform_node(op);
3610 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Tries to encode the node as an ia32 Immediate with the given constraint;
 * falls back to normally transforming it into a register operand. */
3618 static ir_node *create_immediate_or_transform(ir_node *node,
3619 char immediate_constraint_type)
3621 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3622 if (new_node == NULL) {
3623 new_node = be_transform_node(node);
3629 * Transforms a FrameAddr into an ia32 Add.
/* Transforms a be_FrameAddr into an ia32 Lea based on the frame pointer,
 * with the frame entity attached for later offset resolution. */
3631 static ir_node *gen_be_FrameAddr(ir_node *node)
3633 ir_node *block = be_transform_node(get_nodes_block(node));
3634 ir_node *op = be_get_FrameAddr_frame(node);
3635 ir_node *new_op = be_transform_node(op);
3636 dbg_info *dbgi = get_irn_dbg_info(node);
3639 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3640 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3641 set_ia32_use_frame(new_node);
3643 SET_IA32_ORIG_NODE(new_node, node);
3649 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Transforms a be_Return. With SSE2 and a primitive float result, the
 * calling convention requires the value in the x87 TOS, so the XMM result
 * is stored to the frame and reloaded via vfld; the Barrier in front of the
 * Return is rebuilt with the reloaded value and memory. All other cases are
 * plain duplication. */
3651 static ir_node *gen_be_Return(ir_node *node)
3653 ir_graph *irg = current_ir_graph;
3654 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3655 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3656 ir_entity *ent = get_irg_entity(irg);
3657 ir_type *tp = get_entity_type(ent);
3662 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3663 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3665 int pn_ret_val, pn_ret_mem, arity, i;
3667 assert(ret_val != NULL);
3668 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3669 return be_duplicate_node(node);
3672 res_type = get_method_res_type(tp, 0);
3674 if (! is_Primitive_type(res_type)) {
3675 return be_duplicate_node(node);
3678 mode = get_type_mode(res_type);
3679 if (! mode_is_float(mode)) {
3680 return be_duplicate_node(node);
3683 assert(get_method_n_ress(tp) == 1);
3685 pn_ret_val = get_Proj_proj(ret_val);
3686 pn_ret_mem = get_Proj_proj(ret_mem);
3688 /* get the Barrier */
3689 barrier = get_Proj_pred(ret_val);
3691 /* get result input of the Barrier */
3692 ret_val = get_irn_n(barrier, pn_ret_val);
3693 new_ret_val = be_transform_node(ret_val);
3695 /* get memory input of the Barrier */
3696 ret_mem = get_irn_n(barrier, pn_ret_mem);
3697 new_ret_mem = be_transform_node(ret_mem);
3699 frame = get_irg_frame(irg);
3701 dbgi = get_irn_dbg_info(barrier);
3702 block = be_transform_node(get_nodes_block(barrier));
3704 /* store xmm0 onto stack */
3705 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3706 new_ret_mem, new_ret_val);
3707 set_ia32_ls_mode(sse_store, mode);
3708 set_ia32_op_type(sse_store, ia32_AddrModeD);
3709 set_ia32_use_frame(sse_store);
3711 /* load into x87 register */
3712 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3713 set_ia32_op_type(fld, ia32_AddrModeS);
3714 set_ia32_use_frame(fld);
3716 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3717 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3719 /* create a new barrier */
3720 arity = get_irn_arity(barrier);
3721 in = ALLOCAN(ir_node*, arity);
3722 for (i = 0; i < arity; ++i) {
/* replace value/memory inputs with the reloaded value / its memory Proj */
3725 if (i == pn_ret_val) {
3727 } else if (i == pn_ret_mem) {
3730 ir_node *in = get_irn_n(barrier, i);
3731 new_in = be_transform_node(in);
3736 new_barrier = new_ir_node(dbgi, irg, block,
3737 get_irn_op(barrier), get_irn_mode(barrier),
3739 copy_node_attr(barrier, new_barrier);
3740 be_duplicate_deps(barrier, new_barrier);
3741 be_set_transformed_node(barrier, new_barrier);
3743 /* transform normally */
3744 return be_duplicate_node(node);
/* be_AddSP reserves stack space; the ia32 stack grows downwards, so it is
 * implemented with a SubSP instruction. */
3748 * Transform a be_AddSP into an ia32_SubSP.
3750 static ir_node *gen_be_AddSP(ir_node *node)
3752 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3753 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3755 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3756 match_am | match_immediate);
/* be_SubSP releases stack space; mirrored to an ia32 AddSP (stack grows
 * downwards, cf. gen_be_AddSP above). */
3760 * Transform a be_SubSP into an ia32_AddSP
3762 static ir_node *gen_be_SubSP(ir_node *node)
3764 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3765 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3767 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3768 match_am | match_immediate);
/* Re-create a Phi with a backend-appropriate mode (gp/xmm/vfp).
 * NOTE(review): chunk is sampled -- the mode-assignment statements inside the
 * branches and the final return are not visible here. */
3772 * Change some phi modes
3774 static ir_node *gen_Phi(ir_node *node)
3776 ir_node *block = be_transform_node(get_nodes_block(node));
3777 ir_graph *irg = current_ir_graph;
3778 dbg_info *dbgi = get_irn_dbg_info(node);
3779 ir_mode *mode = get_irn_mode(node);
3782 if (ia32_mode_needs_gp_reg(mode)) {
3783 /* we shouldn't have any 64bit stuff around anymore */
3784 assert(get_mode_size_bits(mode) <= 32);
3785 /* all integer operations are on 32bit registers now */
3787 } else if (mode_is_float(mode)) {
3788 if (ia32_cg_config.use_sse2) {
3795 /* phi nodes allow loops, so we use the old arguments for now
3796 * and fix this later */
3797 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3798 get_irn_in(node) + 1);
3799 copy_node_attr(node, phi);
3800 be_duplicate_deps(node, phi);
/* predecessors are fixed up in a later pass once they are transformed */
3802 be_enqueue_preds(node);
/* Transform an indirect jump (IJmp) into an ia32 IJmp, allowing the target
 * to be folded as an address-mode operand or immediate. */
3810 static ir_node *gen_IJmp(ir_node *node)
3812 ir_node *block = get_nodes_block(node);
3813 ir_node *new_block = be_transform_node(block);
3814 dbg_info *dbgi = get_irn_dbg_info(node);
3815 ir_node *op = get_IJmp_target(node);
3817 ia32_address_mode_t am;
3818 ia32_address_t *addr = &am.addr;
/* the jump target must be a pointer */
3820 assert(get_irn_mode(op) == mode_P);
3822 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3824 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3825 addr->mem, am.new_op2);
3826 set_am_attributes(new_node, &am);
3827 SET_IA32_ORIG_NODE(new_node, node);
/* if a load was folded in, reroute its memory Proj */
3829 new_node = fix_mem_proj(new_node, &am);
/* Transform a Bound node. Only the lower==0 case (unsigned compare of index
 * against upper, as produced by Java-style array checks) is supported; the
 * general case panics.
 * NOTE(review): chunk is sampled -- the else-branch structure and the final
 * return are not fully visible here. */
3835 * Transform a Bound node.
3837 static ir_node *gen_Bound(ir_node *node)
3840 ir_node *lower = get_Bound_lower(node);
3841 dbg_info *dbgi = get_irn_dbg_info(node);
3843 if (is_Const_0(lower)) {
3844 /* typical case for Java */
3845 ir_node *sub, *res, *flags, *block;
3846 ir_graph *irg = current_ir_graph;
/* index - upper; an unsigned "less than" on the flags covers both
 * index < 0 and index >= upper */
3848 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3849 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3851 block = get_nodes_block(res);
3852 if (! is_Proj(res)) {
3854 set_irn_mode(sub, mode_T);
3855 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3857 sub = get_Proj_pred(res);
3859 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3860 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3861 SET_IA32_ORIG_NODE(new_node, node);
3863 panic("generic Bound not supported in ia32 Backend");
/* Lower l_ShlDep (shift-left with an extra scheduling dependency) to an
 * ia32 Shl. */
3869 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3871 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3872 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3874 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3875 match_immediate | match_mode_neutral);
/* Lower l_ShrDep to an ia32 Shr (logical shift right).
 * NOTE(review): the match-flags argument line is not visible in this
 * sampled chunk. */
3878 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3880 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3881 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3882 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Lower l_SarDep to an ia32 Sar (arithmetic shift right).
 * NOTE(review): the match-flags argument line is not visible in this
 * sampled chunk. */
3886 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3888 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3889 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3890 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Lower l_Add (low word of a 64bit add) to an ia32 Add in mode_T so the
 * carry flag is available to the matching l_Adc.
 * NOTE(review): the final return is not visible in this sampled chunk. */
3894 static ir_node *gen_ia32_l_Add(ir_node *node)
3896 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3897 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3898 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3899 match_commutative | match_am | match_immediate |
3900 match_mode_neutral);
/* gen_binop may have returned a result Proj; step back to the Add itself */
3902 if (is_Proj(lowered)) {
3903 lowered = get_Proj_pred(lowered);
3905 assert(is_ia32_Add(lowered));
3906 set_irn_mode(lowered, mode_T);
/* Lower l_Adc (high word of a 64bit add) to an ia32 Adc consuming the
 * flags produced by the matching l_Add. */
3912 static ir_node *gen_ia32_l_Adc(ir_node *node)
3914 return gen_binop_flags(node, new_bd_ia32_Adc,
3915 match_commutative | match_am | match_immediate |
3916 match_mode_neutral);
/* Lower l_Mul to the ia32 Mul (unsigned widening multiply). */
3920 * Transforms a l_MulS into a "real" MulS node.
3922 * @return the created ia32 Mul node
3924 static ir_node *gen_ia32_l_Mul(ir_node *node)
3926 ir_node *left = get_binop_left(node);
3927 ir_node *right = get_binop_right(node);
3929 return gen_binop(node, left, right, new_bd_ia32_Mul,
3930 match_commutative | match_am | match_mode_neutral);
/* Lower l_IMul to the one-operand ia32 IMul1OP (signed widening multiply). */
3934 * Transforms a l_IMulS into a "real" IMul1OPS node.
3936 * @return the created ia32 IMul1OP node
3938 static ir_node *gen_ia32_l_IMul(ir_node *node)
3940 ir_node *left = get_binop_left(node);
3941 ir_node *right = get_binop_right(node);
3943 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3944 match_commutative | match_am | match_mode_neutral);
/* Lower l_Sub (low word of a 64bit sub) to an ia32 Sub in mode_T so the
 * borrow flag is available to the matching l_Sbb.
 * NOTE(review): the final return is not visible in this sampled chunk. */
3947 static ir_node *gen_ia32_l_Sub(ir_node *node)
3949 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3950 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3951 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3952 match_am | match_immediate | match_mode_neutral);
/* gen_binop may have returned a result Proj; step back to the Sub itself */
3954 if (is_Proj(lowered)) {
3955 lowered = get_Proj_pred(lowered);
3957 assert(is_ia32_Sub(lowered));
3958 set_irn_mode(lowered, mode_T);
/* Lower l_Sbb (high word of a 64bit sub) to an ia32 Sbb consuming the
 * flags produced by the matching l_Sub. */
3964 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3966 return gen_binop_flags(node, new_bd_ia32_Sbb,
3967 match_am | match_immediate | match_mode_neutral);
/* Shared lowering for l_ShlD/l_ShrD (double-precision 64bit shifts).
 * NOTE(review): chunk is sampled -- the new_count argument of the ShlD/ShrD
 * constructors and the final return are not visible here. */
3971 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3972 * op1 - target to be shifted
3973 * op2 - contains bits to be shifted into target
3975 * Only op3 can be an immediate.
3977 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3978 ir_node *low, ir_node *count)
3980 ir_node *block = get_nodes_block(node);
3981 ir_node *new_block = be_transform_node(block);
3982 dbg_info *dbgi = get_irn_dbg_info(node);
3983 ir_node *new_high = be_transform_node(high);
3984 ir_node *new_low = be_transform_node(low);
3988 /* the shift amount can be any mode that is bigger than 5 bits, since all
3989 * other bits are ignored anyway */
/* strip single-user integer Convs around the count -- only the low 5 bits
 * of the shift amount matter on ia32 */
3990 while (is_Conv(count) &&
3991 get_irn_n_edges(count) == 1 &&
3992 mode_is_int(get_irn_mode(count))) {
3993 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3994 count = get_Conv_op(count);
3996 new_count = create_immediate_or_transform(count, 0);
3998 if (is_ia32_l_ShlD(node)) {
3999 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4002 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4005 SET_IA32_ORIG_NODE(new_node, node);
/* Lower l_ShlD via the shared 64bit-shift helper above. */
4010 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4012 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4013 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4014 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4015 return gen_lowered_64bit_shifts(node, high, low, count);
/* Lower l_ShrD via the shared 64bit-shift helper above. */
4018 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4020 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4021 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4022 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4023 return gen_lowered_64bit_shifts(node, high, low, count);
/* Lower l_LLtoFloat (64bit integer -> float) for the x87 unit: spill the two
 * 32bit halves to the frame, fild the 64bit slot, and for unsigned inputs
 * with the sign bit set add a 2^64 bias constant to correct the signed fild.
 * Not implemented for SSE2 (panics).
 * NOTE(review): chunk is sampled -- several statements (store value inputs,
 * the in[] setup for the Sync, am.new_op1 and the final return) are not
 * visible here. */
4026 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4028 ir_node *src_block = get_nodes_block(node);
4029 ir_node *block = be_transform_node(src_block);
4030 ir_graph *irg = current_ir_graph;
4031 dbg_info *dbgi = get_irn_dbg_info(node);
4032 ir_node *frame = get_irg_frame(irg);
4033 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4034 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4035 ir_node *new_val_low = be_transform_node(val_low);
4036 ir_node *new_val_high = be_transform_node(val_high);
4038 ir_node *sync, *fild, *res;
4039 ir_node *store_low, *store_high;
4041 if (ia32_cg_config.use_sse2) {
4042 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both 32bit halves into one 64bit frame slot */
4046 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4048 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4050 SET_IA32_ORIG_NODE(store_low, node);
4051 SET_IA32_ORIG_NODE(store_high, node);
4053 set_ia32_use_frame(store_low);
4054 set_ia32_use_frame(store_high);
4055 set_ia32_op_type(store_low, ia32_AddrModeD);
4056 set_ia32_op_type(store_high, ia32_AddrModeD);
4057 set_ia32_ls_mode(store_low, mode_Iu);
4058 set_ia32_ls_mode(store_high, mode_Is);
/* high word lives 4 bytes above the low word */
4059 add_ia32_am_offs_int(store_high, 4);
4063 sync = new_rd_Sync(dbgi, irg, block, 2, in);
/* load the 64bit slot as a signed integer into the x87 stack */
4066 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4068 set_ia32_use_frame(fild);
4069 set_ia32_op_type(fild, ia32_AddrModeS);
4070 set_ia32_ls_mode(fild, mode_Ls);
4072 SET_IA32_ORIG_NODE(fild, node);
4074 res = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned source: fild interpreted the value as signed, so when the sign
 * bit was set, add a bias of 2^64 selected via (high >> 31) indexing */
4076 if (! mode_is_signed(get_irn_mode(val_high))) {
4077 ia32_address_mode_t am;
4079 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4082 am.addr.base = noreg_GP;
4083 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4084 am.addr.mem = nomem;
4087 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4088 am.addr.use_frame = 0;
4089 am.addr.frame_entity = NULL;
4090 am.addr.symconst_sign = 0;
4091 am.ls_mode = mode_F;
4092 am.mem_proj = nomem;
4093 am.op_type = ia32_AddrModeS;
4095 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4096 am.pinned = op_pin_state_floats;
4098 am.ins_permuted = 0;
4100 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4101 am.new_op1, am.new_op2, get_fpcw());
4102 set_am_attributes(fadd, &am);
4104 set_irn_mode(fadd, mode_T);
4105 res = new_rd_Proj(NULL, irg, block, fadd, mode_vfp, pn_ia32_res);
/* Lower l_FloattoLL (float -> 64bit integer): fist the x87 value into a
 * 64bit frame slot; the halves are picked up by gen_Proj_l_FloattoLL below.
 * NOTE(review): the final return is not visible in this sampled chunk. */
4110 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4112 ir_node *src_block = get_nodes_block(node);
4113 ir_node *block = be_transform_node(src_block);
4114 ir_graph *irg = current_ir_graph;
4115 dbg_info *dbgi = get_irn_dbg_info(node);
4116 ir_node *frame = get_irg_frame(irg);
4117 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4118 ir_node *new_val = be_transform_node(val);
4119 ir_node *fist, *mem;
4121 mem = gen_vfist(dbgi, irg, block, frame, noreg_GP, nomem, new_val, &fist);
4122 SET_IA32_ORIG_NODE(fist, node);
4123 set_ia32_use_frame(fist);
4124 set_ia32_op_type(fist, ia32_AddrModeD);
/* store the full 64bit integer result */
4125 set_ia32_ls_mode(fist, mode_Ls);
/* Fallback registered for node kinds that must never reach the transformer;
 * always panics. */
4131 * the BAD transformer.
4133 static ir_node *bad_transform(ir_node *node)
4135 panic("No transform function for %+F available.", node);
/* Materialize one 32bit half of an l_FloattoLL result: load from the 64bit
 * frame slot written by the fist, at offset 0 (low) or 4 (high).
 * NOTE(review): the final return of proj is not visible in this sampled
 * chunk. */
4139 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4141 ir_graph *irg = current_ir_graph;
4142 ir_node *block = be_transform_node(get_nodes_block(node));
4143 ir_node *pred = get_Proj_pred(node);
4144 ir_node *new_pred = be_transform_node(pred);
4145 ir_node *frame = get_irg_frame(irg);
4146 dbg_info *dbgi = get_irn_dbg_info(node);
4147 long pn = get_Proj_proj(node);
4152 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4153 SET_IA32_ORIG_NODE(load, node);
4154 set_ia32_use_frame(load);
4155 set_ia32_op_type(load, ia32_AddrModeS);
4156 set_ia32_ls_mode(load, mode_Iu);
4157 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4158 * 32 bit from it with this particular load */
4159 attr = get_ia32_attr(load);
4160 attr->data.need_64bit_stackent = 1;
4162 if (pn == pn_ia32_l_FloattoLL_res_high) {
4163 add_ia32_am_offs_int(load, 4);
4165 assert(pn == pn_ia32_l_FloattoLL_res_low);
4168 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
/* Renumber the Projs of a be_AddSP onto the ia32 SubSP it was lowered to
 * (cf. gen_be_AddSP); the stack-pointer result is pinned to ESP. */
4174 * Transform the Projs of an AddSP.
4176 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4178 ir_node *block = be_transform_node(get_nodes_block(node));
4179 ir_node *pred = get_Proj_pred(node);
4180 ir_node *new_pred = be_transform_node(pred);
4181 ir_graph *irg = current_ir_graph;
4182 dbg_info *dbgi = get_irn_dbg_info(node);
4183 long proj = get_Proj_proj(node);
4185 if (proj == pn_be_AddSP_sp) {
4186 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4187 pn_ia32_SubSP_stack);
4188 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4190 } else if (proj == pn_be_AddSP_res) {
4191 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4192 pn_ia32_SubSP_addr);
4193 } else if (proj == pn_be_AddSP_M) {
4194 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4197 panic("No idea how to transform proj->AddSP");
/* Renumber the Projs of a be_SubSP onto the ia32 AddSP it was lowered to
 * (cf. gen_be_SubSP); the stack-pointer result is pinned to ESP. */
4201 * Transform the Projs of a SubSP.
4203 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4205 ir_node *block = be_transform_node(get_nodes_block(node));
4206 ir_node *pred = get_Proj_pred(node);
4207 ir_node *new_pred = be_transform_node(pred);
4208 ir_graph *irg = current_ir_graph;
4209 dbg_info *dbgi = get_irn_dbg_info(node);
4210 long proj = get_Proj_proj(node);
4212 if (proj == pn_be_SubSP_sp) {
4213 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4214 pn_ia32_AddSP_stack);
4215 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4217 } else if (proj == pn_be_SubSP_M) {
4218 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4221 panic("No idea how to transform proj->SubSP");
/* Renumber the Projs of a Load onto whatever ia32 node the Load became
 * (Load, Conv_I2I, xLoad, vfld, or a node that absorbed it via source
 * address mode). Memory Projs of multi-user Loads are deferred so source
 * address-mode matching can still fold the Load.
 * NOTE(review): chunk is sampled -- several switch/case headers and closing
 * braces are not visible here; code lines kept byte-identical. */
4225 * Transform and renumber the Projs from a Load.
4227 static ir_node *gen_Proj_Load(ir_node *node)
4230 ir_node *block = be_transform_node(get_nodes_block(node));
4231 ir_node *pred = get_Proj_pred(node);
4232 ir_graph *irg = current_ir_graph;
4233 dbg_info *dbgi = get_irn_dbg_info(node);
4234 long proj = get_Proj_proj(node);
4236 /* loads might be part of source address mode matches, so we don't
4237 * transform the ProjMs yet (with the exception of loads whose result is
4240 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4243 /* this is needed, because sometimes we have loops that are only
4244 reachable through the ProjM */
4245 be_enqueue_preds(node);
4246 /* do it in 2 steps, to silence firm verifier */
4247 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4248 set_Proj_proj(res, pn_ia32_mem);
4252 /* renumber the proj */
4253 new_pred = be_transform_node(pred);
4254 if (is_ia32_Load(new_pred)) {
4257 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4259 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4260 case pn_Load_X_regular:
4261 return new_rd_Jmp(dbgi, irg, block);
4262 case pn_Load_X_except:
4263 /* This Load might raise an exception. Mark it. */
4264 set_ia32_exc_label(new_pred, 1);
4265 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the Load was fused into a Conv: route Projs to the Conv's outputs */
4269 } else if (is_ia32_Conv_I2I(new_pred) ||
4270 is_ia32_Conv_I2I8Bit(new_pred)) {
4271 set_irn_mode(new_pred, mode_T);
4272 if (proj == pn_Load_res) {
4273 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4274 } else if (proj == pn_Load_M) {
4275 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
/* SSE load */
4277 } else if (is_ia32_xLoad(new_pred)) {
4280 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4282 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4283 case pn_Load_X_regular:
4284 return new_rd_Jmp(dbgi, irg, block);
4285 case pn_Load_X_except:
4286 /* This Load might raise an exception. Mark it. */
4287 set_ia32_exc_label(new_pred, 1);
4288 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
/* x87 load */
4292 } else if (is_ia32_vfld(new_pred)) {
4295 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4297 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4298 case pn_Load_X_regular:
4299 return new_rd_Jmp(dbgi, irg, block);
4300 case pn_Load_X_except:
4301 /* This Load might raise an exception. Mark it. */
4302 set_ia32_exc_label(new_pred, 1);
4303 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4308 /* can happen for ProJMs when source address mode happened for the
4311 /* however it should not be the result proj, as that would mean the
4312 load had multiple users and should not have been used for
4314 if (proj != pn_Load_M) {
4315 panic("internal error: transformed node not a Load");
4317 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4320 panic("No idea how to transform proj");
/* Renumber the Projs of Div/Mod/DivMod onto the ia32 Div/IDiv node; both
 * quotient and remainder come out of the same instruction.
 * NOTE(review): chunk is sampled -- case labels for the outer opcode switch
 * and inner switch headers are partly missing; code lines kept identical. */
4324 * Transform and renumber the Projs from a DivMod like instruction.
4326 static ir_node *gen_Proj_DivMod(ir_node *node)
4328 ir_node *block = be_transform_node(get_nodes_block(node));
4329 ir_node *pred = get_Proj_pred(node);
4330 ir_node *new_pred = be_transform_node(pred);
4331 ir_graph *irg = current_ir_graph;
4332 dbg_info *dbgi = get_irn_dbg_info(node);
4333 long proj = get_Proj_proj(node);
4335 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4337 switch (get_irn_opcode(pred)) {
/* Div projections */
4341 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4343 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4344 case pn_Div_X_regular:
4345 return new_rd_Jmp(dbgi, irg, block);
4346 case pn_Div_X_except:
4347 set_ia32_exc_label(new_pred, 1);
4348 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
/* Mod projections */
4356 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4358 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4359 case pn_Mod_X_except:
4360 set_ia32_exc_label(new_pred, 1);
4361 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
/* DivMod projections */
4369 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4370 case pn_DivMod_res_div:
4371 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4372 case pn_DivMod_res_mod:
4373 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4374 case pn_DivMod_X_regular:
4375 return new_rd_Jmp(dbgi, irg, block);
4376 case pn_DivMod_X_except:
4377 set_ia32_exc_label(new_pred, 1);
4378 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4387 panic("No idea how to transform proj->DivMod");
/* Renumber the memory Proj of a CopyB onto the ia32 CopyB/CopyB_i
 * (rep-movs style) replacement. */
4391 * Transform and renumber the Projs from a CopyB.
4393 static ir_node *gen_Proj_CopyB(ir_node *node)
4395 ir_node *block = be_transform_node(get_nodes_block(node));
4396 ir_node *pred = get_Proj_pred(node);
4397 ir_node *new_pred = be_transform_node(pred);
4398 ir_graph *irg = current_ir_graph;
4399 dbg_info *dbgi = get_irn_dbg_info(node);
4400 long proj = get_Proj_proj(node);
4403 case pn_CopyB_M_regular:
4404 if (is_ia32_CopyB_i(new_pred)) {
4405 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4406 } else if (is_ia32_CopyB(new_pred)) {
4407 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4414 panic("No idea how to transform proj->CopyB");
/* Renumber the Projs of a Quot (float division) onto either the SSE xDiv or
 * the x87 vfdiv replacement.
 * NOTE(review): the case labels above each if-chain are not visible in this
 * sampled chunk. */
4418 * Transform and renumber the Projs from a Quot.
4420 static ir_node *gen_Proj_Quot(ir_node *node)
4422 ir_node *block = be_transform_node(get_nodes_block(node));
4423 ir_node *pred = get_Proj_pred(node);
4424 ir_node *new_pred = be_transform_node(pred);
4425 ir_graph *irg = current_ir_graph;
4426 dbg_info *dbgi = get_irn_dbg_info(node);
4427 long proj = get_Proj_proj(node);
/* memory result */
4431 if (is_ia32_xDiv(new_pred)) {
4432 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4433 } else if (is_ia32_vfdiv(new_pred)) {
4434 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
/* data result, in the matching register class */
4438 if (is_ia32_xDiv(new_pred)) {
4439 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4440 } else if (is_ia32_vfdiv(new_pred)) {
4441 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4444 case pn_Quot_X_regular:
4445 case pn_Quot_X_except:
4450 panic("No idea how to transform proj->Quot");
/* Transform a be_Call into an ia32 Call: fold the call target via address
 * mode / immediate matching, route the register parameters (eax/ecx/edx),
 * and remember SSE calls for x87-fixup post-processing.
 * NOTE(review): chunk is sampled -- some declarations and the final return
 * are not visible here. */
4453 static ir_node *gen_be_Call(ir_node *node)
4455 dbg_info *const dbgi = get_irn_dbg_info(node);
4456 ir_graph *const irg = current_ir_graph;
4457 ir_node *const src_block = get_nodes_block(node);
4458 ir_node *const block = be_transform_node(src_block);
4459 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4460 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4461 ir_node *const sp = be_transform_node(src_sp);
4462 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4463 ia32_address_mode_t am;
4464 ia32_address_t *const addr = &am.addr;
4469 ir_node * eax = noreg_GP;
4470 ir_node * ecx = noreg_GP;
4471 ir_node * edx = noreg_GP;
4472 unsigned const pop = be_Call_get_pop(node);
4473 ir_type *const call_tp = be_Call_get_type(node);
4475 /* Run the x87 simulator if the call returns a float value */
4476 if (get_method_n_ress(call_tp) > 0) {
4477 ir_type *const res_type = get_method_res_type(call_tp, 0);
4478 ir_mode *const res_mode = get_type_mode(res_type);
4480 if (res_mode != NULL && mode_is_float(res_mode)) {
4481 env_cg->do_x87_sim = 1;
4485 /* We do not want be_Call direct calls */
4486 assert(be_Call_get_entity(node) == NULL);
4488 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4489 match_am | match_immediate);
/* last input is the fpcw; walk the remaining register parameters
 * backwards down to the first argument */
4491 i = get_irn_arity(node) - 1;
4492 fpcw = be_transform_node(get_irn_n(node, i--));
4493 for (; i >= be_pos_Call_first_arg; --i) {
4494 arch_register_req_t const *const req = arch_get_register_req(node, i);
4495 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4497 assert(req->type == arch_register_req_type_limited);
4498 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4500 switch (*req->limited) {
4501 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4502 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4503 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4504 default: panic("Invalid GP register for register parameter");
4508 mem = transform_AM_mem(irg, block, src_ptr, src_mem, addr->mem);
4509 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4510 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4511 set_am_attributes(call, &am);
4512 call = fix_mem_proj(call, &am);
4514 if (get_irn_pinned(node) == op_pin_state_pinned)
4515 set_irn_pinned(call, op_pin_state_pinned);
4517 SET_IA32_ORIG_NODE(call, node);
4519 if (ia32_cg_config.use_sse2) {
4520 /* remember this call for post-processing */
4521 ARR_APP1(ir_node *, call_list, call);
4522 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
/* Builtin trap: emit the UD2 instruction (guaranteed invalid opcode). */
4529 * Transform Builtin trap
4531 static ir_node *gen_trap(ir_node *node) {
4532 dbg_info *dbgi = get_irn_dbg_info(node);
4533 ir_node *block = be_transform_node(get_nodes_block(node));
4534 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4536 return new_bd_ia32_UD2(dbgi, block, mem);
/* Builtin debugbreak: emit the int3 breakpoint instruction. */
4540 * Transform Builtin debugbreak
4542 static ir_node *gen_debugbreak(ir_node *node) {
4543 dbg_info *dbgi = get_irn_dbg_info(node);
4544 ir_node *block = be_transform_node(get_nodes_block(node));
4545 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4547 return new_bd_ia32_Breakpoint(dbgi, block, mem);
/* Builtin return_address(level, frame): for level > 0 climb the frame chain
 * with a ClimbFrame pseudo-op, then load the return address slot of that
 * frame.
 * NOTE(review): chunk is sampled -- the value==0 guard around the ClimbFrame
 * part and some braces are not visible here. */
4551 * Transform Builtin return_address
4553 static ir_node *gen_return_address(ir_node *node) {
4554 ir_node *param = get_Builtin_param(node, 0);
4555 ir_node *frame = get_Builtin_param(node, 1);
4556 dbg_info *dbgi = get_irn_dbg_info(node);
4557 tarval *tv = get_Const_tarval(param);
4558 unsigned long value = get_tarval_long(tv);
4560 ir_node *block = be_transform_node(get_nodes_block(node));
4561 ir_node *ptr = be_transform_node(frame);
/* walk 'value' frames up the call chain */
4565 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4566 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4567 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4570 /* load the return address from this frame */
4571 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4573 set_irn_pinned(load, get_irn_pinned(node));
4574 set_ia32_op_type(load, ia32_AddrModeS);
4575 set_ia32_ls_mode(load, mode_Iu);
4577 set_ia32_am_offs_int(load, 0);
4578 set_ia32_use_frame(load);
4579 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4581 if (get_irn_pinned(node) == op_pin_state_floats) {
4582 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4583 && pn_ia32_vfld_res == pn_ia32_Load_res
4584 && pn_ia32_Load_res == pn_ia32_res);
4585 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4588 SET_IA32_ORIG_NODE(load, node);
4589 return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
/* Builtin frame_address(level, frame): like gen_return_address above, but
 * loads the saved frame-pointer slot of the selected frame.
 * NOTE(review): chunk is sampled -- the value==0 guard and some braces are
 * not visible here. */
4593 * Transform Builtin frame_address
4595 static ir_node *gen_frame_address(ir_node *node) {
4596 ir_node *param = get_Builtin_param(node, 0);
4597 ir_node *frame = get_Builtin_param(node, 1);
4598 dbg_info *dbgi = get_irn_dbg_info(node);
4599 tarval *tv = get_Const_tarval(param);
4600 unsigned long value = get_tarval_long(tv);
4602 ir_node *block = be_transform_node(get_nodes_block(node));
4603 ir_node *ptr = be_transform_node(frame);
/* walk 'value' frames up the call chain */
4608 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4609 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4610 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4613 /* load the frame address from this frame */
4614 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4616 set_irn_pinned(load, get_irn_pinned(node));
4617 set_ia32_op_type(load, ia32_AddrModeS);
4618 set_ia32_ls_mode(load, mode_Iu);
4620 ent = ia32_get_frame_address_entity();
4622 set_ia32_am_offs_int(load, 0);
4623 set_ia32_use_frame(load);
4624 set_ia32_frame_ent(load, ent);
4626 /* will fail anyway, but gcc does this: */
4627 set_ia32_am_offs_int(load, 0);
4630 if (get_irn_pinned(node) == op_pin_state_floats) {
4631 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4632 && pn_ia32_vfld_res == pn_ia32_Load_res
4633 && pn_ia32_Load_res == pn_ia32_res);
4634 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4637 SET_IA32_ORIG_NODE(load, node);
4638 return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
/* Builtin prefetch(addr, rw, locality): pick a PrefetchW (3DNow! write
 * hint), an SSE prefetch variant by locality, or the plain 3DNow! prefetch;
 * without any prefetch support just route the memory through.
 * NOTE(review): the header comment says "frame_address" in the original --
 * likely a copy/paste slip; chunk is sampled, some braces/switch headers are
 * not visible here. */
4642 * Transform Builtin frame_address
4644 static ir_node *gen_prefetch(ir_node *node) {
4646 ir_node *ptr, *block, *mem, *base, *index;
4647 ir_node *param, *new_node;
4650 ia32_address_t addr;
4652 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4653 /* no prefetch at all, route memory */
4654 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write hint */
4657 param = get_Builtin_param(node, 1);
4658 tv = get_Const_tarval(param);
4659 rw = get_tarval_long(tv);
4661 /* construct load address */
4662 memset(&addr, 0, sizeof(addr));
4663 ptr = get_Builtin_param(node, 0);
4664 ia32_create_address_mode(&addr, ptr, 0);
4671 base = be_transform_node(base);
4674 if (index == NULL) {
4677 index = be_transform_node(index);
4680 dbgi = get_irn_dbg_info(node);
4681 block = be_transform_node(get_nodes_block(node));
4682 mem = be_transform_node(get_Builtin_mem(node));
4684 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4685 /* we have 3DNow!, this was already checked above */
4686 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4687 } else if (ia32_cg_config.use_sse_prefetch) {
4688 /* note: rw == 1 is IGNORED in that case */
4689 param = get_Builtin_param(node, 2);
4690 tv = get_Const_tarval(param);
4691 locality = get_tarval_long(tv);
4693 /* SSE style prefetch */
/* locality selects NTA/T2/T1/T0 hint levels */
4696 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4699 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4702 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4705 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4709 assert(ia32_cg_config.use_3dnow_prefetch);
4710 /* 3DNow! style prefetch */
4711 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4714 set_irn_pinned(new_node, get_irn_pinned(node));
4715 set_ia32_op_type(new_node, ia32_AddrModeS);
4716 set_ia32_ls_mode(new_node, mode_Bu);
4717 set_address(new_node, &addr);
4719 SET_IA32_ORIG_NODE(new_node, node);
4721 be_dep_on_frame(new_node);
4722 return new_r_Proj(current_ir_graph, block, new_node, mode_M, pn_ia32_Prefetch_M);
/* Shared helper for bsf/bsr-style builtins: match the single operand with
 * address mode and build the node via the given constructor. */
4726 * Transform bsf like node
4728 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4730 ir_node *param = get_Builtin_param(node, 0);
4731 dbg_info *dbgi = get_irn_dbg_info(node);
4733 ir_node *block = get_nodes_block(node);
4734 ir_node *new_block = be_transform_node(block);
4736 ia32_address_mode_t am;
4737 ia32_address_t *addr = &am.addr;
4740 match_arguments(&am, block, NULL, param, NULL, match_am);
4742 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4743 set_am_attributes(cnt, &am);
4744 set_ia32_ls_mode(cnt, get_irn_mode(param));
4746 SET_IA32_ORIG_NODE(cnt, node);
4747 return fix_mem_proj(cnt, &am);
/* Builtin ffs(x): bsf gives the bit index but is undefined for x==0, so
 * materialize the zero flag with Set, sign-extend it to an all-ones mask via
 * Neg, OR it into the bsf result (making it -1 for x==0), and add 1 to get
 * the 1-based ffs convention. */
4751 * Transform builtin ffs.
4753 static ir_node *gen_ffs(ir_node *node)
4755 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4756 ir_node *real = skip_Proj(bsf);
4757 dbg_info *dbgi = get_irn_dbg_info(real);
4758 ir_node *block = get_nodes_block(real);
4759 ir_node *flag, *set, *conv, *neg, *or;
/* bsf also produces flags; switch it to mode_T if necessary */
4762 if (get_irn_mode(real) != mode_T) {
4763 set_irn_mode(real, mode_T);
4764 bsf = new_r_Proj(current_ir_graph, block, real, mode_Iu, pn_ia32_res);
4767 flag = new_r_Proj(current_ir_graph, block, real, mode_b, pn_ia32_flags);
/* set = (input was zero) */
4770 set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
4771 SET_IA32_ORIG_NODE(set, node);
4774 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4775 SET_IA32_ORIG_NODE(conv, node);
/* neg turns the 0/1 flag into a 0/-1 mask */
4778 neg = new_bd_ia32_Neg(dbgi, block, conv);
4781 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4782 set_ia32_commutative(or);
4785 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
/* Builtin clz(x): bsr yields the index of the highest set bit; XOR with 31
 * converts that to the leading-zero count (31 - bsr). */
4789 * Transform builtin clz.
4791 static ir_node *gen_clz(ir_node *node)
4793 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4794 ir_node *real = skip_Proj(bsr);
4795 dbg_info *dbgi = get_irn_dbg_info(real);
4796 ir_node *block = get_nodes_block(real);
4797 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4799 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
/* Builtin ctz(x): bsf directly yields the trailing-zero count. */
4803 * Transform builtin ctz.
4805 static ir_node *gen_ctz(ir_node *node)
4807 return gen_unop_AM(node, new_bd_ia32_Bsf);
/* Builtin parity(x): compare against 0 to set the parity flag, materialize
 * it with Set, and widen the byte result back to 32 bit.
 * NOTE(review): the final return is not visible in this sampled chunk;
 * x86 PF only reflects the low byte -- presumably preceding (invisible)
 * code folds the upper bytes first; confirm against full source. */
4811 * Transform builtin parity.
4813 static ir_node *gen_parity(ir_node *node)
4815 ir_node *param = get_Builtin_param(node, 0);
4816 dbg_info *dbgi = get_irn_dbg_info(node);
4818 ir_node *block = get_nodes_block(node);
4820 ir_node *new_block = be_transform_node(block);
4821 ir_node *imm, *cmp, *new_node;
4823 ia32_address_mode_t am;
4824 ia32_address_t *addr = &am.addr;
4828 match_arguments(&am, block, NULL, param, NULL, match_am);
4829 imm = ia32_create_Immediate(NULL, 0, 0);
4830 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4831 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4832 set_am_attributes(cmp, &am);
4833 set_ia32_ls_mode(cmp, mode_Iu);
4835 SET_IA32_ORIG_NODE(cmp, node);
4837 cmp = fix_mem_proj(cmp, &am);
/* materialize the parity flag as a 0/1 byte */
4840 new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
4841 SET_IA32_ORIG_NODE(new_node, node);
4844 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4845 nomem, new_node, mode_Bu);
4846 SET_IA32_ORIG_NODE(new_node, node);
4851 * Transform builtin popcount
4853 static ir_node *gen_popcount(ir_node *node) {
4854 ir_node *param = get_Builtin_param(node, 0);
4855 dbg_info *dbgi = get_irn_dbg_info(node);
4857 ir_node *block = get_nodes_block(node);
4858 ir_node *new_block = be_transform_node(block);
4861 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4863 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4864 if (ia32_cg_config.use_popcnt) {
4865 ia32_address_mode_t am;
4866 ia32_address_t *addr = &am.addr;
4869 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4871 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4872 set_am_attributes(cnt, &am);
4873 set_ia32_ls_mode(cnt, get_irn_mode(param));
4875 SET_IA32_ORIG_NODE(cnt, node);
4876 return fix_mem_proj(cnt, &am);
4879 new_param = be_transform_node(param);
4881 /* do the standard popcount algo */
4883 /* m1 = x & 0x55555555 */
4884 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4885 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4888 simm = ia32_create_Immediate(NULL, 0, 1);
4889 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4891 /* m2 = s1 & 0x55555555 */
4892 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4895 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4897 /* m4 = m3 & 0x33333333 */
4898 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4899 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4902 simm = ia32_create_Immediate(NULL, 0, 2);
4903 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4905 /* m5 = s2 & 0x33333333 */
4906 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4909 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4911 /* m7 = m6 & 0x0F0F0F0F */
4912 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4913 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4916 simm = ia32_create_Immediate(NULL, 0, 4);
4917 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4919 /* m8 = s3 & 0x0F0F0F0F */
4920 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4923 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4925 /* m10 = m9 & 0x00FF00FF */
4926 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4927 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4930 simm = ia32_create_Immediate(NULL, 0, 8);
4931 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4933 /* m11 = s4 & 0x00FF00FF */
4934 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4936 /* m12 = m10 + m11 */
4937 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4939 /* m13 = m12 & 0x0000FFFF */
4940 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4941 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4943 /* s5 = m12 >> 16 */
4944 simm = ia32_create_Immediate(NULL, 0, 16);
4945 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4947 /* res = m13 + s5 */
4948 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4952 * Transform builtin byte swap.
4954 static ir_node *gen_bswap(ir_node *node) {
4955 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4956 dbg_info *dbgi = get_irn_dbg_info(node);
4958 ir_node *block = get_nodes_block(node);
4959 ir_node *new_block = be_transform_node(block);
4960 ir_mode *mode = get_irn_mode(param);
4961 unsigned size = get_mode_size_bits(mode);
4962 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
4966 if (ia32_cg_config.use_i486) {
4967 /* swap available */
4968 return new_bd_ia32_Bswap(dbgi, new_block, param);
4970 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4971 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4973 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
4974 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
4976 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4978 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
4979 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
4981 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4982 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
4985 /* swap16 always available */
4986 return new_bd_ia32_Bswap16(dbgi, new_block, param);
4989 panic("Invalid bswap size (%d)", size);
4994 * Transform builtin outport.
4996 static ir_node *gen_outport(ir_node *node) {
4997 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
4998 ir_node *oldv = get_Builtin_param(node, 1);
4999 ir_mode *mode = get_irn_mode(oldv);
5000 ir_node *value = be_transform_node(oldv);
5001 ir_node *block = be_transform_node(get_nodes_block(node));
5002 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5003 dbg_info *dbgi = get_irn_dbg_info(node);
5005 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5006 set_ia32_ls_mode(res, mode);
5011 * Transform builtin inport.
static ir_node *gen_inport(ir_node *node) {
	/* Transform builtin inport: read from an x86 I/O port. The result mode
	 * is taken from the builtin's method type (first result). */
	ir_type *tp = get_Builtin_type(node);
	ir_type *rstp = get_method_res_type(tp, 0);
	ir_mode *mode = get_type_mode(rstp);
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);

	ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
	/* record the result mode as the node's load/store mode */
	set_ia32_ls_mode(res, mode);

	/* check for missing Result Proj */
5030 * Transform a builtin inner trampoline
static ir_node *gen_inner_trampoline(ir_node *node) {
	/* Build an inner trampoline in memory at *ptr: store code bytes that
	 * load the static chain (env) into ecx and then jump to the callee.
	 * Returns a Tuple of (memory, trampoline address). */
	ir_node *ptr = get_Builtin_param(node, 0);
	ir_node *callee = be_transform_node(get_Builtin_param(node, 1));
	ir_node *env = be_transform_node(get_Builtin_param(node, 2));
	ir_node *mem = get_Builtin_mem(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ia32_address_t addr;

	/* construct store address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, 0);

	if (addr.base == NULL) {
		addr.base = noreg_GP;
		addr.base = be_transform_node(addr.base);

	if (addr.index == NULL) {
		addr.index = noreg_GP;
		addr.index = be_transform_node(addr.index);
	addr.mem = be_transform_node(mem);

	/* mov ecx, <env> */
	/* 0xB9 is the x86 opcode byte for "mov ecx, imm32" */
	val = ia32_create_Immediate(NULL, 0, 0xB9);
	addr.mem = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                                 addr.index, addr.mem, val);
	/* the 4-byte store of env forms the mov's immediate operand */
	addr.mem = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                             addr.index, addr.mem, env);

	/* 0xE9 is the x86 opcode byte for "jmp rel32".
	 * NOTE(review): jmp rel32 takes a displacement relative to the next
	 * instruction; the visible code stores the callee address directly --
	 * verify how/where the relative displacement is computed. */
	val = ia32_create_Immediate(NULL, 0, 0xE9);
	addr.mem = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                                 addr.index, addr.mem, val);
	in[0] = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, callee);

	/* result 1 is the trampoline address itself */
	in[1] = be_transform_node(ptr);

	return new_Tuple(2, in);
5085 * Transform Builtin node.
static ir_node *gen_Builtin(ir_node *node) {
	/* Central dispatch: route each builtin kind to its transformer. */
	ir_builtin_kind kind = get_Builtin_kind(node);

		return gen_trap(node);
	case ir_bk_debugbreak:
		return gen_debugbreak(node);
	case ir_bk_return_address:
		return gen_return_address(node);
	/* note: "addess" spelling matches the ir_bk_frame_addess enum constant */
	case ir_bk_frame_addess:
		return gen_frame_address(node);
	case ir_bk_prefetch:
		return gen_prefetch(node);
		return gen_ffs(node);
		return gen_clz(node);
		return gen_ctz(node);
		return gen_parity(node);
	case ir_bk_popcount:
		return gen_popcount(node);
		return gen_bswap(node);
		return gen_outport(node);
		return gen_inport(node);
	case ir_bk_inner_trampoline:
		return gen_inner_trampoline(node);
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5124 * Transform Proj(Builtin) node.
static ir_node *gen_Proj_Builtin(ir_node *proj) {
	/* Transform a Proj of an already-transformed Builtin. Most builtins
	 * yield their value directly (the new node stands in for the result
	 * Proj); Inport and inner_trampoline need explicit unpacking. */
	ir_node *node = get_Proj_pred(proj);
	ir_node *new_node = be_transform_node(node);
	ir_builtin_kind kind = get_Builtin_kind(node);

	case ir_bk_return_address:
	case ir_bk_frame_addess:
	case ir_bk_popcount:
		/* single-result builtins: the transformed node IS the result */
		assert(get_Proj_proj(proj) == pn_Builtin_1_result);
	case ir_bk_debugbreak:
	case ir_bk_prefetch:
		/* memory-only builtins */
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			return new_r_Proj(current_ir_graph, get_nodes_block(new_node),
			                  new_node, get_irn_mode(proj), pn_ia32_Inport_res);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return new_r_Proj(current_ir_graph, get_nodes_block(new_node),
		                  new_node, mode_M, pn_ia32_Inport_M);
	case ir_bk_inner_trampoline:
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			/* Tuple[1] is the trampoline address */
			return get_Tuple_pred(new_node, 1);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		/* Tuple[0] is the memory result */
		return get_Tuple_pred(new_node, 0);
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5168 static ir_node *gen_be_IncSP(ir_node *node)
5170 ir_node *res = be_duplicate_node(node);
5171 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5177 * Transform the Projs from a be_Call.
static ir_node *gen_Proj_be_Call(ir_node *node)
	/* Transform a Proj of a be_Call: map the backend proj numbers onto the
	 * ia32_Call output numbering and pin fixed registers (esp, fpcw). */
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *call = get_Proj_pred(node);
	ir_node *new_call = be_transform_node(call);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	ir_mode *mode = get_irn_mode(node);

	if (proj == pn_be_Call_M_regular) {
		/* NOTE(review): n_ia32_Call_mem is an *input* index; a Proj number
		 * (pn_ia32_Call_M) would be expected as the last argument -- verify */
		return new_rd_Proj(dbgi, irg, block, new_call, mode_M, n_ia32_Call_mem);

	/* transform call modes */
	if (mode_is_data(mode)) {
		const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);

	/* Map from be_Call to ia32_Call proj number */
	if (proj == pn_be_Call_sp) {
		proj = pn_ia32_Call_stack;
	} else if (proj == pn_be_Call_M_regular) {
		/* NOTE(review): looks unreachable -- this case already returned
		 * at the top of the function; verify */
		proj = pn_ia32_Call_M;
		arch_register_req_t const *const req = arch_get_register_req_out(node);
		int const n_outs = arch_irn_get_n_outs(new_call);

		assert(proj >= pn_be_Call_first_res);
		assert(req->type & arch_register_req_type_limited);

		/* find the ia32_Call output with the same limited register
		 * requirement as this result Proj */
		for (i = 0; i < n_outs; ++i) {
			arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);

			if (!(new_req->type & arch_register_req_type_limited) ||
			    new_req->cls != req->cls ||
			    *new_req->limited != *req->limited)

	res = new_rd_Proj(dbgi, irg, block, new_call, mode, proj);

	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
	case pn_ia32_Call_stack:
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);

	case pn_ia32_Call_fpcw:
		arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5243 * Transform the Projs from a Cmp.
static ir_node *gen_Proj_Cmp(ir_node *node)
	/* Cmp results (mode_b) must have been lowered before the backend
	 * transformation; reaching this function is always an error. */
	/* this probably means not all mode_b nodes were lowered... */
	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5253 * Transform the Projs from a Bound.
static ir_node *gen_Proj_Bound(ir_node *node)
	/* Transform a Proj of a Bound: the bounds check becomes an ia32 Jcc,
	 * so map the control-flow Projs onto the Jcc outputs. */
	ir_node *new_node, *block;
	ir_node *pred = get_Proj_pred(node);

	switch (get_Proj_proj(node)) {
		return be_transform_node(get_Bound_mem(pred));
	case pn_Bound_X_regular:
		new_node = be_transform_node(pred);
		block = get_nodes_block(new_node);
		/* regular control flow continues on the Jcc "true" exit */
		return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
	case pn_Bound_X_except:
		new_node = be_transform_node(pred);
		block = get_nodes_block(new_node);
		/* the exception path leaves via the Jcc "false" exit */
		return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
		return be_transform_node(get_Bound_index(pred));
	panic("unsupported Proj from Bound");
static ir_node *gen_Proj_ASM(ir_node *node)
	/* Transform a Proj of an ASM node, adjusting the proj number to the
	 * ia32 ASM node's output layout. */
	ir_mode *mode = get_irn_mode(node);
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_node *block = get_nodes_block(new_pred);
	long pos = get_Proj_proj(node);

	if (mode == mode_M) {
		/* memory output is placed past the register outputs (n_outs + 1) */
		pos = arch_irn_get_n_outs(new_pred) + 1;
	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
	} else if (mode_is_float(mode)) {
		panic("unexpected proj mode at ASM");

	return new_r_Proj(current_ir_graph, block, new_pred, mode, pos);
5300 * Transform and potentially renumber Proj nodes.
static ir_node *gen_Proj(ir_node *node)
	/* Generic Proj transformation: dispatch on the opcode of the Proj's
	 * predecessor and renumber or replace the Proj as required. */
	ir_node *pred = get_Proj_pred(node);

	switch (get_irn_opcode(pred)) {
		proj = get_Proj_proj(node);
		if (proj == pn_Store_M) {
			/* the transformed Store stands in for its memory Proj */
			return be_transform_node(pred);
		panic("No idea how to transform proj->Store");
		return gen_Proj_Load(node);
		return gen_Proj_ASM(node);
		return gen_Proj_Builtin(node);
		return gen_Proj_DivMod(node);
		return gen_Proj_CopyB(node);
		return gen_Proj_Quot(node);
		return gen_Proj_be_SubSP(node);
		return gen_Proj_be_AddSP(node);
		return gen_Proj_be_Call(node);
		return gen_Proj_Cmp(node);
		return gen_Proj_Bound(node);
		proj = get_Proj_proj(node);
		case pn_Start_X_initial_exec: {
			ir_node *block = get_nodes_block(pred);
			ir_node *new_block = be_transform_node(block);
			dbg_info *dbgi = get_irn_dbg_info(node);
			/* we exchange the ProjX with a jump */
			ir_node *jump = new_rd_Jmp(dbgi, current_ir_graph, new_block);
		case pn_Start_P_tls:
			return gen_Proj_tls(node);
		if (is_ia32_l_FloattoLL(pred)) {
			return gen_Proj_l_FloattoLL(node);
		} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
			ir_mode *mode = get_irn_mode(node);
			if (ia32_mode_needs_gp_reg(mode)) {
				ir_node *new_pred = be_transform_node(pred);
				ir_node *block = be_transform_node(get_nodes_block(node));
				ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
				                               mode_Iu, get_Proj_proj(node));
#ifdef DEBUG_libfirm
				/* keep the original node number for debug dumps */
				new_proj->node_nr = node->node_nr;

	return be_duplicate_node(node);
5382 * Enters all transform functions into the generic pointer
static void register_transformers(void)
	/* first clear the generic function pointer for all ops */
	clear_irp_opcodes_generic_func();

	/* GEN installs a transformation callback for an op; BAD installs
	 * bad_transform for ops that must not occur at this stage anymore */
#define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
#define BAD(a) op_##a->ops.generic = (op_func)bad_transform

	/* transform ops from intrinsic lowering */
	GEN(ia32_l_LLtoFloat);
	GEN(ia32_l_FloattoLL);

	/* we should never see these nodes */

	/* handle builtins */

	/* handle generic backend nodes */
5480 * Pre-transform all unknown and noreg nodes.
static void ia32_pretransform_node(void)
	/* Pre-transform the Unknown/NoReg placeholder nodes of the code
	 * generator and cache the NoMem and GP-NoReg nodes that the
	 * transformation functions use throughout. */
	ia32_code_gen_t *cg = env_cg;

	cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
	cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
	cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
	cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
	cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
	cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);

	nomem = get_irg_no_mem(current_ir_graph);
	noreg_GP = ia32_new_NoReg_gp(cg);
5500 * Walker, checks if all ia32 nodes producing more than one result have their
5501 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
static void add_missing_keep_walker(ir_node *node, void *data)
	/* For an ia32 node with multiple outputs, record which outputs already
	 * have a Proj and attach the missing ones to a be_Keep so the register
	 * allocator does not lose them. */
	unsigned found_projs = 0;
	const ir_edge_t *edge;
	ir_mode *mode = get_irn_mode(node);

	/* only ia32 nodes are of interest */
	if (!is_ia32_irn(node))

	n_outs = arch_irn_get_n_outs(node);

	if (is_ia32_SwitchJmp(node))

	/* found_projs is a bitset over output numbers */
	assert(n_outs < (int) sizeof(unsigned) * 8);
	/* collect the outputs that already have a Proj */
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);

		/* The node could be kept */

		if (get_irn_mode(proj) == mode_M)

		pn = get_Proj_proj(proj);
		assert(pn < n_outs);
		found_projs |= 1 << pn;

	/* are keeps missing? */
	for (i = 0; i < n_outs; ++i) {
		const arch_register_req_t *req;
		const arch_register_class_t *cls;

		if (found_projs & (1 << i)) {

		req = get_ia32_out_req(node, i);

		/* outputs in the flags register class are skipped */
		if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {

		block = get_nodes_block(node);
		/* create the missing Proj ... */
		in[0] = new_r_Proj(current_ir_graph, block, node,
		                   arch_register_class_mode(cls), i);
		/* ... and keep it, reusing an existing Keep when possible */
		if (last_keep != NULL) {
			be_Keep_add_node(last_keep, cls, in[0]);
		last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
		if (sched_is_scheduled(node)) {
			sched_add_after(node, last_keep);
5576 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
5579 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5581 ir_graph *irg = be_get_birg_irg(cg->birg);
5582 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5586 * Post-process all calls if we are in SSE mode.
5587 * The ABI requires that the results are in st0, copy them
5588 * to a xmm register.
static void postprocess_fp_call_results(void) {
	/* Walk all recorded calls (call_list/call_types) and, for every
	 * floating point result, reroute users from the x87 st0 result to an
	 * xmm value: either patch an xStore user into a vfst, or spill st0 to
	 * the frame and reload it with an xLoad. */
	for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
		ir_node *call = call_list[i];
		ir_type *mtp = call_types[i];

		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
			ir_type *res_tp = get_method_res_type(mtp, j);
			ir_node *res, *new_res;
			const ir_edge_t *edge, *next;

			if (! is_atomic_type(res_tp)) {
				/* no floating point return */

			mode = get_type_mode(res_tp);
			if (! mode_is_float(mode)) {
				/* no floating point return */

			res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);

			/* now patch the users */
			foreach_out_edge_safe(res, edge, next) {
				ir_node *succ = get_edge_src_irn(edge);

				/* Keep nodes do not consume the value, leave them alone */
				if (be_is_Keep(succ))

				if (is_ia32_xStore(succ)) {
					/* an xStore can be patched into an vfst */
					dbg_info *db = get_irn_dbg_info(succ);
					ir_node *block = get_nodes_block(succ);
					ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
					ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
					ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
					ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
					ir_mode *mode = get_ia32_ls_mode(succ);

					/* clone the store, carrying over all address-mode
					 * attributes of the xStore */
					ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
					if (is_ia32_use_frame(succ))
						set_ia32_use_frame(st);
					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
					set_irn_pinned(st, get_irn_pinned(succ));
					set_ia32_op_type(st, ia32_AddrModeD);

				/* lazily build the store/reload sequence, shared by all
				 * remaining users of this result */
				if (new_res == NULL) {
					dbg_info *db = get_irn_dbg_info(call);
					ir_node *block = get_nodes_block(call);
					ir_node *frame = get_irg_frame(current_ir_graph);
					ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
					ir_node *call_mem = new_r_Proj(current_ir_graph, block, call, mode_M, pn_ia32_Call_M);
					ir_node *vfst, *xld, *new_mem;

					/* store st(0) on stack */
					vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
					set_ia32_op_type(vfst, ia32_AddrModeD);
					set_ia32_use_frame(vfst);

					/* load into SSE register */
					xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
					set_ia32_op_type(xld, ia32_AddrModeS);
					set_ia32_use_frame(xld);

					new_res = new_r_Proj(current_ir_graph, block, xld, mode, pn_ia32_xLoad_res);
					new_mem = new_r_Proj(current_ir_graph, block, xld, mode_M, pn_ia32_xLoad_M);

					/* let former users of the call's memory see the load */
					if (old_mem != NULL) {
						edges_reroute(old_mem, new_mem, current_ir_graph);

				set_irn_n(succ, get_edge_src_pos(edge), new_res);
5678 /* do the transformation */
void ia32_transform_graph(ia32_code_gen_t *cg)
	/* Drive the whole firm -> ia32 transformation for one graph. */
	register_transformers();

	initial_fpcw = NULL;

	/* height information is needed by several transformers */
	BE_TIMER_PUSH(t_heights);
	heights = heights_new(cg->irg);
	BE_TIMER_POP(t_heights);
	ia32_calculate_non_address_mode_nodes(cg->birg);

	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();

	call_list = NEW_ARR_F(ir_node *, 0);
	call_types = NEW_ARR_F(ir_type *, 0);
	be_transform_graph(cg->birg, ia32_pretransform_node);

	/* with SSE2, x87 call results must be moved into xmm registers */
	if (ia32_cg_config.use_sse2)
		postprocess_fp_call_results();
	DEL_ARR_F(call_types);
	DEL_ARR_F(call_list);

	/* restore the previous CSE setting */
	set_opt_cse(cse_last);

	ia32_free_non_address_mode_nodes();
	heights_free(heights);
void ia32_init_transform(void)
	/* register the debug channel used by this transformation module */
	FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");