2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode_t.h"
52 #include "../besched.h"
54 #include "../beutil.h"
55 #include "../beirg_t.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 #define SFP_SIGN "0x80000000"
74 #define DFP_SIGN "0x8000000000000000"
75 #define SFP_ABS "0x7FFFFFFF"
76 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
77 #define DFP_INTMAX "9223372036854775807"
78 #define ULL_BIAS "18446744073709551616"
80 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
81 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
82 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
83 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
84 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
86 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
87 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
89 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
91 static ir_node *initial_fpcw = NULL;
/* Typedefs for ia32 node-constructor callbacks. The generic transformation
 * helpers below (gen_binop, gen_unop, gen_shift_binop, ...) are parameterised
 * with one of these so the same matching logic serves many opcodes.
 * NOTE(review): this listing is gap-sampled; some parameter lists are cut off
 * mid-declaration — consult the full file before editing. */
93 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
94 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
/* Binop that additionally consumes an eflags input (e.g. Adc/Sbb). */
97 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
/* Shift/rotate constructor: plain two-operand form, no address mode inputs. */
101 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
102 ir_node *op1, ir_node *op2);
/* Destination-address-mode binop (operates on a memory destination). */
104 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
105 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
/* Destination-address-mode unop. */
107 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
108 ir_node *base, ir_node *index, ir_node *mem);
/* Float binop; presumably also takes the fpcw input — TODO confirm in full file. */
110 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
111 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
/* Plain unary operation constructor. */
114 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in the file. */
116 static ir_node *create_immediate_or_transform(ir_node *node,
117 char immediate_constraint_type);
119 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
120 dbg_info *dbgi, ir_node *block,
121 ir_node *op, ir_node *orig_node);
123 /** Return non-zero if a node represents the 0 constant. */
124 static bool is_Const_0(ir_node *node)
126 return is_Const(node) && is_Const_null(node);
129 /** Return non-zero if a node represents the 1 constant. */
130 static bool is_Const_1(ir_node *node)
132 return is_Const(node) && is_Const_one(node);
135 /** Return non-zero if a node represents the -1 constant (all bits set). */
136 static bool is_Const_Minus_1(ir_node *node)
138 return is_Const(node) && is_Const_all_one(node);
/* True iff the constant can be materialised by a single x87 instruction
 * (0.0 and 1.0 — presumably via fldz/fld1, see gen_Const below). */
142 * returns true if constant can be created with a simple float command
144 static bool is_simple_x87_Const(ir_node *node)
146 tarval *tv = get_Const_tarval(node);
147 if (tarval_is_null(tv) || tarval_is_one(tv))
150 /* TODO: match all the other float constants */
/* True iff the constant can be materialised with a short SSE sequence
 * instead of a constant-pool load: 0.0, 1.0, and (for mode_D) doubles whose
 * low 32 bits are zero so a single 32-bit immediate suffices. */
155 * returns true if constant can be created with a simple float command
157 static bool is_simple_sse_Const(ir_node *node)
159 tarval *tv = get_Const_tarval(node);
160 ir_mode *mode = get_tarval_mode(tv);
165 if (tarval_is_null(tv) || tarval_is_one(tv))
168 if (mode == mode_D) {
/* Assemble the low 32 bits of the double from its byte representation. */
169 unsigned val = get_tarval_sub_bits(tv, 0) |
170 (get_tarval_sub_bits(tv, 1) << 8) |
171 (get_tarval_sub_bits(tv, 2) << 16) |
172 (get_tarval_sub_bits(tv, 3) << 24);
174 /* lower 32bit are zero, really a 32bit constant */
178 /* TODO: match all the other float constants */
/* Transforms a firm Const into ia32 code. Float constants are materialised
 * either via short SSE/x87 instruction sequences (for 0.0, 1.0, and a few
 * special bit patterns) or via a rematerialisable load from a constant-pool
 * entity; integer constants become an ia32 Const immediate. */
183 * Transforms a Const.
185 static ir_node *gen_Const(ir_node *node)
187 ir_node *old_block = get_nodes_block(node);
188 ir_node *block = be_transform_node(old_block);
189 dbg_info *dbgi = get_irn_dbg_info(node);
190 ir_mode *mode = get_irn_mode(node);
192 assert(is_Const(node));
194 if (mode_is_float(mode)) {
196 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
197 ir_node *nomem = new_NoMem();
/* --- SSE2 path: avoid a memory load for simple constants --- */
201 if (ia32_cg_config.use_sse2) {
202 tarval *tv = get_Const_tarval(node);
203 if (tarval_is_null(tv)) {
/* 0.0: xorps/xorpd the register with itself. */
204 load = new_bd_ia32_xZero(dbgi, block);
205 set_ia32_ls_mode(load, mode);
207 } else if (tarval_is_one(tv)) {
/* 1.0: set all bits, then shift left/right to carve out the
 * exponent pattern of 1.0 (shift amounts differ for F vs D). */
208 int cnst = mode == mode_F ? 26 : 55;
209 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
210 ir_node *imm2 = create_Immediate(NULL, 0, 2);
211 ir_node *pslld, *psrld;
213 load = new_bd_ia32_xAllOnes(dbgi, block);
214 set_ia32_ls_mode(load, mode);
215 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
216 set_ia32_ls_mode(pslld, mode);
217 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
218 set_ia32_ls_mode(psrld, mode);
220 } else if (mode == mode_F) {
221 /* we can place any 32bit constant by using a movd gp, sse */
222 unsigned val = get_tarval_sub_bits(tv, 0) |
223 (get_tarval_sub_bits(tv, 1) << 8) |
224 (get_tarval_sub_bits(tv, 2) << 16) |
225 (get_tarval_sub_bits(tv, 3) << 24);
226 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, val);
227 load = new_bd_ia32_xMovd(dbgi, block, cnst);
228 set_ia32_ls_mode(load, mode);
/* Doubles whose low 32 bits are zero: movd the high word, then
 * shift it into place with psllq. */
231 if (mode == mode_D) {
232 unsigned val = get_tarval_sub_bits(tv, 0) |
233 (get_tarval_sub_bits(tv, 1) << 8) |
234 (get_tarval_sub_bits(tv, 2) << 16) |
235 (get_tarval_sub_bits(tv, 3) << 24);
237 ir_node *imm32 = create_Immediate(NULL, 0, 32);
238 ir_node *cnst, *psllq;
240 /* fine, lower 32bit are zero, produce 32bit value */
241 val = get_tarval_sub_bits(tv, 4) |
242 (get_tarval_sub_bits(tv, 5) << 8) |
243 (get_tarval_sub_bits(tv, 6) << 16) |
244 (get_tarval_sub_bits(tv, 7) << 24);
245 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, val);
246 load = new_bd_ia32_xMovd(dbgi, block, cnst);
247 set_ia32_ls_mode(load, mode);
248 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
249 set_ia32_ls_mode(psllq, mode);
/* General SSE case: load from a constant-pool entity; mark it
 * rematerializable so the spiller may redo the load instead of
 * spilling the value. */
254 floatent = create_float_const_entity(node);
256 load = new_bd_ia32_xLoad(dbgi, block, noreg, noreg, nomem,
258 set_ia32_op_type(load, ia32_AddrModeS);
259 set_ia32_am_sc(load, floatent);
260 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
261 res = new_r_Proj(current_ir_graph, block, load, mode_xmm, pn_ia32_xLoad_res);
/* --- x87 path --- */
264 if (is_Const_null(node)) {
265 load = new_bd_ia32_vfldz(dbgi, block);
267 set_ia32_ls_mode(load, mode);
268 } else if (is_Const_one(node)) {
269 load = new_bd_ia32_vfld1(dbgi, block);
271 set_ia32_ls_mode(load, mode);
275 floatent = create_float_const_entity(node);
276 /* create_float_const_ent is smart and sometimes creates
278 ls_mode = get_type_mode(get_entity_type(floatent));
280 load = new_bd_ia32_vfld(dbgi, block, noreg, noreg, nomem,
282 set_ia32_op_type(load, ia32_AddrModeS);
283 set_ia32_am_sc(load, floatent);
284 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
285 res = new_r_Proj(current_ir_graph, block, load, mode_vfp, pn_ia32_vfld_res);
289 SET_IA32_ORIG_NODE(load, node);
291 be_dep_on_frame(load);
293 } else { /* non-float mode */
295 tarval *tv = get_Const_tarval(node);
/* Normalise to 32-bit unsigned so get_tarval_long() below is valid. */
298 tv = tarval_convert_to(tv, mode_Iu);
300 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
302 panic("couldn't convert constant tarval (%+F)", node);
304 val = get_tarval_long(tv);
306 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, val);
307 SET_IA32_ORIG_NODE(cnst, node);
309 be_dep_on_frame(cnst);
/* Transforms a SymConst (address of an entity). Float-mode SymConsts become
 * frame-based loads; address-mode SymConsts become an ia32 Const carrying the
 * entity. Only symconst_addr_ent is supported. */
315 * Transforms a SymConst.
317 static ir_node *gen_SymConst(ir_node *node)
319 ir_node *old_block = get_nodes_block(node);
320 ir_node *block = be_transform_node(old_block);
321 dbgi_info *dbgi_comment_removed;
/* NOTE(review): `align` indexes 16-entry static caches below without a bounds
 * check — callers must guarantee align < 16 (ia32_gen_fp_known_const passes
 * at most 16? verify: it passes 16 for the SIGN/ABS entries — confirm against
 * the full file whether those take a different path). */
352 * Create a float type for the given mode and cache it.
354 * @param mode the mode for the float type (might be integer mode for SSE2 types)
355 * @param align alignment
357 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
363 if (mode == mode_Iu) {
364 static ir_type *int_Iu[16] = {NULL, };
366 if (int_Iu[align] == NULL) {
367 snprintf(buf, sizeof(buf), "int_Iu_%u", align);
368 int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
369 /* set the specified alignment */
370 set_type_alignment_bytes(tp, align);
372 return int_Iu[align];
373 } else if (mode == mode_Lu) {
374 static ir_type *int_Lu[16] = {NULL, };
376 if (int_Lu[align] == NULL) {
377 snprintf(buf, sizeof(buf), "int_Lu_%u", align);
378 int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
379 /* set the specified alignment */
380 set_type_alignment_bytes(tp, align);
382 return int_Lu[align];
383 } else if (mode == mode_F) {
384 static ir_type *float_F[16] = {NULL, };
386 if (float_F[align] == NULL) {
387 snprintf(buf, sizeof(buf), "float_F_%u", align);
388 float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
389 /* set the specified alignment */
390 set_type_alignment_bytes(tp, align);
392 return float_F[align];
393 } else if (mode == mode_D) {
394 static ir_type *float_D[16] = {NULL, };
396 if (float_D[align] == NULL) {
397 snprintf(buf, sizeof(buf), "float_D_%u", align);
398 float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
399 /* set the specified alignment */
400 set_type_alignment_bytes(tp, align);
402 return float_D[align];
/* Fallback: extended precision (mode_E). */
404 static ir_type *float_E[16] = {NULL, };
406 if (float_E[align] == NULL) {
407 snprintf(buf, sizeof(buf), "float_E_%u", align);
408 float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
409 /* set the specified alignment */
410 set_type_alignment_bytes(tp, align);
412 return float_E[align];
/* NOTE(review): like ia32_create_float_type, the alignment is used to index
 * 16-entry caches without a bounds check — assumes align < 16. */
417 * Create a float[2] array type for the given atomic type.
419 * @param tp the atomic type
421 static ir_type *ia32_create_float_array(ir_type *tp) {
423 ir_mode *mode = get_type_mode(tp);
424 unsigned align = get_type_alignment_bytes(tp);
429 if (mode == mode_F) {
430 static ir_type *float_F[16] = {NULL, };
432 if (float_F[align] != NULL)
433 return float_F[align];
434 snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
435 arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
436 } else if (mode == mode_D) {
437 static ir_type *float_D[16] = {NULL, };
439 if (float_D[align] != NULL)
440 return float_D[align];
441 snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
442 arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
/* Fallback: extended precision (mode_E). */
444 static ir_type *float_E[16] = {NULL, };
446 if (float_E[align] != NULL)
447 return float_E[align];
448 snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
449 arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
/* Two elements, fixed layout. */
451 set_type_alignment_bytes(arr, align);
452 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
453 set_type_state(arr, layout_fixed);
457 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
458 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
/* Table of well-known constants: sign-bit masks (for Neg), abs masks,
 * and the unsigned-long-long conversion bias. Entries are indexed by
 * ia32_known_const_t; the `mode` field selects Iu/Lu/F below. */
460 static const struct {
461 const char *ent_name;
462 const char *cnst_str;
465 } names [ia32_known_const_max] = {
466 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
467 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
468 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
469 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
470 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
472 static ir_entity *ent_cache[ia32_known_const_max];
474 const char *ent_name, *cnst_str;
/* Lazily create the entity on first request; cached afterwards. */
480 ent_name = names[kct].ent_name;
481 if (! ent_cache[kct]) {
482 cnst_str = names[kct].cnst_str;
484 switch (names[kct].mode) {
485 case 0: mode = mode_Iu; break;
486 case 1: mode = mode_Lu; break;
487 default: mode = mode_F; break;
489 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
490 tp = ia32_create_float_type(mode, names[kct].align);
492 if (kct == ia32_ULLBIAS)
493 tp = ia32_create_float_array(tp);
494 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
496 set_entity_ld_ident(ent, get_entity_ident(ent));
497 set_entity_visibility(ent, visibility_local);
498 set_entity_variability(ent, variability_constant);
499 set_entity_allocation(ent, allocation_static);
/* ULLBIAS is a two-element array: { 0, bias }. */
501 if (kct == ia32_ULLBIAS) {
502 ir_initializer_t *initializer = create_initializer_compound(2);
504 set_initializer_compound_value(initializer, 0,
505 create_initializer_tarval(get_tarval_null(mode)));
506 set_initializer_compound_value(initializer, 1,
507 create_initializer_tarval(tv));
509 set_entity_initializer(ent, initializer);
511 set_entity_initializer(ent, create_initializer_tarval(tv));
514 /* cache the entry */
515 ent_cache[kct] = ent;
518 return ent_cache[kct];
522 * return true if the node is a Proj(Load) and could be used in source address
523 * mode for another node. Will return only true if the @p other node is not
524 * dependent on the memory of the Load (for binary operations use the other
525 * input here, for unary operations use NULL).
527 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
528 ir_node *other, ir_node *other2, match_flags_t flags)
533 /* float constants are always available */
534 if (is_Const(node)) {
535 ir_mode *mode = get_irn_mode(node);
536 if (mode_is_float(mode)) {
/* Simple constants are cheaper as instructions than as AM loads. */
537 if (ia32_cg_config.use_sse2) {
538 if (is_simple_sse_Const(node))
541 if (is_simple_x87_Const(node))
/* Multi-user constants stay in a register rather than being folded. */
544 if (get_irn_n_edges(node) > 1)
/* Must be the result Proj of a Load in the same block. */
552 load = get_Proj_pred(node);
553 pn = get_Proj_proj(node);
554 if (!is_Load(load) || pn != pn_Load_res)
556 if (get_nodes_block(load) != block)
558 /* we only use address mode if we're the only user of the load */
559 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
561 /* in some edge cases with address mode we might reach the load normally
562 * and through some AM sequence, if it is already materialized then we
563 * can't create an AM node from it */
564 if (be_is_transformed(node))
567 /* don't do AM if other node inputs depend on the load (via mem-proj) */
568 if (other != NULL && prevents_AM(block, load, other))
571 if (other2 != NULL && prevents_AM(block, load, other2))
/* Result of match_arguments(): a matched addressing/operand mode for one
 * ia32 instruction. (Listing is gap-sampled; further fields — addr, new_op1,
 * new_op2, ls_mode, pinned, mem_proj — are declared in the omitted lines.) */
577 typedef struct ia32_address_mode_t ia32_address_mode_t;
578 struct ia32_address_mode_t {
583 ia32_op_type_t op_type;
584 unsigned commutative_comment_removed;
/* Decompose @p ptr into an ia32 address (base/index/scale/offset) and
 * transform its components; missing base/index become the gp NoReg. */
591 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
595 /* construct load address */
596 memset(addr, 0, sizeof(addr[0]));
597 ia32_create_address_mode(addr, ptr, 0);
599 noreg_gp = ia32_new_NoReg_gp(env_cg);
600 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
601 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
602 addr->mem = be_transform_node(mem);
/* Fill am->addr (and ls_mode/pinned/mem_proj) for source address mode.
 * @p node is either a float Const (folded via a constant-pool entity) or a
 * Proj(Load) whose address is decomposed. */
605 static void build_address(ia32_address_mode_t *am, ir_node *node,
606 ia32_create_am_flags_t flags)
608 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
609 ia32_address_t *addr = &am->addr;
/* Float constant: address the constant-pool entity directly, no memory dep. */
615 if (is_Const(node)) {
616 ir_entity *entity = create_float_const_entity(node);
617 addr->base = noreg_gp;
618 addr->index = noreg_gp;
619 addr->mem = new_NoMem();
620 addr->symconst_ent = entity;
622 am->ls_mode = get_type_mode(get_entity_type(entity));
623 am->pinned = op_pin_state_floats;
/* Otherwise: node is Proj(Load) (guaranteed by the caller's
 * ia32_use_source_address_mode check — confirm in full file). */
627 load = get_Proj_pred(node);
628 ptr = get_Load_ptr(load);
629 mem = get_Load_mem(load);
630 new_mem = be_transform_node(mem);
631 am->pinned = get_irn_pinned(load);
632 am->ls_mode = get_Load_mode(load);
633 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
636 /* construct load address */
637 ia32_create_address_mode(addr, ptr, flags);
639 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
640 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/* Copy the components of an ia32 address into the attributes of @p node. */
644 static void set_address(ir_node *node, const ia32_address_t *addr)
646 set_ia32_am_scale(node, addr->scale);
647 set_ia32_am_sc(node, addr->symconst_ent);
648 set_ia32_am_offs_int(node, addr->offset);
649 if (addr->symconst_sign)
650 set_ia32_am_sc_sign(node);
/* Frame-relative addresses additionally record the frame entity. */
652 set_ia32_use_frame(node);
653 set_ia32_frame_ent(node, addr->frame_entity);
657 * Apply attributes of a given address mode to a node.
659 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
661 set_address(node, &am->addr);
663 set_ia32_op_type(node, am->op_type);
664 set_ia32_ls_mode(node, am->ls_mode);
665 if (am->pinned == op_pin_state_pinned) {
666 /* beware: some nodes are already pinned and did not allow to change the state */
667 if (get_irn_pinned(node) != op_pin_state_pinned)
668 set_irn_pinned(node, op_pin_state_pinned);
/* Mark commutativity so later peephole phases may swap operands. */
671 set_ia32_commutative(node);
675 * Check, if a given node is a Down-Conv, ie. a integer Conv
676 * from a mode with a mode with more bits to a mode with lesser bits.
677 * Moreover, we return only true if the node has not more than 1 user.
679 * @param node the node
680 * @return non-zero if node is a Down-Conv
682 static int is_downconv(const ir_node *node)
690 /* we only want to skip the conv when we're the only user
691 * (not optimal but for now...)
693 if (get_irn_n_edges(node) > 1)
/* Both modes must be gp-register integer modes and the target narrower
 * (or equal) — then the upper bits are simply truncated. */
696 src_mode = get_irn_mode(get_Conv_op(node));
697 dest_mode = get_irn_mode(node);
699 ia32_mode_needs_gp_reg(src_mode) &&
700 ia32_mode_needs_gp_reg(dest_mode) &&
701 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
704 /* Skip all Down-Conv's on a given node and return the resulting node. */
705 ir_node *ia32_skip_downconv(ir_node *node)
707 while (is_downconv(node))
708 node = get_Conv_op(node);
/* Widen @p node to 32 bit (signed or unsigned extension depending on its
 * mode); @p orig_node is only used for debug attribution. */
713 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
715 ir_mode *mode = get_irn_mode(node);
/* Choose the 32-bit target mode matching the source's signedness
 * (assignments to tgt_mode are in lines omitted from this listing). */
720 if (mode_is_signed(mode)) {
725 block = get_nodes_block(node);
726 dbgi = get_irn_dbg_info(node);
728 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
732 * matches operands of a node into ia32 addressing/operand modes. This covers
733 * usage of source address mode, immediates, operations with non 32-bit modes,
735 * The resulting data is filled into the @p am struct. block is the block
736 * of the node whose arguments are matched. op1, op2 are the first and second
737 * input that are matched (op1 may be NULL). other_op is another unrelated
738 * input that is not matched! but which is needed sometimes to check if AM
739 * for op1/op2 is legal.
740 * @p flags describes the supported modes of the operation in detail.
742 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
743 ir_node *op1, ir_node *op2, ir_node *other_op,
746 ia32_address_t *addr = &am->addr;
747 ir_mode *mode = get_irn_mode(op2);
748 int mode_bits = get_mode_size_bits(mode);
749 ir_node *noreg_gp, *new_op1, *new_op2;
751 unsigned commutative;
752 int use_am_and_immediates;
755 memset(am, 0, sizeof(am[0]));
/* Decode the match flags into local booleans. */
757 commutative = (flags & match_commutative) != 0;
758 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
759 use_am = (flags & match_am) != 0;
760 use_immediate = (flags & match_immediate) != 0;
761 assert(!use_am_and_immediates || use_immediate);
764 assert(!commutative || op1 != NULL);
765 assert(use_am || !(flags & match_8bit_am));
766 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit address mode only when the operation explicitly supports it. */
768 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
769 (mode_bits == 16 && !(flags & match_16bit_am))) {
773 /* we can simply skip downconvs for mode neutral nodes: the upper bits
774 * can be random for these operations */
775 if (flags & match_mode_neutral) {
776 op2 = ia32_skip_downconv(op2);
778 op1 = ia32_skip_downconv(op1);
782 /* match immediates. firm nodes are normalized: constants are always on the
785 if (!(flags & match_try_am) && use_immediate) {
786 new_op2 = try_create_Immediate(op2, 0);
/* Try source address mode on op2 first, then (if commutative) on op1. */
789 noreg_gp = ia32_new_NoReg_gp(env_cg);
790 if (new_op2 == NULL &&
791 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
792 build_address(am, op2, 0);
793 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
794 if (mode_is_float(mode)) {
795 new_op2 = ia32_new_NoReg_vfp(env_cg);
799 am->op_type = ia32_AddrModeS;
800 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
802 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
804 build_address(am, op1, 0);
806 if (mode_is_float(mode)) {
807 noreg = ia32_new_NoReg_vfp(env_cg);
/* op1 was folded into memory: operands are swapped, record it. */
812 if (new_op2 != NULL) {
815 new_op1 = be_transform_node(op2);
817 am->ins_permuted = 1;
819 am->op_type = ia32_AddrModeS;
/* No AM match: fall back to plain register operands. */
821 am->op_type = ia32_Normal;
823 if (flags & match_try_am) {
829 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
831 new_op2 = be_transform_node(op2);
833 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
/* Fill defaults so consumers always see valid base/index/mem. */
835 if (addr->base == NULL)
836 addr->base = noreg_gp;
837 if (addr->index == NULL)
838 addr->index = noreg_gp;
839 if (addr->mem == NULL)
840 addr->mem = new_NoMem();
842 am->new_op1 = new_op1;
843 am->new_op2 = new_op2;
844 am->commutative = commutative;
/* If the matched address mode consumed a Load, reroute the Load's memory
 * Proj to the new combined node: the node becomes mode_T and a result Proj
 * is returned in its place. */
847 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
852 if (am->mem_proj == NULL)
855 /* we have to create a mode_T so the old MemProj can attach to us */
856 mode = get_irn_mode(node);
857 load = get_Proj_pred(am->mem_proj);
/* Record the mapping so the old Load's Projs retarget to @p node. */
859 be_set_transformed_node(load, node);
861 if (mode != mode_T) {
862 set_irn_mode(node, mode_T);
863 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
870 * Construct a standard binary operation, set AM and immediate if required.
872 * @param node The original node for which the binop is created
873 * @param op1 The first operand
874 * @param op2 The second operand
875 * @param func The node constructor function
876 * @return The constructed ia32 node.
878 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
879 construct_binop_func *func, match_flags_t flags)
882 ir_node *block, *new_block, *new_node;
883 ia32_address_mode_t am;
884 ia32_address_t *addr = &am.addr;
886 block = get_nodes_block(node);
887 match_arguments(&am, block, op1, op2, NULL, flags);
889 dbgi = get_irn_dbg_info(node);
890 new_block = be_transform_node(block);
891 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
892 am.new_op1, am.new_op2);
893 set_am_attributes(new_node, &am);
894 /* we can't use source address mode anymore when using immediates */
895 if (!(flags & match_am_and_immediates) &&
896 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
897 set_ia32_am_support(new_node, ia32_am_none);
898 SET_IA32_ORIG_NODE(new_node, node);
900 new_node = fix_mem_proj(new_node, &am);
/* Generic input-position names for lowered flag-consuming binops; the
 * compile-time asserts pin them to the Adc/Sbb constructor layouts so
 * gen_binop_flags can read inputs position-independently. */
907 n_ia32_l_binop_right,
908 n_ia32_l_binop_eflags
910 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
911 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
912 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
913 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
914 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
915 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
918 * Construct a binary operation which also consumes the eflags.
920 * @param node The node to transform
921 * @param func The node constructor function
922 * @param flags The match flags
923 * @return The constructor ia32 node
925 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
928 ir_node *src_block = get_nodes_block(node);
929 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
930 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
931 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
933 ir_node *block, *new_node, *new_eflags;
934 ia32_address_mode_t am;
935 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: AM must not sink a load past its def. */
937 match_arguments(&am, src_block, op1, op2, eflags, flags);
939 dbgi = get_irn_dbg_info(node);
940 block = be_transform_node(src_block);
941 new_eflags = be_transform_node(eflags);
942 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
943 am.new_op1, am.new_op2, new_eflags);
944 set_am_attributes(new_node, &am);
945 /* we can't use source address mode anymore when using immediates */
946 if (!(flags & match_am_and_immediates) &&
947 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
948 set_ia32_am_support(new_node, ia32_am_none);
949 SET_IA32_ORIG_NODE(new_node, node);
951 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed, cached) initial x87 fp control word node. */
956 static ir_node *get_fpcw(void)
959 if (initial_fpcw != NULL)
962 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
963 &ia32_fp_cw_regs[REG_FPCW]);
964 initial_fpcw = be_transform_node(fpcw);
970 * Construct a standard binary operation, set AM and immediate if required.
972 * @param op1 The first operand
973 * @param op2 The second operand
974 * @param func The node constructor function
975 * @return The constructed ia32 node.
977 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
978 construct_binop_float_func *func)
980 ir_mode *mode = get_irn_mode(node);
982 ir_node *block, *new_block, *new_node;
983 ia32_address_mode_t am;
984 ia32_address_t *addr = &am.addr;
985 ia32_x87_attr_t *attr;
986 /* All operations are considered commutative, because there are reverse
988 match_flags_t flags = match_commutative;
990 /* cannot use address mode with long double on x87 */
991 if (get_mode_size_bits(mode) <= 64)
994 block = get_nodes_block(node);
995 match_arguments(&am, block, op1, op2, NULL, flags);
997 dbgi = get_irn_dbg_info(node);
998 new_block = be_transform_node(block);
/* x87 ops additionally take the fp control word as an input. */
999 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1000 am.new_op1, am.new_op2, get_fpcw());
1001 set_am_attributes(new_node, &am);
/* Propagate operand permutation so the emitter picks the reverse form. */
1003 attr = get_ia32_x87_attr(new_node);
1004 attr->attr.data.ins_permuted = am.ins_permuted;
1006 SET_IA32_ORIG_NODE(new_node, node);
1008 new_node = fix_mem_proj(new_node, &am);
1014 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1016 * @param op1 The first operand
1017 * @param op2 The second operand
1018 * @param func The node constructor function
1019 * @return The constructed ia32 node.
1021 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1022 construct_shift_func *func,
1023 match_flags_t flags)
1026 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1028 assert(! mode_is_float(get_irn_mode(node)));
1029 assert(flags & match_immediate);
1030 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* Mode-neutral shifts may drop down-convs of the value being shifted;
 * otherwise a sub-32-bit value must first be widened. */
1032 if (flags & match_mode_neutral) {
1033 op1 = ia32_skip_downconv(op1);
1034 new_op1 = be_transform_node(op1);
1035 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1036 new_op1 = create_upconv(op1, node);
1038 new_op1 = be_transform_node(op1);
1041 /* the shift amount can be any mode that is bigger than 5 bits, since all
1042 * other bits are ignored anyway */
1043 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1044 ir_node *const op = get_Conv_op(op2);
1045 if (mode_is_float(get_irn_mode(op)))
1048 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
/* Shift count may become an immediate. */
1050 new_op2 = create_immediate_or_transform(op2, 0);
1052 dbgi = get_irn_dbg_info(node);
1053 block = get_nodes_block(node);
1054 new_block = be_transform_node(block);
1055 new_node = func(dbgi, new_block, new_op1, new_op2);
1056 SET_IA32_ORIG_NODE(new_node, node);
1058 /* lowered shift instruction may have a dependency operand, handle it here */
1059 if (get_irn_arity(node) == 3) {
1060 /* we have a dependency */
1061 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1062 add_irn_dep(new_node, new_dep);
1070 * Construct a standard unary operation, set AM and immediate if required.
1072 * @param op The operand
1073 * @param func The node constructor function
1074 * @return The constructed ia32 node.
1076 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1077 match_flags_t flags)
1080 ir_node *block, *new_block, *new_op, *new_node;
1082 assert(flags == 0 || flags == match_mode_neutral);
1083 if (flags & match_mode_neutral) {
1084 op = ia32_skip_downconv(op);
1087 new_op = be_transform_node(op);
1088 dbgi = get_irn_dbg_info(node);
1089 block = get_nodes_block(node);
1090 new_block = be_transform_node(block);
1091 new_node = func(dbgi, new_block, new_op);
1093 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea from a decomposed address, substituting NoReg for a
 * missing base or index and transforming present components. */
1098 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1099 ia32_address_t *addr)
1101 ir_node *base, *index, *res;
1105 base = ia32_new_NoReg_gp(env_cg);
1107 base = be_transform_node(base);
1110 index = addr->index;
1111 if (index == NULL) {
1112 index = ia32_new_NoReg_gp(env_cg);
1114 index = be_transform_node(index);
1117 res = new_bd_ia32_Lea(dbgi, block, base, index);
1118 set_address(res, addr);
1124 * Returns non-zero if a given address mode has a symbolic or
1125 * numerical offset != 0.
1127 static int am_has_immediates(const ia32_address_t *addr)
1129 return addr->offset != 0 || addr->symconst_ent != NULL
1130 || addr->frame_entity || addr->use_frame;
1134 * Creates an ia32 Add.
1136 * @return the created ia32 Add node
1138 static ir_node *gen_Add(ir_node *node)
1140 ir_mode *mode = get_irn_mode(node);
1141 ir_node *op1 = get_Add_left(node);
1142 ir_node *op2 = get_Add_right(node);
1144 ir_node *block, *new_block, *new_node, *add_immediate_op;
1145 ia32_address_t addr;
1146 ia32_address_mode_t am;
/* Float adds go through the generic binop helpers (SSE or x87). */
1148 if (mode_is_float(mode)) {
1149 if (ia32_cg_config.use_sse2)
1150 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1151 match_commutative | match_am);
1153 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1156 ia32_mark_non_am(node);
1158 op2 = ia32_skip_downconv(op2);
1159 op1 = ia32_skip_downconv(op1);
/* Integer Add selection strategy: */
1163 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1164 * 1. Add with immediate -> Lea
1165 * 2. Add with possible source address mode -> Add
1166 * 3. Otherwise -> Lea
1168 memset(&addr, 0, sizeof(addr));
1169 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1170 add_immediate_op = NULL;
1172 dbgi = get_irn_dbg_info(node);
1173 block = get_nodes_block(node);
1174 new_block = be_transform_node(block);
/* Case 0: the whole Add folded into symconst+offset -> plain Const. */
1177 if (addr.base == NULL && addr.index == NULL) {
1178 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1179 addr.symconst_sign, addr.offset);
1180 be_dep_on_frame(new_node);
1181 SET_IA32_ORIG_NODE(new_node, node);
1184 /* add with immediate? */
1185 if (addr.index == NULL) {
1186 add_immediate_op = addr.base;
1187 } else if (addr.base == NULL && addr.scale == 0) {
1188 add_immediate_op = addr.index;
1191 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to the operand itself — warn, it should have
 * been folded earlier. */
1192 if (!am_has_immediates(&addr)) {
1193 #ifdef DEBUG_libfirm
1194 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1197 return be_transform_node(add_immediate_op);
1200 new_node = create_lea_from_address(dbgi, new_block, &addr);
1201 SET_IA32_ORIG_NODE(new_node, node);
1205 /* test if we can use source address mode */
1206 match_arguments(&am, block, op1, op2, NULL, match_commutative
1207 | match_mode_neutral | match_am | match_immediate | match_try_am);
1209 /* construct an Add with source address mode */
1210 if (am.op_type == ia32_AddrModeS) {
1211 ia32_address_t *am_addr = &am.addr;
1212 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1213 am_addr->index, am_addr->mem, am.new_op1,
1215 set_am_attributes(new_node, &am);
1216 SET_IA32_ORIG_NODE(new_node, node);
1218 new_node = fix_mem_proj(new_node, &am);
1223 /* otherwise construct a lea */
1224 new_node = create_lea_from_address(dbgi, new_block, &addr);
1225 SET_IA32_ORIG_NODE(new_node, node);
1230 * Creates an ia32 Mul.
1232 * @return the created ia32 Mul node
/**
 * Transform a firm Mul into an ia32 node: SSE2 xMul or x87 vfmul for
 * floats, IMul (with address mode and immediate matching) for integers.
 */
1234 static ir_node *gen_Mul(ir_node *node)
1236 ir_node *op1 = get_Mul_left(node);
1237 ir_node *op2 = get_Mul_right(node);
1238 ir_mode *mode = get_irn_mode(node);
1240 if (mode_is_float(mode)) {
1241 if (ia32_cg_config.use_sse2)
1242 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1243 match_commutative | match_am);
1245 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul can take a source address mode and immediates */
1247 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1248 match_commutative | match_am | match_mode_neutral |
1249 match_immediate | match_am_and_immediates);
1253 * Creates an ia32 Mulh.
1254 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1255 * this result while Mul returns the lower 32 bit.
1257 * @return the created ia32 Mulh node
/**
 * Transform a firm Mulh (high part of a widening multiply) into ia32.
 * Uses one-operand IMul for signed, Mul for unsigned, and returns a Proj
 * selecting the upper 32 bits of the 64-bit product.
 */
1259 static ir_node *gen_Mulh(ir_node *node)
1261 ir_node *block = get_nodes_block(node);
1262 ir_node *new_block = be_transform_node(block);
1263 dbg_info *dbgi = get_irn_dbg_info(node);
1264 ir_node *op1 = get_Mulh_left(node);
1265 ir_node *op2 = get_Mulh_right(node);
1266 ir_mode *mode = get_irn_mode(node);
1268 ir_node *proj_res_high;
1270 if (mode_is_signed(mode)) {
1271 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1272 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1273 mode_Iu, pn_ia32_IMul1OP_res_high);
1275 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1276 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1277 mode_Iu, pn_ia32_Mul_res_high);
1279 return proj_res_high;
1283 * Creates an ia32 And.
1285 * @return The created ia32 And node
/**
 * Transform a firm And into an ia32 And.
 * Special case: And with 0xFF / 0xFFFF is a zero extension and becomes a
 * Conv (movzx) instead of an And instruction.
 */
1287 static ir_node *gen_And(ir_node *node)
1289 ir_node *op1 = get_And_left(node);
1290 ir_node *op2 = get_And_right(node);
1291 assert(! mode_is_float(get_irn_mode(node)));
1293 /* is it a zero extension? */
1294 if (is_Const(op2)) {
1295 tarval *tv = get_Const_tarval(op2);
1296 long v = get_tarval_long(tv);
1298 if (v == 0xFF || v == 0xFFFF) {
1299 dbg_info *dbgi = get_irn_dbg_info(node);
1300 ir_node *block = get_nodes_block(node);
/* NOTE(review): src_mode selection for the 0xFF case is elided here;
 * the visible assert only covers the 0xFFFF branch. */
1307 assert(v == 0xFFFF);
1310 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1315 return gen_binop(node, op1, op2, new_bd_ia32_And,
1316 match_commutative | match_mode_neutral | match_am | match_immediate);
1322 * Creates an ia32 Or.
1324 * @return The created ia32 Or node
/**
 * Transform a firm Or into an ia32 Or (integer only; address mode and
 * immediate operands allowed).
 */
1326 static ir_node *gen_Or(ir_node *node)
1328 ir_node *op1 = get_Or_left(node);
1329 ir_node *op2 = get_Or_right(node);
1331 assert (! mode_is_float(get_irn_mode(node)));
1332 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1333 | match_mode_neutral | match_am | match_immediate);
1339 * Creates an ia32 Eor.
1341 * @return The created ia32 Eor node
/**
 * Transform a firm Eor (exclusive or) into an ia32 Xor (integer only;
 * address mode and immediate operands allowed).
 */
1343 static ir_node *gen_Eor(ir_node *node)
1345 ir_node *op1 = get_Eor_left(node);
1346 ir_node *op2 = get_Eor_right(node);
1348 assert(! mode_is_float(get_irn_mode(node)));
1349 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1350 | match_mode_neutral | match_am | match_immediate);
1355 * Creates an ia32 Sub.
1357 * @return The created ia32 Sub node
/**
 * Transform a firm Sub into an ia32 node: SSE2 xSub or x87 vfsub for
 * floats, ia32 Sub for integers. Sub is not commutative, so no
 * match_commutative flag is passed.
 */
1359 static ir_node *gen_Sub(ir_node *node)
1361 ir_node *op1 = get_Sub_left(node);
1362 ir_node *op2 = get_Sub_right(node);
1363 ir_mode *mode = get_irn_mode(node);
1365 if (mode_is_float(mode)) {
1366 if (ia32_cg_config.use_sse2)
1367 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1369 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,Const should have been normalized to Add x,-Const earlier */
1372 if (is_Const(op2)) {
1373 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1377 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1378 | match_am | match_immediate);
/**
 * Build the memory input for a node that consumed a load as its address
 * mode. Avoids a memory self-loop: if src_mem is (a Proj of) the memory
 * of the very load feeding src_val, that edge is dropped; if src_mem is
 * a Sync, the offending predecessor is filtered out and a new Sync is
 * built; otherwise the am memory and transformed src_mem are Sync'ed.
 */
1381 static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
1382 ir_node *const src_val,
1383 ir_node *const src_mem,
1384 ir_node *const am_mem)
1386 if (is_NoMem(am_mem)) {
1387 return be_transform_node(src_mem);
1388 } else if (is_Proj(src_val) &&
1390 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1391 /* avoid memory loop */
1393 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1394 ir_node *const ptr_pred = get_Proj_pred(src_val);
1395 int const arity = get_Sync_n_preds(src_mem);
/* collect all Sync predecessors except the one producing src_val,
 * leaving room for am_mem */
1400 NEW_ARR_A(ir_node*, ins, arity + 1);
1402 /* NOTE: This sometimes produces dead-code because the old sync in
1403 * src_mem might not be used anymore, we should detect this case
1404 * and kill the sync... */
1405 for (i = arity - 1; i >= 0; --i) {
1406 ir_node *const pred = get_Sync_pred(src_mem, i);
1408 /* avoid memory loop */
1409 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1412 ins[n++] = be_transform_node(pred);
1417 return new_r_Sync(irg, block, n, ins);
/* generic case: synchronize am memory with the transformed src_mem */
1421 ins[0] = be_transform_node(src_mem);
1423 return new_r_Sync(irg, block, 2, ins);
/**
 * Create the 32->64 bit sign extension of val (the high word, e.g. for
 * edx before an idiv): either a short Cltd (cdq) fed by a ProduceVal, or
 * an arithmetic right shift by 31, depending on the target config.
 */
1427 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1428 ir_node *val, const ir_node *orig)
1433 if (ia32_cg_config.use_short_sex_eax) {
/* ProduceVal gives the register allocator a fake value to overwrite */
1434 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1435 be_dep_on_frame(pval);
1436 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1438 ir_node *imm31 = create_Immediate(NULL, 0, 31);
1439 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1441 SET_IA32_ORIG_NODE(res, orig);
1446 * Generates an ia32 DivMod with additional infrastructure for the
1447 * register allocator if needed.
/**
 * Common transformation for firm Div, Mod and DivMod nodes into an ia32
 * IDiv (signed, with sign extension of the dividend) or Div (unsigned,
 * with a zero high word). Operands, memory and result mode are fetched
 * according to the concrete opcode.
 */
1449 static ir_node *create_Div(ir_node *node)
1451 dbg_info *dbgi = get_irn_dbg_info(node);
1452 ir_node *block = get_nodes_block(node);
1453 ir_node *new_block = be_transform_node(block);
1460 ir_node *sign_extension;
1461 ia32_address_mode_t am;
1462 ia32_address_t *addr = &am.addr;
1464 /* the upper bits have random contents for smaller modes */
1465 switch (get_irn_opcode(node)) {
1467 op1 = get_Div_left(node);
1468 op2 = get_Div_right(node);
1469 mem = get_Div_mem(node);
1470 mode = get_Div_resmode(node);
1473 op1 = get_Mod_left(node);
1474 op2 = get_Mod_right(node);
1475 mem = get_Mod_mem(node);
1476 mode = get_Mod_resmode(node);
1479 op1 = get_DivMod_left(node);
1480 op2 = get_DivMod_right(node);
1481 mem = get_DivMod_mem(node);
1482 mode = get_DivMod_resmode(node);
1485 panic("invalid divmod node %+F", node);
1488 match_arguments(&am, block, op1, op2, NULL, match_am);
1490 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1491 is the memory of the consumed address. We can have only the second op as address
1492 in Div nodes, so check only op2. */
1493 new_mem = transform_AM_mem(current_ir_graph, block, op2, mem, addr->mem);
1495 if (mode_is_signed(mode)) {
/* signed: sign-extend dividend into the high register (edx) */
1496 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1497 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1498 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned: high word of the dividend is simply zero */
1500 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0);
1501 be_dep_on_frame(sign_extension);
1503 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1504 addr->index, new_mem, am.new_op2,
1505 am.new_op1, sign_extension);
1508 set_irn_pinned(new_node, get_irn_pinned(node));
1510 set_am_attributes(new_node, &am);
1511 SET_IA32_ORIG_NODE(new_node, node);
1513 new_node = fix_mem_proj(new_node, &am);
/* Mod, Div and DivMod all lower to the same ia32 divide instruction;
 * consumers select their result via the appropriate Proj. */
1519 static ir_node *gen_Mod(ir_node *node)
1521 return create_Div(node);
1524 static ir_node *gen_Div(ir_node *node)
1526 return create_Div(node);
1529 static ir_node *gen_DivMod(ir_node *node)
1531 return create_Div(node);
1537 * Creates an ia32 floating Div.
1539 * @return The created ia32 xDiv node
/**
 * Transform a firm Quot (floating-point division) into an SSE2 xDiv or
 * an x87 vfdiv, depending on the target configuration.
 */
1541 static ir_node *gen_Quot(ir_node *node)
1543 ir_node *op1 = get_Quot_left(node);
1544 ir_node *op2 = get_Quot_right(node);
1546 if (ia32_cg_config.use_sse2) {
1547 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1549 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1555 * Creates an ia32 Shl.
1557 * @return The created ia32 Shl node
/**
 * Transform a firm Shl into an ia32 Shl. mode_neutral is allowed because
 * a left shift never depends on the upper bits of its input.
 */
1559 static ir_node *gen_Shl(ir_node *node)
1561 ir_node *left = get_Shl_left(node);
1562 ir_node *right = get_Shl_right(node);
1564 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1565 match_mode_neutral | match_immediate);
1569 * Creates an ia32 Shr.
1571 * @return The created ia32 Shr node
/**
 * Transform a firm Shr into an ia32 Shr. No match_mode_neutral here: a
 * logical right shift does depend on the upper bits being zero.
 */
1573 static ir_node *gen_Shr(ir_node *node)
1575 ir_node *left = get_Shr_left(node);
1576 ir_node *right = get_Shr_right(node);
1578 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1584 * Creates an ia32 Sar.
1586 * @return The created ia32 Shrs node
/**
 * Transform a firm Shrs (arithmetic shift right) into an ia32 Sar.
 * Recognizes two idioms first: Shrs(x, 31) is a plain 32->64 sign
 * extension, and Shrs(Shl(x, c), c) with c in {16, 24} is an 8/16-bit
 * sign extension, which becomes a Conv (movsx).
 */
1588 static ir_node *gen_Shrs(ir_node *node)
1590 ir_node *left = get_Shrs_left(node);
1591 ir_node *right = get_Shrs_right(node);
1593 if (is_Const(right)) {
1594 tarval *tv = get_Const_tarval(right);
1595 long val = get_tarval_long(tv);
/* NOTE(review): the check that val == 31 is elided in this listing */
1597 /* this is a sign extension */
1598 dbg_info *dbgi = get_irn_dbg_info(node);
1599 ir_node *block = be_transform_node(get_nodes_block(node));
1600 ir_node *new_op = be_transform_node(left);
1602 return create_sex_32_64(dbgi, block, new_op, node);
1606 /* 8 or 16 bit sign extension? */
1607 if (is_Const(right) && is_Shl(left)) {
1608 ir_node *shl_left = get_Shl_left(left);
1609 ir_node *shl_right = get_Shl_right(left);
1610 if (is_Const(shl_right)) {
1611 tarval *tv1 = get_Const_tarval(right);
1612 tarval *tv2 = get_Const_tarval(shl_right);
/* both shift amounts equal and 16 or 24 -> movsx from 16/8 bits */
1613 if (tv1 == tv2 && tarval_is_long(tv1)) {
1614 long val = get_tarval_long(tv1);
1615 if (val == 16 || val == 24) {
1616 dbg_info *dbgi = get_irn_dbg_info(node);
1617 ir_node *block = get_nodes_block(node);
1627 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1636 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1642 * Creates an ia32 Rol.
1644 * @param op1 The first operator
1645 * @param op2 The second operator
1646 * @return The created ia32 RotL node
/* Create an ia32 Rol (rotate left) for the given operands. */
1648 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1650 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1656 * Creates an ia32 Ror.
1657 * NOTE: There is no RotR with immediate because this would always be a RotL
1658 * "imm-mode_size_bits" which can be pre-calculated.
1660 * @param op1 The first operator
1661 * @param op2 The second operator
1662 * @return The created ia32 RotR node
/* Create an ia32 Ror (rotate right) for the given operands; only used
 * from the RotL pattern match in gen_Rotl. */
1664 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1666 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1672 * Creates an ia32 RotR or RotL (depending on the found pattern).
1674 * @return The created ia32 RotL or RotR node
/**
 * Transform a firm Rotl into an ia32 Rol, or into a Ror when the rotate
 * amount has the shape "bits - e" (already normalized to Add(Minus(e),
 * bits) by earlier phases), since RotL(x, bits - e) == RotR(x, e).
 */
1676 static ir_node *gen_Rotl(ir_node *node)
1678 ir_node *rotate = NULL;
1679 ir_node *op1 = get_Rotl_left(node);
1680 ir_node *op2 = get_Rotl_right(node);
1682 /* Firm has only RotL, so we are looking for a right (op2)
1683 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1684 that means we can create a RotR instead of an Add and a RotL */
1688 ir_node *left = get_Add_left(add);
1689 ir_node *right = get_Add_right(add);
1690 if (is_Const(right)) {
1691 tarval *tv = get_Const_tarval(right);
1692 ir_mode *mode = get_irn_mode(node);
1693 long bits = get_mode_size_bits(mode);
/* Minus(e) + bits with bits == mode size -> rotate right by e */
1695 if (is_Minus(left) &&
1696 tarval_is_long(tv) &&
1697 get_tarval_long(tv) == bits &&
1700 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1701 rotate = gen_Ror(node, op1, get_Minus_op(left));
1706 if (rotate == NULL) {
1707 rotate = gen_Rol(node, op1, op2);
1716 * Transforms a Minus node.
1718 * @return The created ia32 Minus node
/**
 * Transform a firm Minus (negation).
 * SSE2 floats: xXor with a sign-bit constant fetched via address mode;
 * x87 floats: vfchs (fchs); integers: ia32 Neg.
 */
1720 static ir_node *gen_Minus(ir_node *node)
1722 ir_node *op = get_Minus_op(node);
1723 ir_node *block = be_transform_node(get_nodes_block(node));
1724 dbg_info *dbgi = get_irn_dbg_info(node);
1725 ir_mode *mode = get_irn_mode(node);
1730 if (mode_is_float(mode)) {
1731 ir_node *new_op = be_transform_node(op);
1732 if (ia32_cg_config.use_sse2) {
1733 /* TODO: non-optimal... if we have many xXors, then we should
1734 * rather create a load for the const and use that instead of
1735 * several AM nodes... */
1736 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1737 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1738 ir_node *nomem = new_NoMem();
1740 new_node = new_bd_ia32_xXor(dbgi, block, noreg_gp, noreg_gp,
1741 nomem, new_op, noreg_xmm);
/* pick the single or double precision sign-mask constant */
1743 size = get_mode_size_bits(mode);
1744 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1746 set_ia32_am_sc(new_node, ent);
1747 set_ia32_op_type(new_node, ia32_AddrModeS);
1748 set_ia32_ls_mode(new_node, mode);
1750 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1753 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1756 SET_IA32_ORIG_NODE(new_node, node);
1762 * Transforms a Not node.
1764 * @return The created ia32 Not node
/**
 * Transform a firm Not (bitwise complement) into an ia32 Not. mode_b
 * Nots must have been lowered before this phase.
 */
1766 static ir_node *gen_Not(ir_node *node)
1768 ir_node *op = get_Not_op(node);
1770 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1771 assert (! mode_is_float(get_irn_mode(node)));
1773 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1779 * Transforms an Abs node.
1781 * @return The created ia32 Abs node
/**
 * Transform a firm Abs (absolute value).
 * SSE2 floats: xAnd with a sign-clearing mask constant; x87: vfabs.
 * Integers use the branchless idiom: s = x >> 31; (x ^ s) - s.
 */
1783 static ir_node *gen_Abs(ir_node *node)
1785 ir_node *block = get_nodes_block(node);
1786 ir_node *new_block = be_transform_node(block);
1787 ir_node *op = get_Abs_op(node);
1788 dbg_info *dbgi = get_irn_dbg_info(node);
1789 ir_mode *mode = get_irn_mode(node);
1790 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1791 ir_node *nomem = new_NoMem();
1797 if (mode_is_float(mode)) {
1798 new_op = be_transform_node(op);
1800 if (ia32_cg_config.use_sse2) {
1801 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1802 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_gp, noreg_gp,
1803 nomem, new_op, noreg_fp);
/* mask constant clears the sign bit (SABS/DABS by precision) */
1805 size = get_mode_size_bits(mode);
1806 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1808 set_ia32_am_sc(new_node, ent);
1810 SET_IA32_ORIG_NODE(new_node, node);
1812 set_ia32_op_type(new_node, ia32_AddrModeS);
1813 set_ia32_ls_mode(new_node, mode);
1815 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1816 SET_IA32_ORIG_NODE(new_node, node);
1819 ir_node *xor, *sign_extension;
/* smaller ints are sign-extended to 32 bit first */
1821 if (get_mode_size_bits(mode) == 32) {
1822 new_op = be_transform_node(op);
1824 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1827 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1829 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_gp, noreg_gp,
1830 nomem, new_op, sign_extension);
1831 SET_IA32_ORIG_NODE(xor, node);
1833 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_gp, noreg_gp,
1834 nomem, xor, sign_extension);
1835 SET_IA32_ORIG_NODE(new_node, node);
1842 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/**
 * Create a bt (bit test) instruction for the pattern x & (1 << n),
 * placed in the (transformed) block of the original compare.
 */
1844 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1846 dbg_info *dbgi = get_irn_dbg_info(cmp);
1847 ir_node *block = get_nodes_block(cmp);
1848 ir_node *new_block = be_transform_node(block);
1849 ir_node *op1 = be_transform_node(x);
1850 ir_node *op2 = be_transform_node(n);
1852 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1856 * Transform a node returning a "flag" result.
1858 * @param node the node to transform
1859 * @param pnc_out the compare mode to use
/**
 * Produce a flags-producing node for a mode_b value and report via
 * pnc_out which condition to test on it.
 * A Proj of a Cmp against (1 << n) & x (either operand order) becomes a
 * Bt tested via carry (Jc/Jnc); other Cmp Projs are transformed
 * directly; a plain mode_b value is Tested against itself (!= 0).
 */
1861 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1870 /* we have a Cmp as input */
1871 if (is_Proj(node)) {
1872 ir_node *pred = get_Proj_pred(node);
1874 pn_Cmp pnc = get_Proj_proj(node);
1875 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1876 ir_node *l = get_Cmp_left(pred);
1877 ir_node *r = get_Cmp_right(pred);
1879 ir_node *la = get_And_left(l);
1880 ir_node *ra = get_And_right(l);
1882 ir_node *c = get_Shl_left(la);
1883 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1884 /* (1 << n) & ra) */
1885 ir_node *n = get_Shl_right(la);
1886 flags = gen_bt(pred, ra, n);
1887 /* we must generate a Jc/Jnc jump */
/* bt sets the carry flag: Lt/Ge here encode carry set/clear */
1888 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1891 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* mirrored case: the Shl is the right operand of the And */
1896 ir_node *c = get_Shl_left(ra);
1897 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1898 /* la & (1 << n)) */
1899 ir_node *n = get_Shl_right(ra);
1900 flags = gen_bt(pred, la, n);
1901 /* we must generate a Jc/Jnc jump */
1902 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1905 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1911 flags = be_transform_node(pred);
1917 /* a mode_b value, we have to compare it against 0 */
1918 dbgi = get_irn_dbg_info(node);
1919 new_block = be_transform_node(get_nodes_block(node));
1920 new_op = be_transform_node(node);
1921 noreg = ia32_new_NoReg_gp(env_cg);
1922 nomem = new_NoMem();
1923 flags = new_bd_ia32_Test(dbgi, new_block, noreg, noreg, nomem, new_op,
1924 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1925 *pnc_out = pn_Cmp_Lg;
1930 * Transforms a Load.
1932 * @return the created ia32 Load node
/**
 * Transform a firm Load into an ia32 load: xLoad (SSE2) or vfld (x87)
 * for floats, a Conv-with-address-mode (movzx/movsx) for sub-32-bit
 * integers, and a plain Load otherwise. The address is decomposed into
 * base/index/offset via ia32_create_address_mode.
 */
1934 static ir_node *gen_Load(ir_node *node)
1936 ir_node *old_block = get_nodes_block(node);
1937 ir_node *block = be_transform_node(old_block);
1938 ir_node *ptr = get_Load_ptr(node);
1939 ir_node *mem = get_Load_mem(node);
1940 ir_node *new_mem = be_transform_node(mem);
1943 dbg_info *dbgi = get_irn_dbg_info(node);
1944 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1945 ir_mode *mode = get_Load_mode(node);
1948 ia32_address_t addr;
1950 /* construct load address */
1951 memset(&addr, 0, sizeof(addr));
1952 ia32_create_address_mode(&addr, ptr, 0);
1959 base = be_transform_node(base);
1962 if (index == NULL) {
1965 index = be_transform_node(index);
1968 if (mode_is_float(mode)) {
1969 if (ia32_cg_config.use_sse2) {
1970 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
1972 res_mode = mode_xmm;
1974 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
1976 res_mode = mode_vfp;
1979 assert(mode != mode_b);
1981 /* create a conv node with address mode for smaller modes */
1982 if (get_mode_size_bits(mode) < 32) {
1983 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
1984 new_mem, noreg, mode);
1986 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
1991 set_irn_pinned(new_node, get_irn_pinned(node));
1992 set_ia32_op_type(new_node, ia32_AddrModeS);
1993 set_ia32_ls_mode(new_node, mode);
1994 set_address(new_node, &addr);
/* floating-pinned loads may be rematerialized by the spiller */
1996 if (get_irn_pinned(node) == op_pin_state_floats) {
1997 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
1998 && pn_ia32_vfld_res == pn_ia32_Load_res
1999 && pn_ia32_Load_res == pn_ia32_res);
2000 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2003 SET_IA32_ORIG_NODE(new_node, node);
2005 be_dep_on_frame(new_node);
/**
 * Decide whether a load result (node, a Proj of a Load) may be folded
 * into a destination-address-mode operation for a store at ptr.
 * Requires: the load has exactly one user, lives in the same block,
 * loads from the same pointer as the store, no other input of the
 * operation depends on the load, and nothing else prevents AM.
 */
2009 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2010 ir_node *ptr, ir_node *other)
2017 /* we only use address mode if we're the only user of the load */
2018 if (get_irn_n_edges(node) > 1)
2021 load = get_Proj_pred(node);
2024 if (get_nodes_block(load) != block)
2027 /* store should have the same pointer as the load */
2028 if (get_Load_ptr(load) != ptr)
2031 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2032 if (other != NULL &&
2033 get_nodes_block(other) == block &&
2034 heights_reachable_in_block(heights, other, load)) {
2038 if (prevents_AM(block, load, mem))
2040 /* Store should be attached to the load via mem */
2041 assert(heights_reachable_in_block(heights, mem, load));
/**
 * Try to create a destination-address-mode binary operation
 * (op [mem], reg/imm) for a Store(binop(Load, x)) pattern. Either
 * operand may be the folded load (the other must become an immediate or
 * register); for non-commutative ops only op1 may be the load. func8bit
 * is used for 8-bit modes. Returns the new node, or falls through when
 * no AM form applies (fallthrough lines elided in this listing).
 */
2046 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2047 ir_node *mem, ir_node *ptr, ir_mode *mode,
2048 construct_binop_dest_func *func,
2049 construct_binop_dest_func *func8bit,
2050 match_flags_t flags)
2052 ir_node *src_block = get_nodes_block(node);
2054 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
2061 ia32_address_mode_t am;
2062 ia32_address_t *addr = &am.addr;
2063 memset(&am, 0, sizeof(am));
2065 assert(flags & match_immediate); /* there is no destam node without... */
2066 commutative = (flags & match_commutative) != 0;
2068 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2069 build_address(&am, op1, ia32_create_am_double_use);
2070 new_op = create_immediate_or_transform(op2, 0);
2071 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2072 build_address(&am, op2, ia32_create_am_double_use);
2073 new_op = create_immediate_or_transform(op1, 0);
/* fill in defaults for unused address parts */
2078 if (addr->base == NULL)
2079 addr->base = noreg_gp;
2080 if (addr->index == NULL)
2081 addr->index = noreg_gp;
2082 if (addr->mem == NULL)
2083 addr->mem = new_NoMem();
2085 dbgi = get_irn_dbg_info(node);
2086 block = be_transform_node(src_block);
2087 new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
2089 if (get_mode_size_bits(mode) == 8) {
2090 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2092 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2094 set_address(new_node, addr);
2095 set_ia32_op_type(new_node, ia32_AddrModeD);
2096 set_ia32_ls_mode(new_node, mode);
2097 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load (and its mem proj) to the new node */
2099 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2100 mem_proj = be_transform_node(am.mem_proj);
2101 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Try to create a destination-address-mode unary operation
 * (op [mem]) for a Store(unop(Load)) pattern. Returns NULL-equivalent
 * fallthrough when use_dest_am rejects the fold (elided in listing).
 */
2106 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2107 ir_node *ptr, ir_mode *mode,
2108 construct_unop_dest_func *func)
2110 ir_node *src_block = get_nodes_block(node);
2116 ia32_address_mode_t am;
2117 ia32_address_t *addr = &am.addr;
2119 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2122 memset(&am, 0, sizeof(am));
2123 build_address(&am, op, ia32_create_am_double_use);
2125 dbgi = get_irn_dbg_info(node);
2126 block = be_transform_node(src_block);
2127 new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
2128 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2129 set_address(new_node, addr);
2130 set_ia32_op_type(new_node, ia32_AddrModeD);
2131 set_ia32_ls_mode(new_node, mode);
2132 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load (and its mem proj) to the new node */
2134 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2135 mem_proj = be_transform_node(am.mem_proj);
2136 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Try to turn Store(Mux(sel, 1, 0)) (or the inverted 0/1 form) of an
 * 8-bit value into a SetMem (setcc to memory). Only 8-bit modes and
 * constant 0/1 Mux arms qualify; the 0/1 order decides "negated".
 */
2141 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2143 ir_mode *mode = get_irn_mode(node);
2144 ir_node *mux_true = get_Mux_true(node);
2145 ir_node *mux_false = get_Mux_false(node);
2155 ia32_address_t addr;
2157 if (get_mode_size_bits(mode) != 8)
2160 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2162 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2168 build_address_ptr(&addr, ptr, mem);
2170 dbgi = get_irn_dbg_info(node);
2171 block = get_nodes_block(node);
2172 new_block = be_transform_node(block);
2173 cond = get_Mux_sel(node);
2174 flags = get_flags_node(cond, &pnc);
2175 new_mem = be_transform_node(mem);
2176 new_node = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2177 addr.index, addr.mem, flags, pnc, negated);
2178 set_address(new_node, &addr);
2179 set_ia32_op_type(new_node, ia32_AddrModeD);
2180 set_ia32_ls_mode(new_node, mode);
2181 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to transform Store(op(Load(ptr), x)) into a destination-address-
 * mode instruction (op [ptr], x). Dispatches on the opcode of the
 * stored value: Add/Sub/And/Or/Eor/shifts/rotl use dest_am_binop (Add
 * with +-1 prefers IncMem/DecMem when configured), Mux tries SetMem,
 * Minus/Not use dest_am_unop. Returns NULL-equivalent when nothing
 * matches (fallthrough lines elided in this listing).
 */
2186 static ir_node *try_create_dest_am(ir_node *node)
2188 ir_node *val = get_Store_value(node);
2189 ir_node *mem = get_Store_mem(node);
2190 ir_node *ptr = get_Store_ptr(node);
2191 ir_mode *mode = get_irn_mode(val);
2192 unsigned bits = get_mode_size_bits(mode);
2197 /* handle only GP modes for now... */
2198 if (!ia32_mode_needs_gp_reg(mode))
2202 /* store must be the only user of the val node */
2203 if (get_irn_n_edges(val) > 1)
2205 /* skip pointless convs */
2207 ir_node *conv_op = get_Conv_op(val);
2208 ir_mode *pred_mode = get_irn_mode(conv_op);
2209 if (!ia32_mode_needs_gp_reg(pred_mode))
2211 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2219 /* value must be in the same block */
2220 if (get_nodes_block(node) != get_nodes_block(val))
2223 switch (get_irn_opcode(val)) {
2225 op1 = get_Add_left(val);
2226 op2 = get_Add_right(val);
/* Add +-1 becomes inc/dec on memory when the target prefers it */
2227 if (ia32_cg_config.use_incdec) {
2228 if (is_Const_1(op2)) {
2229 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2231 } else if (is_Const_Minus_1(op2)) {
2232 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2236 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2237 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2238 match_commutative | match_immediate);
2241 op1 = get_Sub_left(val);
2242 op2 = get_Sub_right(val);
2243 if (is_Const(op2)) {
2244 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2246 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2247 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2251 op1 = get_And_left(val);
2252 op2 = get_And_right(val);
2253 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2254 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2255 match_commutative | match_immediate);
2258 op1 = get_Or_left(val);
2259 op2 = get_Or_right(val);
2260 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2261 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2262 match_commutative | match_immediate);
2265 op1 = get_Eor_left(val);
2266 op2 = get_Eor_right(val);
2267 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2268 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2269 match_commutative | match_immediate);
2272 op1 = get_Shl_left(val);
2273 op2 = get_Shl_right(val);
2274 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2275 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2279 op1 = get_Shr_left(val);
2280 op2 = get_Shr_right(val);
2281 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2282 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2286 op1 = get_Shrs_left(val);
2287 op2 = get_Shrs_right(val);
2288 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2289 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2293 op1 = get_Rotl_left(val);
2294 op2 = get_Rotl_right(val);
2295 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2296 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2299 /* TODO: match ROR patterns... */
2301 new_node = try_create_SetMem(val, ptr, mem);
2304 op1 = get_Minus_op(val);
2305 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2308 /* should be lowered already */
2309 assert(mode != mode_b);
2310 op1 = get_Not_op(val);
2311 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned Store must keep its successor pinned too */
2317 if (new_node != NULL) {
2318 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2319 get_irn_pinned(node) == op_pin_state_pinned) {
2320 set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Check whether an integer mode can be the target of a float->int store
 * conversion via fist: must be signed and 16 or 32 bits wide.
 */
2327 static bool possible_int_mode_for_fp(ir_mode *mode)
2331 if (!mode_is_signed(mode))
2333 size = get_mode_size_bits(mode);
2334 if (size != 16 && size != 32)
/**
 * Check whether node is a Conv from a float mode to an integer mode
 * that a fist instruction can produce directly.
 */
2339 static int is_float_to_int_conv(const ir_node *node)
2341 ir_mode *mode = get_irn_mode(node);
2345 if (!possible_int_mode_for_fp(mode))
2350 conv_op = get_Conv_op(node);
2351 conv_mode = get_irn_mode(conv_op);
2353 if (!mode_is_float(conv_mode))
2360 * Transform a Store(floatConst) into a sequence of
2363 * @return the created ia32 Store node
/**
 * Transform Store(floatConst) into one or more 32-bit integer stores of
 * the constant's bit pattern (little-endian word order), Sync'ed
 * together when more than one store is needed.
 */
2365 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2367 ir_mode *mode = get_irn_mode(cns);
2368 unsigned size = get_mode_size_bytes(mode);
2369 tarval *tv = get_Const_tarval(cns);
2370 ir_node *block = get_nodes_block(node);
2371 ir_node *new_block = be_transform_node(block);
2372 ir_node *ptr = get_Store_ptr(node);
2373 ir_node *mem = get_Store_mem(node);
2374 dbg_info *dbgi = get_irn_dbg_info(node);
2378 ia32_address_t addr;
/* only whole 32-bit words are emitted */
2380 assert(size % 4 == 0);
2383 build_address_ptr(&addr, ptr, mem);
/* assemble one 32-bit little-endian word of the constant */
2387 get_tarval_sub_bits(tv, ofs) |
2388 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2389 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2390 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2391 ir_node *imm = create_Immediate(NULL, 0, val);
2393 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2394 addr.index, addr.mem, imm);
2396 set_irn_pinned(new_node, get_irn_pinned(node));
2397 set_ia32_op_type(new_node, ia32_AddrModeD);
2398 set_ia32_ls_mode(new_node, mode_Iu);
2399 set_address(new_node, &addr);
2400 SET_IA32_ORIG_NODE(new_node, node);
2403 ins[i++] = new_node;
2408 } while (size != 0);
/* combine the memory outputs of all partial stores */
2411 return new_rd_Sync(dbgi, current_ir_graph, new_block, i, ins);
2418 * Generate a vfist or vfisttp instruction.
/**
 * Generate a vfist or vfisttp (SSE3 truncating variant) instruction.
 * fisttp always pops the x87 tos, so a Keep is attached to the result
 * value to keep it alive for any remaining users; plain vfist needs the
 * FPU control word set to truncation mode. *fist receives the store
 * node itself; the return value is its memory output (fisttp path).
 */
2420 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2421 ir_node *mem, ir_node *val, ir_node **fist)
2425 if (ia32_cg_config.use_fisttp) {
2426 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2427 if other users exists */
2428 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2429 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2430 ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2431 be_new_Keep(reg_class, irg, block, 1, &value);
2433 new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* classic fist: rounding must be forced to truncation via the fpcw */
2436 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2439 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2445 * Transforms a general (no special case) Store.
2447 * @return the created ia32 Store node
/**
 * Transform a general (no special case) Store.
 * Tries destination address mode first; otherwise emits xStore/vfst for
 * floats, a fist for float->int converted values (non-SSE2), and
 * Store8Bit/Store for integers. Pointless Convs before the store are
 * skipped.
 */
2449 static ir_node *gen_general_Store(ir_node *node)
2451 ir_node *val = get_Store_value(node);
2452 ir_mode *mode = get_irn_mode(val);
2453 ir_node *block = get_nodes_block(node);
2454 ir_node *new_block = be_transform_node(block);
2455 ir_node *ptr = get_Store_ptr(node);
2456 ir_node *mem = get_Store_mem(node);
2457 dbg_info *dbgi = get_irn_dbg_info(node);
2458 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2459 ir_node *new_val, *new_node, *store;
2460 ia32_address_t addr;
2462 /* check for destination address mode */
2463 new_node = try_create_dest_am(node);
2464 if (new_node != NULL)
2467 /* construct store address */
2468 memset(&addr, 0, sizeof(addr));
2469 ia32_create_address_mode(&addr, ptr, 0);
2471 if (addr.base == NULL) {
2474 addr.base = be_transform_node(addr.base);
2477 if (addr.index == NULL) {
2480 addr.index = be_transform_node(addr.index);
2482 addr.mem = be_transform_node(mem);
2484 if (mode_is_float(mode)) {
2485 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2487 while (is_Conv(val) && mode == get_irn_mode(val)) {
2488 ir_node *op = get_Conv_op(val);
2489 if (!mode_is_float(get_irn_mode(op)))
2493 new_val = be_transform_node(val);
2494 if (ia32_cg_config.use_sse2) {
2495 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2496 addr.index, addr.mem, new_val);
2498 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2499 addr.index, addr.mem, new_val, mode);
/* x87 path: store float->int Conv results directly via fist */
2502 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2503 val = get_Conv_op(val);
2505 /* TODO: is this optimisation still necessary at all (middleend)? */
2506 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2507 while (is_Conv(val)) {
2508 ir_node *op = get_Conv_op(val);
2509 if (!mode_is_float(get_irn_mode(op)))
2511 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2515 new_val = be_transform_node(val);
2516 new_node = gen_vfist(dbgi, current_ir_graph, new_block, addr.base, addr.index, addr.mem, new_val, &store);
2518 new_val = create_immediate_or_transform(val, 0);
2519 assert(mode != mode_b);
2521 if (get_mode_size_bits(mode) == 8) {
2522 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2523 addr.index, addr.mem, new_val);
2525 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2526 addr.index, addr.mem, new_val);
2531 set_irn_pinned(store, get_irn_pinned(node));
2532 set_ia32_op_type(store, ia32_AddrModeD);
2533 set_ia32_ls_mode(store, mode);
2535 set_address(store, &addr);
2536 SET_IA32_ORIG_NODE(store, node);
2542  * Transforms a Store.
2544  * @return the created ia32 Store node
2546 static ir_node *gen_Store(ir_node *node)
2548 ir_node *val = get_Store_value(node);
/* the stored value's mode decides which lowering path to take */
2549 ir_mode *mode = get_irn_mode(val);
2551 if (mode_is_float(mode) && is_Const(val)) {
2552 /* We can transform every floating const store
2553 into a sequence of integer stores.
2554 If the constant is already in a register,
2555 it would be better to use it, but we don't
2556 have this information here. */
2557 return gen_float_const_Store(node, val);
/* everything else: the general Store transformation */
2559 return gen_general_Store(node);
2563  * Transforms a Switch.
2565  * @return the created ia32 SwitchJmp node
2567 static ir_node *create_Switch(ir_node *node)
2569 dbg_info *dbgi = get_irn_dbg_info(node);
2570 ir_node *block = be_transform_node(get_nodes_block(node));
2571 ir_node *sel = get_Cond_selector(node);
2572 ir_node *new_sel = be_transform_node(sel);
/* min/max start at the extremes and are narrowed by the edge walk below */
2573 long switch_min = LONG_MAX;
2574 long switch_max = LONG_MIN;
2575 long default_pn = get_Cond_defaultProj(node);
2577 const ir_edge_t *edge;
/* the jump-table code assumes a 32bit selector */
2579 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2581 /* determine the smallest switch case value */
2582 foreach_out_edge(node, edge) {
2583 ir_node *proj = get_edge_src_irn(edge);
2584 long pn = get_Proj_proj(proj);
/* the default proj is not part of the case-value range */
2585 if (pn == default_pn)
2588 if (pn < switch_min)
2590 if (pn > switch_max)
/* refuse to emit absurdly large jump tables */
2594 if ((unsigned long) (switch_max - switch_min) > 256000) {
2595 panic("Size of switch %+F bigger than 256000", node);
2598 if (switch_min != 0) {
2599 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2601 /* if smallest switch case is not 0 we need an additional sub */
2602 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg);
/* bias the selector down so the table is 0-based (Lea with negative offset) */
2603 add_ia32_am_offs_int(new_sel, -switch_min);
2604 set_ia32_op_type(new_sel, ia32_AddrModeS);
2606 SET_IA32_ORIG_NODE(new_sel, node);
2609 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2610 SET_IA32_ORIG_NODE(new_node, node);
2616  * Transform a Cond node.
2618 static ir_node *gen_Cond(ir_node *node)
2620 ir_node *block = get_nodes_block(node);
2621 ir_node *new_block = be_transform_node(block);
2622 dbg_info *dbgi = get_irn_dbg_info(node);
2623 ir_node *sel = get_Cond_selector(node);
2624 ir_mode *sel_mode = get_irn_mode(sel);
2625 ir_node *flags = NULL;
/* a non-boolean selector means this Cond is really a Switch */
2629 if (sel_mode != mode_b) {
2630 return create_Switch(node);
2633 /* we get flags from a Cmp */
2634 flags = get_flags_node(sel, &pnc);
/* conditional jump on the flags produced above */
2636 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2637 SET_IA32_ORIG_NODE(new_node, node);
/* Transform a be_Copy: duplicate it and normalize gp-register modes to Iu. */
2642 static ir_node *gen_be_Copy(ir_node *node)
2644 ir_node *new_node = be_duplicate_node(node);
2645 ir_mode *mode = get_irn_mode(new_node);
/* all gp-register values use mode_Iu in the backend */
2647 if (ia32_mode_needs_gp_reg(mode)) {
2648 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare (fucomi, or ftst/fucom + fnstsw + sahf
 * on CPUs without fucomi support). */
2654 static ir_node *create_Fucom(ir_node *node)
2656 dbg_info *dbgi = get_irn_dbg_info(node);
2657 ir_node *block = get_nodes_block(node);
2658 ir_node *new_block = be_transform_node(block);
2659 ir_node *left = get_Cmp_left(node);
2660 ir_node *new_left = be_transform_node(left);
2661 ir_node *right = get_Cmp_right(node);
/* fucomi writes eflags directly, no fnstsw/sahf dance needed */
2665 if (ia32_cg_config.use_fucomi) {
2666 new_right = be_transform_node(right);
2667 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2669 set_ia32_commutative(new_node);
2670 SET_IA32_ORIG_NODE(new_node, node);
/* compare against 0 can use the shorter ftst instruction */
2672 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2673 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2675 new_right = be_transform_node(right);
2676 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2679 set_ia32_commutative(new_node);
2681 SET_IA32_ORIG_NODE(new_node, node);
/* move the fpu status word (in ax after fnstsw) into eflags */
2683 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2684 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE2 ucomis[sd] compare; operands may be matched as
 * address-mode loads (see match_arguments). */
2690 static ir_node *create_Ucomi(ir_node *node)
2692 dbg_info *dbgi = get_irn_dbg_info(node);
2693 ir_node *src_block = get_nodes_block(node);
2694 ir_node *new_block = be_transform_node(src_block);
2695 ir_node *left = get_Cmp_left(node);
2696 ir_node *right = get_Cmp_right(node);
2698 ia32_address_mode_t am;
2699 ia32_address_t *addr = &am.addr;
/* try to fold one operand into a memory operand (commutative) */
2701 match_arguments(&am, src_block, left, right, NULL,
2702 match_commutative | match_am);
2704 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2705 addr->mem, am.new_op1, am.new_op2,
2707 set_am_attributes(new_node, &am);
2709 SET_IA32_ORIG_NODE(new_node, node);
/* if a load was folded in, reroute its memory Proj */
2711 new_node = fix_mem_proj(new_node, &am);
2717  * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2718  * to fold an and into a test node
2720 static bool can_fold_test_and(ir_node *node)
2722 const ir_edge_t *edge;
2724 /** we can only have eq and lg projs */
2725 foreach_out_edge(node, edge) {
2726 ir_node *proj = get_edge_src_irn(edge);
2727 pn_Cmp pnc = get_Proj_proj(proj);
/* any other relation needs the real subtraction result, so no Test */
2728 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2736  * returns true if it is assured, that the upper bits of a node are "clean"
2737  * which means for a 16 or 8 bit value, that the upper bits in the register
2738  * are 0 for unsigned and a copy of the last significant bit for signed
2741 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2743 assert(ia32_mode_needs_gp_reg(mode));
/* for >= 32bit values there are no "upper" bits in a 32bit register */
2744 if (get_mode_size_bits(mode) >= 32)
/* look through Projs to the producing node */
2747 if (is_Proj(transformed_node))
2748 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2750 switch (get_ia32_irn_opcode(transformed_node)) {
2751 case iro_ia32_Conv_I2I:
2752 case iro_ia32_Conv_I2I8Bit: {
2753 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conv with different signedness does not guarantee our extension kind */
2754 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
/* conv from a wider mode tells us nothing about the upper bits */
2756 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2763 if (mode_is_signed(mode)) {
2764 return false; /* TODO handle signed modes */
2766 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2767 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2768 const ia32_immediate_attr_t *attr
2769 = get_ia32_immediate_attr_const(right);
/* shifting right by at least (32 - bits) zeroes the upper bits */
2770 if (attr->symconst == 0 &&
2771 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2775 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2779 /* TODO too conservative if shift amount is constant */
2780 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* unsigned And: one clean operand suffices (and with 0-extended value
 * keeps the upper bits zero) */
2783 if (!mode_is_signed(mode)) {
2785 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2786 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2788 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary case: both operands must be clean */
2793 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2794 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2796 case iro_ia32_Const:
2797 case iro_ia32_Immediate: {
2798 const ia32_immediate_attr_t *attr =
2799 get_ia32_immediate_attr_const(transformed_node);
2800 if (mode_is_signed(mode)) {
/* signed clean: everything above the sign bit is all-0 or all-1 */
2801 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2802 return shifted == 0 || shifted == -1;
/* unsigned clean: everything above the value bits is 0 */
2804 unsigned long shifted = (unsigned long)attr->offset;
2805 shifted >>= get_mode_size_bits(mode);
2806 return shifted == 0;
2816  * Generate code for a Cmp.
2818 static ir_node *gen_Cmp(ir_node *node)
2820 dbg_info *dbgi = get_irn_dbg_info(node);
2821 ir_node *block = get_nodes_block(node);
2822 ir_node *new_block = be_transform_node(block);
2823 ir_node *left = get_Cmp_left(node);
2824 ir_node *right = get_Cmp_right(node);
2825 ir_mode *cmp_mode = get_irn_mode(left);
2827 ia32_address_mode_t am;
2828 ia32_address_t *addr = &am.addr;
/* float compares go to SSE2 ucomi or x87 fucom, handled separately */
2831 if (mode_is_float(cmp_mode)) {
2832 if (ia32_cg_config.use_sse2) {
2833 return create_Ucomi(node);
2835 return create_Fucom(node);
2839 assert(ia32_mode_needs_gp_reg(cmp_mode));
2841 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2842 cmp_unsigned = !mode_is_signed(cmp_mode);
/* only fold the And if it has a single user and all Cmp users are Eq/Lg */
2843 if (is_Const_0(right) &&
2845 get_irn_n_edges(left) == 1 &&
2846 can_fold_test_and(node)) {
2847 /* Test(and_left, and_right) */
2848 ir_node *and_left = get_And_left(left);
2849 ir_node *and_right = get_And_right(left);
2851 /* matze: code here used mode instead of cmd_mode, I think it is always
2852 * the same as cmp_mode, but I leave this here to see if this is really
2855 assert(get_irn_mode(and_left) == cmp_mode);
2857 match_arguments(&am, block, and_left, and_right, NULL,
2859 match_am | match_8bit_am | match_16bit_am |
2860 match_am_and_immediates | match_immediate);
2862 /* use 32bit compare mode if possible since the opcode is smaller */
2863 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2864 upper_bits_clean(am.new_op2, cmp_mode)) {
2865 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2868 if (get_mode_size_bits(cmp_mode) == 8) {
2869 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2870 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2873 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2874 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2877 /* Cmp(left, right) */
2878 match_arguments(&am, block, left, right, NULL,
2879 match_commutative | match_am | match_8bit_am |
2880 match_16bit_am | match_am_and_immediates |
2882 /* use 32bit compare mode if possible since the opcode is smaller */
2883 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2884 upper_bits_clean(am.new_op2, cmp_mode)) {
2885 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2888 if (get_mode_size_bits(cmp_mode) == 8) {
2889 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2890 addr->index, addr->mem, am.new_op1,
2891 am.new_op2, am.ins_permuted,
2894 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2895 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2898 set_am_attributes(new_node, &am);
2899 set_ia32_ls_mode(new_node, cmp_mode);
2901 SET_IA32_ORIG_NODE(new_node, node);
/* if a load was folded in, reroute its memory Proj */
2903 new_node = fix_mem_proj(new_node, &am);
/* Create an ia32 CMov from a Mux whose values live in gp registers;
 * requires cmov support (asserted below). */
2908 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2911 dbg_info *dbgi = get_irn_dbg_info(node);
2912 ir_node *block = get_nodes_block(node);
2913 ir_node *new_block = be_transform_node(block);
2914 ir_node *val_true = get_Mux_true(node);
2915 ir_node *val_false = get_Mux_false(node);
2917 ia32_address_mode_t am;
2918 ia32_address_t *addr;
2920 assert(ia32_cg_config.use_cmov);
2921 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
/* the flags node is passed so matching does not move a load across it */
2925 match_arguments(&am, block, val_false, val_true, flags,
2926 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2928 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2929 addr->mem, am.new_op1, am.new_op2, new_flags,
2930 am.ins_permuted, pnc);
2931 set_am_attributes(new_node, &am);
2933 SET_IA32_ORIG_NODE(new_node, node);
/* if a load was folded in, reroute its memory Proj */
2935 new_node = fix_mem_proj(new_node, &am);
2941  * Creates a ia32 Setcc instruction.
2943 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2944 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2947 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2948 ir_mode *mode = get_irn_mode(orig_node);
/* setcc only produces an 8bit result */
2951 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2952 SET_IA32_ORIG_NODE(new_node, orig_node);
2954 /* we might need to conv the result up */
2955 if (get_mode_size_bits(mode) > 8) {
2956 ir_node *nomem = new_NoMem();
/* zero-extend the 8bit setcc result (mode_Bu) to the full register */
2957 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg, noreg,
2958 nomem, new_node, mode_Bu);
2959 SET_IA32_ORIG_NODE(new_node, orig_node);
2966  * Create instruction for an unsigned Difference or Zero.
2968 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
2970 ir_graph *irg = current_ir_graph;
2971 ir_mode *mode = get_irn_mode(psi);
2972 ir_node *nomem = new_NoMem();
2973 ir_node *new_node, *sub, *sbb, *eflags, *block, *noreg;
/* a - b, keeping both result and flags (two users) */
2977 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
2978 match_mode_neutral | match_am | match_immediate | match_two_users);
2980 block = get_nodes_block(new_node);
2982 if (is_Proj(new_node)) {
2983 sub = get_Proj_pred(new_node);
2984 assert(is_ia32_Sub(sub));
/* make the Sub multi-result so we can Proj out value and flags */
2987 set_irn_mode(sub, mode_T);
2988 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
2990 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
2992 dbgi = get_irn_dbg_info(psi);
/* sbb 0: produces 0 if no borrow, all-ones mask if a < b */
2993 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
2995 noreg = ia32_new_NoReg_gp(env_cg);
/* NOTE(review): masking (a-b) with the sbb result — check the mask
 * polarity against the intended Doz semantics (result 0 when a < b) */
2996 new_node = new_bd_ia32_And(dbgi, block, noreg, noreg, nomem, new_node, sbb);
2997 set_ia32_commutative(new_node);
3002  * Create an const array of two float consts.
3004  * @param c0 the first constant
3005  * @param c1 the second constant
3006  * @param new_mode IN/OUT for the mode of the constants, if NULL
3007  * smallest possible mode will be used
3009 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3011 ir_mode *mode = *new_mode;
3013 ir_initializer_t *initializer;
3014 tarval *tv0 = get_Const_tarval(c0);
3015 tarval *tv1 = get_Const_tarval(c1);
3018 /* detect the best mode for the constants */
3019 mode = get_tarval_mode(tv0);
/* try to narrow: prefer single precision if both convert losslessly */
3021 if (mode != mode_F) {
3022 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3023 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3025 tv0 = tarval_convert_to(tv0, mode);
3026 tv1 = tarval_convert_to(tv1, mode);
/* otherwise fall back to double precision if lossless */
3027 } else if (mode != mode_D) {
3028 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3029 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3031 tv0 = tarval_convert_to(tv0, mode);
3032 tv1 = tarval_convert_to(tv1, mode);
/* NOTE(review): alignment argument 4 — presumably bytes; confirm
 * against ia32_create_float_type's contract */
3039 tp = ia32_create_float_type(mode, 4);
3040 tp = ia32_create_float_array(tp);
/* emit as a local, constant, statically allocated entity */
3042 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3044 set_entity_ld_ident(ent, get_entity_ident(ent));
3045 set_entity_visibility(ent, visibility_local);
3046 set_entity_variability(ent, variability_constant);
3047 set_entity_allocation(ent, allocation_static);
3049 initializer = create_initializer_compound(2);
3051 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3052 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3054 set_entity_initializer(ent, initializer);
3061  * Transforms a Mux node into CMov.
3063  * @return The transformed node.
3065 static ir_node *gen_Mux(ir_node *node)
3067 dbg_info *dbgi = get_irn_dbg_info(node);
3068 ir_node *block = get_nodes_block(node);
3069 ir_node *new_block = be_transform_node(block);
3070 ir_node *mux_true = get_Mux_true(node);
3071 ir_node *mux_false = get_Mux_false(node);
3072 ir_node *cond = get_Mux_sel(node);
3073 ir_mode *mode = get_irn_mode(node);
3078 assert(get_irn_mode(cond) == mode_b);
3080 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3081 if (mode_is_float(mode)) {
3082 ir_node *cmp = get_Proj_pred(cond);
3083 ir_node *cmp_left = get_Cmp_left(cmp);
3084 ir_node *cmp_right = get_Cmp_right(cmp);
3085 pn_Cmp pnc = get_Proj_proj(cond);
/* SSE2: recognize min/max patterns and emit xMin/xMax directly */
3087 if (ia32_cg_config.use_sse2) {
3088 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3089 if (cmp_left == mux_true && cmp_right == mux_false) {
3090 /* Mux(a <= b, a, b) => MIN */
3091 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3092 match_commutative | match_am | match_two_users);
3093 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3094 /* Mux(a <= b, b, a) => MAX */
3095 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3096 match_commutative | match_am | match_two_users);
3098 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3099 if (cmp_left == mux_true && cmp_right == mux_false) {
3100 /* Mux(a >= b, a, b) => MAX */
3101 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3102 match_commutative | match_am | match_two_users);
3103 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3104 /* Mux(a >= b, b, a) => MIN */
3105 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3106 match_commutative | match_am | match_two_users);
/* both values constant: build a 2-element const array and load
 * the selected element, indexed by the 0/1 setcc result */
3110 if (is_Const(mux_true) && is_Const(mux_false)) {
3111 ia32_address_mode_t am;
3112 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3113 ir_node *nomem = new_NoMem();
3118 flags = get_flags_node(cond, &pnc);
3119 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3121 if (ia32_cg_config.use_sse2) {
3122 /* cannot load from different mode on SSE */
3125 /* x87 can load any mode */
/* mux_false at index 0 matches the setcc result 0 for "false" */
3129 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size; max am scale is 8,
 * larger elements need extra Lea/Add steps */
3131 switch (get_mode_size_bytes(new_mode)) {
3141 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3142 set_ia32_am_scale(new_node, 2);
3147 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3148 set_ia32_am_scale(new_node, 1);
3151 /* arg, shift 16 NOT supported */
3153 new_node = new_bd_ia32_Add(dbgi, new_block, noreg, noreg, nomem, new_node, new_node);
3156 panic("Unsupported constant size");
/* assemble the address-mode struct for the array load by hand */
3159 am.ls_mode = new_mode;
3160 am.addr.base = noreg;
3161 am.addr.index = new_node;
3162 am.addr.mem = nomem;
3164 am.addr.scale = scale;
3165 am.addr.use_frame = 0;
3166 am.addr.frame_entity = NULL;
3167 am.addr.symconst_sign = 0;
3168 am.mem_proj = am.addr.mem;
3169 am.op_type = ia32_AddrModeS;
3172 am.pinned = op_pin_state_floats;
3174 am.ins_permuted = 0;
3176 if (ia32_cg_config.use_sse2)
3177 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3179 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3180 set_am_attributes(load, &am);
3182 return new_rd_Proj(NULL, current_ir_graph, block, load, mode_vfp, pn_ia32_res);
3184 panic("cannot transform floating point Mux");
3187 assert(ia32_mode_needs_gp_reg(mode));
3189 if (is_Proj(cond)) {
3190 ir_node *cmp = get_Proj_pred(cond);
3192 ir_node *cmp_left = get_Cmp_left(cmp);
3193 ir_node *cmp_right = get_Cmp_right(cmp);
3194 pn_Cmp pnc = get_Proj_proj(cond);
3196 /* check for unsigned Doz first */
3197 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3198 is_Const_0(mux_false) && is_Sub(mux_true) &&
3199 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3200 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3201 return create_Doz(node, cmp_left, cmp_right);
3202 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3203 is_Const_0(mux_true) && is_Sub(mux_false) &&
3204 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3205 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3206 return create_Doz(node, cmp_left, cmp_right);
3211 flags = get_flags_node(cond, &pnc);
3213 if (is_Const(mux_true) && is_Const(mux_false)) {
3214 /* both are const, good */
3215 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3216 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3217 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
/* inverted 0/1 pair: reuse setcc with permuted inputs */
3218 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
3220 /* Not that simple. */
/* general case: conditional move */
3225 new_node = create_CMov(node, cond, flags, pnc);
3233  * Create a conversion from x87 state register to general purpose.
3235 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3237 ir_node *block = be_transform_node(get_nodes_block(node));
3238 ir_node *op = get_Conv_op(node);
3239 ir_node *new_op = be_transform_node(op);
3240 ia32_code_gen_t *cg = env_cg;
3241 ir_graph *irg = current_ir_graph;
3242 dbg_info *dbgi = get_irn_dbg_info(node);
3243 ir_node *noreg = ia32_new_NoReg_gp(cg);
3244 ir_mode *mode = get_irn_mode(node);
3245 ir_node *fist, *load, *mem;
/* fist stores the rounded value through the frame (stack slot) */
3247 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg, new_NoMem(), new_op, &fist);
3248 set_irn_pinned(fist, op_pin_state_floats);
3249 set_ia32_use_frame(fist);
3250 set_ia32_op_type(fist, ia32_AddrModeD);
3252 assert(get_mode_size_bits(mode) <= 32);
3253 /* exception we can only store signed 32 bit integers, so for unsigned
3254 we store a 64bit (signed) integer and load the lower bits */
3255 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3256 set_ia32_ls_mode(fist, mode_Ls);
3258 set_ia32_ls_mode(fist, mode_Is);
3260 SET_IA32_ORIG_NODE(fist, node);
/* reload the integer from the same stack slot */
3263 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg, mem);
3265 set_irn_pinned(load, op_pin_state_floats);
3266 set_ia32_use_frame(load);
3267 set_ia32_op_type(load, ia32_AddrModeS);
3268 set_ia32_ls_mode(load, mode_Is);
/* tell the frame allocator how big the spill slot must be */
3269 if (get_ia32_ls_mode(fist) == mode_Ls) {
3270 ia32_attr_t *attr = get_ia32_attr(load);
3271 attr->data.need_64bit_stackent = 1;
3273 ia32_attr_t *attr = get_ia32_attr(load);
3274 attr->data.need_32bit_stackent = 1;
3276 SET_IA32_ORIG_NODE(load, node);
3278 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3282  * Creates a x87 strict Conv by placing a Store and a Load
3284 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3286 ir_node *block = get_nodes_block(node);
3287 ir_graph *irg = current_ir_graph;
3288 dbg_info *dbgi = get_irn_dbg_info(node);
3289 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3290 ir_node *nomem = new_NoMem();
3291 ir_node *frame = get_irg_frame(irg);
3292 ir_node *store, *load;
/* store in tgt_mode forces rounding to the target precision ... */
3295 store = new_bd_ia32_vfst(dbgi, block, frame, noreg, nomem, node, tgt_mode);
3296 set_ia32_use_frame(store);
3297 set_ia32_op_type(store, ia32_AddrModeD);
3298 SET_IA32_ORIG_NODE(store, node);
/* ... and the reload brings the rounded value back onto the fp stack */
3300 load = new_bd_ia32_vfld(dbgi, block, frame, noreg, store, tgt_mode);
3301 set_ia32_use_frame(load);
3302 set_ia32_op_type(load, ia32_AddrModeS);
3303 SET_IA32_ORIG_NODE(load, node);
3305 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
/* Create an integer-to-integer Conv node, picking the 8bit variant
 * when the (smaller) mode is 8 bits wide. */
3309 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3310 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3312 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3314 func = get_mode_size_bits(mode) == 8 ?
3315 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3316 return func(dbgi, block, base, index, mem, val, mode);
3320  * Create a conversion from general purpose to x87 register
3322 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3324 ir_node *src_block = get_nodes_block(node);
3325 ir_node *block = be_transform_node(src_block);
3326 ir_graph *irg = current_ir_graph;
3327 dbg_info *dbgi = get_irn_dbg_info(node);
3328 ir_node *op = get_Conv_op(node);
3329 ir_node *new_op = NULL;
3333 ir_mode *store_mode;
3338 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3339 if (possible_int_mode_for_fp(src_mode)) {
3340 ia32_address_mode_t am;
3342 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
/* match succeeded: fild loads straight from memory, no spill needed */
3343 if (am.op_type == ia32_AddrModeS) {
3344 ia32_address_t *addr = &am.addr;
3346 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index,
3348 new_node = new_r_Proj(irg, block, fild, mode_vfp,
3351 set_am_attributes(fild, &am);
3352 SET_IA32_ORIG_NODE(fild, node);
3354 fix_mem_proj(fild, &am);
/* fallback: value is in a register, spill it to the frame first */
3359 if (new_op == NULL) {
3360 new_op = be_transform_node(op);
3363 noreg = ia32_new_NoReg_gp(env_cg);
3364 nomem = new_NoMem();
3365 mode = get_irn_mode(op);
3367 /* first convert to 32 bit signed if necessary */
3368 if (get_mode_size_bits(src_mode) < 32) {
3369 if (!upper_bits_clean(new_op, src_mode)) {
3370 new_op = create_Conv_I2I(dbgi, block, noreg, noreg, nomem, new_op, src_mode);
3371 SET_IA32_ORIG_NODE(new_op, node);
3376 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can load it */
3379 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg, nomem,
3382 set_ia32_use_frame(store);
3383 set_ia32_op_type(store, ia32_AddrModeD);
3384 set_ia32_ls_mode(store, mode_Iu);
3386 /* exception for 32bit unsigned, do a 64bit spill+load */
3387 if (!mode_is_signed(mode)) {
/* store a zero upper word so the 64bit value is the unsigned 32bit one */
3390 ir_node *zero_const = create_Immediate(NULL, 0, 0);
3392 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3393 noreg, nomem, zero_const);
3395 set_ia32_use_frame(zero_store);
3396 set_ia32_op_type(zero_store, ia32_AddrModeD);
3397 add_ia32_am_offs_int(zero_store, 4);
3398 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both stores: merge their memories with a Sync */
3403 store = new_rd_Sync(dbgi, irg, block, 2, in);
3404 store_mode = mode_Ls;
3406 store_mode = mode_Is;
3410 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg, store);
3412 set_ia32_use_frame(fild);
3413 set_ia32_op_type(fild, ia32_AddrModeS);
3414 set_ia32_ls_mode(fild, store_mode);
3416 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3422  * Create a conversion from one integer mode into another one
3424 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3425 dbg_info *dbgi, ir_node *block, ir_node *op,
3428 ir_node *new_block = be_transform_node(block);
3430 ir_mode *smaller_mode;
3431 ia32_address_mode_t am;
3432 ia32_address_t *addr = &am.addr;
/* the conversion only needs to extend/truncate to the narrower mode */
3435 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3436 smaller_mode = src_mode;
3438 smaller_mode = tgt_mode;
3441 #ifdef DEBUG_libfirm
3443 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3448 match_arguments(&am, block, NULL, op, NULL,
3449 match_am | match_8bit_am | match_16bit_am);
3451 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3452 /* unnecessary conv. in theory it shouldn't have been AM */
3453 assert(is_ia32_NoReg_GP(addr->base));
3454 assert(is_ia32_NoReg_GP(addr->index));
3455 assert(is_NoMem(addr->mem));
3456 assert(am.addr.offset == 0);
3457 assert(am.addr.symconst_ent == NULL);
3461 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3462 addr->mem, am.new_op2, smaller_mode);
3463 set_am_attributes(new_node, &am);
3464 /* match_arguments assume that out-mode = in-mode, this isn't true here
3466 set_ia32_ls_mode(new_node, smaller_mode);
3467 SET_IA32_ORIG_NODE(new_node, node);
3468 new_node = fix_mem_proj(new_node, &am);
3473  * Transforms a Conv node.
3475  * @return The created ia32 Conv node
3477 static ir_node *gen_Conv(ir_node *node)
3479 ir_node *block = get_nodes_block(node);
3480 ir_node *new_block = be_transform_node(block);
3481 ir_node *op = get_Conv_op(node);
3482 ir_node *new_op = NULL;
3483 dbg_info *dbgi = get_irn_dbg_info(node);
3484 ir_mode *src_mode = get_irn_mode(op);
3485 ir_mode *tgt_mode = get_irn_mode(node);
3486 int src_bits = get_mode_size_bits(src_mode);
3487 int tgt_bits = get_mode_size_bits(tgt_mode);
3488 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3489 ir_node *nomem = new_NoMem();
3490 ir_node *res = NULL;
/* 64bit integers are lowered before this phase */
3492 assert(!mode_is_int(src_mode) || src_bits <= 32);
3493 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3495 if (src_mode == mode_b) {
3496 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3497 /* nothing to do, we already model bools as 0/1 ints */
3498 return be_transform_node(op);
3501 if (src_mode == tgt_mode) {
3502 if (get_Conv_strict(node)) {
3503 if (ia32_cg_config.use_sse2) {
3504 /* when we are in SSE mode, we can kill all strict no-op conversion */
3505 return be_transform_node(op);
3508 /* this should be optimized already, but who knows... */
3509 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3510 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3511 return be_transform_node(op);
3515 if (mode_is_float(src_mode)) {
3516 new_op = be_transform_node(op);
3517 /* we convert from float ... */
3518 if (mode_is_float(tgt_mode)) {
3520 /* Matze: I'm a bit unsure what the following is for? seems wrong
3522 if (src_mode == mode_E && tgt_mode == mode_D
3523 && !get_Conv_strict(node)) {
3524 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3530 if (ia32_cg_config.use_sse2) {
3531 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3532 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg, noreg,
3534 set_ia32_ls_mode(res, tgt_mode);
3536 if (get_Conv_strict(node)) {
3537 /* if fp_no_float_fold is not set then we assume that we
3538 * don't have any float operations in a non
3539 * mode_float_arithmetic mode and can skip strict upconvs */
3540 if (src_bits < tgt_bits
3541 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3542 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* x87 strict conv: force rounding via store/load round trip */
3545 res = gen_x87_strict_conv(tgt_mode, new_op);
3546 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
/* non-strict x87 float->float needs no code: the register is 80bit */
3550 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3555 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3556 if (ia32_cg_config.use_sse2) {
3557 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg, noreg,
3559 set_ia32_ls_mode(res, src_mode);
3561 return gen_x87_fp_to_gp(node);
3565 /* we convert from int ... */
3566 if (mode_is_float(tgt_mode)) {
3568 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3569 if (ia32_cg_config.use_sse2) {
3570 new_op = be_transform_node(op);
3571 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg, noreg,
3573 set_ia32_ls_mode(res, tgt_mode);
3575 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3576 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3577 res = gen_x87_gp_to_fp(node, src_mode);
3579 /* we need a strict-Conv, if the int mode has more bits than the
3581 if (float_mantissa < int_mantissa) {
3582 res = gen_x87_strict_conv(tgt_mode, res);
3583 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3587 } else if (tgt_mode == mode_b) {
3588 /* mode_b lowering already took care that we only have 0/1 values */
3589 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3590 src_mode, tgt_mode));
3591 return be_transform_node(op);
3594 if (src_bits == tgt_bits) {
3595 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3596 src_mode, tgt_mode));
3597 return be_transform_node(op);
/* real int<->int conversion (extend or truncate) */
3600 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode the node as an ia32 Immediate (subject to the given
 * constraint type); otherwise transform it normally. */
3608 static ir_node *create_immediate_or_transform(ir_node *node,
3609 char immediate_constraint_type)
3611 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3612 if (new_node == NULL) {
3613 new_node = be_transform_node(node);
3619  * Transforms a FrameAddr into an ia32 Add.
3621 static ir_node *gen_be_FrameAddr(ir_node *node)
3623 ir_node *block = be_transform_node(get_nodes_block(node));
3624 ir_node *op = be_get_FrameAddr_frame(node);
3625 ir_node *new_op = be_transform_node(op);
3626 dbg_info *dbgi = get_irn_dbg_info(node);
3627 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
/* a Lea with the frame entity; the offset is fixed up after
 * stack-frame layout */
3630 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg);
3631 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3632 set_ia32_use_frame(new_node);
3634 SET_IA32_ORIG_NODE(new_node, node);
3640  * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3642 static ir_node *gen_be_Return(ir_node *node)
3644 ir_graph *irg = current_ir_graph;
3645 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3646 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3647 ir_entity *ent = get_irg_entity(irg);
3648 ir_type *tp = get_entity_type(ent);
3653 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3654 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3657 int pn_ret_val, pn_ret_mem, arity, i;
3659 assert(ret_val != NULL);
/* only the SSE float-return case needs special handling */
3660 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3661 return be_duplicate_node(node);
3664 res_type = get_method_res_type(tp, 0);
3666 if (! is_Primitive_type(res_type)) {
3667 return be_duplicate_node(node);
3670 mode = get_type_mode(res_type);
3671 if (! mode_is_float(mode)) {
3672 return be_duplicate_node(node);
3675 assert(get_method_n_ress(tp) == 1);
3677 pn_ret_val = get_Proj_proj(ret_val);
3678 pn_ret_mem = get_Proj_proj(ret_mem);
3680 /* get the Barrier */
3681 barrier = get_Proj_pred(ret_val);
3683 /* get result input of the Barrier */
3684 ret_val = get_irn_n(barrier, pn_ret_val);
3685 new_ret_val = be_transform_node(ret_val);
3687 /* get memory input of the Barrier */
3688 ret_mem = get_irn_n(barrier, pn_ret_mem);
3689 new_ret_mem = be_transform_node(ret_mem);
3691 frame = get_irg_frame(irg);
3693 dbgi = get_irn_dbg_info(barrier);
3694 block = be_transform_node(get_nodes_block(barrier));
3696 noreg = ia32_new_NoReg_gp(env_cg);
3698 /* store xmm0 onto stack */
3699 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg,
3700 new_ret_mem, new_ret_val);
3701 set_ia32_ls_mode(sse_store, mode);
3702 set_ia32_op_type(sse_store, ia32_AddrModeD);
3703 set_ia32_use_frame(sse_store);
3705 /* load into x87 register */
3706 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg, sse_store, mode);
3707 set_ia32_op_type(fld, ia32_AddrModeS);
3708 set_ia32_use_frame(fld);
3710 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3711 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3713 /* create a new barrier */
/* rebuild the Barrier with the x87 value/memory substituted at the
 * return-value and return-memory positions */
3714 arity = get_irn_arity(barrier);
3715 in = ALLOCAN(ir_node*, arity);
3716 for (i = 0; i < arity; ++i) {
3719 if (i == pn_ret_val) {
3721 } else if (i == pn_ret_mem) {
3724 ir_node *in = get_irn_n(barrier, i);
3725 new_in = be_transform_node(in);
3730 new_barrier = new_ir_node(dbgi, irg, block,
3731 get_irn_op(barrier), get_irn_mode(barrier),
3733 copy_node_attr(barrier, new_barrier);
3734 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so other users of the Barrier find it */
3735 be_set_transformed_node(barrier, new_barrier);
3737 /* transform normally */
3738 return be_duplicate_node(node);
3742 * Transform a be_AddSP into an ia32_SubSP.
/* The stack grows downwards on ia32, so enlarging the stack (AddSP)
 * is implemented by subtracting from ESP. */
3744 static ir_node *gen_be_AddSP(ir_node *node)
3746 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3747 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3749 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3750 match_am | match_immediate);
3754 * Transform a be_SubSP into an ia32_AddSP
/* Mirror of gen_be_AddSP: shrinking the stack adds to ESP. */
3756 static ir_node *gen_be_SubSP(ir_node *node)
3758 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3759 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3761 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3762 match_am | match_immediate);
3766 * Change some phi modes
/*
 * Rebuild a Phi with a backend-appropriate mode (gp register mode for
 * integers; SSE/vfp mode for floats — the mode assignments themselves
 * are on lines not visible in this extract).  The Phi keeps its OLD
 * predecessor arguments for now, since phis may close loops; they are
 * fixed up later via be_enqueue_preds.
 */
3768 static ir_node *gen_Phi(ir_node *node)
3770 ir_node *block = be_transform_node(get_nodes_block(node));
3771 ir_graph *irg = current_ir_graph;
3772 dbg_info *dbgi = get_irn_dbg_info(node);
3773 ir_mode *mode = get_irn_mode(node);
3776 if (ia32_mode_needs_gp_reg(mode)) {
3777 /* we shouldn't have any 64bit stuff around anymore */
3778 assert(get_mode_size_bits(mode) <= 32);
3779 /* all integer operations are on 32bit registers now */
3781 } else if (mode_is_float(mode)) {
3782 if (ia32_cg_config.use_sse2) {
3789 /* phi nodes allow loops, so we use the old arguments for now
3790 * and fix this later */
3791 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3792 get_irn_in(node) + 1);
3793 copy_node_attr(node, phi);
3794 be_duplicate_deps(node, phi);
3796 be_enqueue_preds(node);
/**
 * Transform an IJmp (computed/indirect jump) into an ia32 IJmp.
 * The jump target may be folded into an address mode or an immediate.
 */
3804 static ir_node *gen_IJmp(ir_node *node)
3806 ir_node *block = get_nodes_block(node);
3807 ir_node *new_block = be_transform_node(block);
3808 dbg_info *dbgi = get_irn_dbg_info(node);
3809 ir_node *op = get_IJmp_target(node);
3811 ia32_address_mode_t am;
3812 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
3814 assert(get_irn_mode(op) == mode_P);
3816 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3818 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3819 addr->mem, am.new_op2);
3820 set_am_attributes(new_node, &am);
3821 SET_IA32_ORIG_NODE(new_node, node);
/* folded loads need their memory Proj redirected */
3823 new_node = fix_mem_proj(new_node, &am);
3829 * Transform a Bound node.
/*
 * Only the common case lower==0 (typical for Java array bounds checks) is
 * supported: index < upper is checked via an unsigned Sub + Jcc on the
 * Sub's flags.  Any other Bound form panics.
 */
3831 static ir_node *gen_Bound(ir_node *node)
3834 ir_node *lower = get_Bound_lower(node);
3835 dbg_info *dbgi = get_irn_dbg_info(node);
3837 if (is_Const_0(lower)) {
3838 /* typical case for Java */
3839 ir_node *sub, *res, *flags, *block;
3840 ir_graph *irg = current_ir_graph;
3842 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3843 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3845 block = get_nodes_block(res);
/* make sure we get at the Sub itself (gen_binop may return a Proj) */
3846 if (! is_Proj(res)) {
3848 set_irn_mode(sub, mode_T);
3849 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3851 sub = get_Proj_pred(res);
/* unsigned below-compare: catches index < 0 and index >= upper at once */
3853 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3854 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3855 SET_IA32_ORIG_NODE(new_node, node);
3857 panic("generic Bound not supported in ia32 Backend");
/** Transform a lowered ShlDep (shift-left with explicit dependency) into ia32 Shl. */
3863 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3865 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3866 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3868 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3869 match_immediate | match_mode_neutral);
/** Transform a lowered ShrDep (logical shift-right) into ia32 Shr. */
3872 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3874 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3875 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3876 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/** Transform a lowered SarDep (arithmetic shift-right) into ia32 Sar. */
3880 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3882 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3883 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3884 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transform a lowered 64-bit Add (low word) into an ia32 Add in mode_T,
 * so the carry flag is available as a Proj for the matching Adc.
 */
3888 static ir_node *gen_ia32_l_Add(ir_node *node)
3890 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3891 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3892 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3893 match_commutative | match_am | match_immediate |
3894 match_mode_neutral);
/* unwrap a possible result Proj so we can mark the Add itself mode_T */
3896 if (is_Proj(lowered)) {
3897 lowered = get_Proj_pred(lowered);
3899 assert(is_ia32_Add(lowered));
3900 set_irn_mode(lowered, mode_T);
/** Transform a lowered Adc (add-with-carry, 64-bit high word) into ia32 Adc. */
3906 static ir_node *gen_ia32_l_Adc(ir_node *node)
3908 return gen_binop_flags(node, new_bd_ia32_Adc,
3909 match_commutative | match_am | match_immediate |
3910 match_mode_neutral);
3914 * Transforms a l_MulS into a "real" MulS node.
3916 * @return the created ia32 Mul node
/* Unsigned widening multiply (EDX:EAX result); used by 64-bit lowering. */
3918 static ir_node *gen_ia32_l_Mul(ir_node *node)
3920 ir_node *left = get_binop_left(node);
3921 ir_node *right = get_binop_right(node);
3923 return gen_binop(node, left, right, new_bd_ia32_Mul,
3924 match_commutative | match_am | match_mode_neutral);
3928 * Transforms a l_IMulS into a "real" IMul1OPS node.
3930 * @return the created ia32 IMul1OP node
/* One-operand signed multiply (EDX:EAX result); used by 64-bit lowering. */
3932 static ir_node *gen_ia32_l_IMul(ir_node *node)
3934 ir_node *left = get_binop_left(node);
3935 ir_node *right = get_binop_right(node);
3937 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3938 match_commutative | match_am | match_mode_neutral);
/**
 * Transform a lowered 64-bit Sub (low word) into an ia32 Sub in mode_T,
 * so the borrow flag is available as a Proj for the matching Sbb.
 */
3941 static ir_node *gen_ia32_l_Sub(ir_node *node)
3943 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3944 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3945 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3946 match_am | match_immediate | match_mode_neutral);
/* unwrap a possible result Proj so we can mark the Sub itself mode_T */
3948 if (is_Proj(lowered)) {
3949 lowered = get_Proj_pred(lowered);
3951 assert(is_ia32_Sub(lowered));
3952 set_irn_mode(lowered, mode_T);
/** Transform a lowered Sbb (subtract-with-borrow, 64-bit high word) into ia32 Sbb. */
3958 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3960 return gen_binop_flags(node, new_bd_ia32_Sbb,
3961 match_am | match_immediate | match_mode_neutral);
3965 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3966 * op1 - target to be shifted
3967 * op2 - contains bits to be shifted into target
3969 * Only op3 can be an immediate.
/*
 * Common helper for the two double-precision shift lowering cases.
 * @param node   the l_ShlD/l_ShrD node being transformed
 * @param high   value shifted into the result
 * @param low    value providing the shifted-in bits
 * @param count  shift amount (may become an immediate)
 */
3971 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3972 ir_node *low, ir_node *count)
3974 ir_node *block = get_nodes_block(node);
3975 ir_node *new_block = be_transform_node(block);
3976 dbg_info *dbgi = get_irn_dbg_info(node);
3977 ir_node *new_high = be_transform_node(high);
3978 ir_node *new_low = be_transform_node(low);
3982 /* the shift amount can be any mode that is bigger than 5 bits, since all
3983 * other bits are ignored anyway */
/* strip single-use int Convs on the count: SHLD/SHRD only look at 5 bits */
3984 while (is_Conv(count) &&
3985 get_irn_n_edges(count) == 1 &&
3986 mode_is_int(get_irn_mode(count))) {
3987 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3988 count = get_Conv_op(count);
3990 new_count = create_immediate_or_transform(count, 0);
3992 if (is_ia32_l_ShlD(node)) {
3993 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
3996 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
3999 SET_IA32_ORIG_NODE(new_node, node);
/** Transform a lowered ShlD into an ia32 ShlD (see gen_lowered_64bit_shifts). */
4004 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4006 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4007 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4008 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4009 return gen_lowered_64bit_shifts(node, high, low, count);
/** Transform a lowered ShrD into an ia32 ShrD (see gen_lowered_64bit_shifts). */
4012 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4014 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4015 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4016 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4017 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform a lowered long-long-to-float conversion (x87 path only):
 * store both 32-bit halves to a frame slot, fild the 64-bit value, and —
 * for UNSIGNED sources — add a correction constant (2^64 bias) selected
 * by the source's sign bit, since fild always interprets the value as
 * signed.
 */
4020 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4022 ir_node *src_block = get_nodes_block(node);
4023 ir_node *block = be_transform_node(src_block);
4024 ir_graph *irg = current_ir_graph;
4025 dbg_info *dbgi = get_irn_dbg_info(node);
4026 ir_node *frame = get_irg_frame(irg);
4027 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4028 ir_node *nomem = new_NoMem();
4029 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4030 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4031 ir_node *new_val_low = be_transform_node(val_low);
4032 ir_node *new_val_high = be_transform_node(val_high);
4034 ir_node *sync, *fild, *res;
4035 ir_node *store_low, *store_high;
4037 if (ia32_cg_config.use_sse2) {
4038 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* write the two 32-bit halves into one 64-bit frame slot */
4042 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg, nomem,
4044 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg, nomem,
4046 SET_IA32_ORIG_NODE(store_low, node);
4047 SET_IA32_ORIG_NODE(store_high, node);
4049 set_ia32_use_frame(store_low);
4050 set_ia32_use_frame(store_high);
4051 set_ia32_op_type(store_low, ia32_AddrModeD);
4052 set_ia32_op_type(store_high, ia32_AddrModeD);
4053 set_ia32_ls_mode(store_low, mode_Iu);
4054 set_ia32_ls_mode(store_high, mode_Is);
/* high word lives 4 bytes above the low word (little endian) */
4055 add_ia32_am_offs_int(store_high, 4);
4059 sync = new_rd_Sync(dbgi, irg, block, 2, in);
/* load the combined 64-bit integer onto the x87 stack */
4062 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg, sync);
4064 set_ia32_use_frame(fild);
4065 set_ia32_op_type(fild, ia32_AddrModeS);
4066 set_ia32_ls_mode(fild, mode_Ls);
4068 SET_IA32_ORIG_NODE(fild, node);
4070 res = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned source: if the sign bit was set, fild produced value - 2^64;
 * add back the ULL bias, indexed by (high >> 31) so the correction is
 * zero for small values */
4072 if (! mode_is_signed(get_irn_mode(val_high))) {
4073 ia32_address_mode_t am;
4075 ir_node *count = create_Immediate(NULL, 0, 31);
4078 am.addr.base = ia32_new_NoReg_gp(env_cg);
4079 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4080 am.addr.mem = nomem;
4083 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4084 am.addr.use_frame = 0;
4085 am.addr.frame_entity = NULL;
4086 am.addr.symconst_sign = 0;
4087 am.ls_mode = mode_F;
4088 am.mem_proj = nomem;
4089 am.op_type = ia32_AddrModeS;
4091 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4092 am.pinned = op_pin_state_floats;
4094 am.ins_permuted = 0;
4096 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4097 am.new_op1, am.new_op2, get_fpcw());
4098 set_am_attributes(fadd, &am);
4100 set_irn_mode(fadd, mode_T);
4101 res = new_rd_Proj(NULL, irg, block, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform a lowered float-to-long-long conversion: fist(p) the x87 value
 * into a 64-bit frame slot; the two 32-bit result halves are read back by
 * gen_Proj_l_FloattoLL.
 */
4106 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4108 ir_node *src_block = get_nodes_block(node);
4109 ir_node *block = be_transform_node(src_block);
4110 ir_graph *irg = current_ir_graph;
4111 dbg_info *dbgi = get_irn_dbg_info(node);
4112 ir_node *frame = get_irg_frame(irg);
4113 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4114 ir_node *nomem = new_NoMem();
4115 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4116 ir_node *new_val = be_transform_node(val);
4117 ir_node *fist, *mem;
4119 mem = gen_vfist(dbgi, irg, block, frame, noreg, nomem, new_val, &fist);
4120 SET_IA32_ORIG_NODE(fist, node);
4121 set_ia32_use_frame(fist);
4122 set_ia32_op_type(fist, ia32_AddrModeD);
4123 set_ia32_ls_mode(fist, mode_Ls);
4129 * the BAD transformer.
/* Registered for opcodes that must never reach the transformation phase. */
4131 static ir_node *bad_transform(ir_node *node)
4133 panic("No transform function for %+F available.", node);
/**
 * Transform a Proj of an l_FloattoLL: load the requested 32-bit half of
 * the 64-bit value that gen_ia32_l_FloattoLL stored into the frame slot.
 */
4137 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4139 ir_graph *irg = current_ir_graph;
4140 ir_node *block = be_transform_node(get_nodes_block(node));
4141 ir_node *pred = get_Proj_pred(node);
4142 ir_node *new_pred = be_transform_node(pred);
4143 ir_node *frame = get_irg_frame(irg);
4144 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4145 dbg_info *dbgi = get_irn_dbg_info(node);
4146 long pn = get_Proj_proj(node);
4151 load = new_bd_ia32_Load(dbgi, block, frame, noreg, new_pred);
4152 SET_IA32_ORIG_NODE(load, node);
4153 set_ia32_use_frame(load);
4154 set_ia32_op_type(load, ia32_AddrModeS);
4155 set_ia32_ls_mode(load, mode_Iu);
4156 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4157 * 32 bit from it with this particular load */
4158 attr = get_ia32_attr(load);
4159 attr->data.need_64bit_stackent = 1;
/* high half sits 4 bytes above the low half */
4161 if (pn == pn_ia32_l_FloattoLL_res_high) {
4162 add_ia32_am_offs_int(load, 4);
4164 assert(pn == pn_ia32_l_FloattoLL_res_low);
4167 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4173 * Transform the Projs of an AddSP.
/* Note: be_AddSP was transformed into ia32_SubSP, so AddSP proj numbers
 * are mapped onto SubSP proj numbers here. */
4175 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4177 ir_node *block = be_transform_node(get_nodes_block(node));
4178 ir_node *pred = get_Proj_pred(node);
4179 ir_node *new_pred = be_transform_node(pred);
4180 ir_graph *irg = current_ir_graph;
4181 dbg_info *dbgi = get_irn_dbg_info(node);
4182 long proj = get_Proj_proj(node);
4184 if (proj == pn_be_AddSP_sp) {
4185 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4186 pn_ia32_SubSP_stack);
/* the stack pointer result must live in ESP */
4187 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4189 } else if (proj == pn_be_AddSP_res) {
4190 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4191 pn_ia32_SubSP_addr);
4192 } else if (proj == pn_be_AddSP_M) {
4193 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4196 panic("No idea how to transform proj->AddSP");
4200 * Transform the Projs of a SubSP.
/* Mirror of gen_Proj_be_AddSP: be_SubSP became ia32_AddSP. */
4202 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4204 ir_node *block = be_transform_node(get_nodes_block(node));
4205 ir_node *pred = get_Proj_pred(node);
4206 ir_node *new_pred = be_transform_node(pred);
4207 ir_graph *irg = current_ir_graph;
4208 dbg_info *dbgi = get_irn_dbg_info(node);
4209 long proj = get_Proj_proj(node);
4211 if (proj == pn_be_SubSP_sp) {
4212 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4213 pn_ia32_AddSP_stack);
/* the stack pointer result must live in ESP */
4214 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4216 } else if (proj == pn_be_SubSP_M) {
4217 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4220 panic("No idea how to transform proj->SubSP");
4224 * Transform and renumber the Projs from a Load.
/*
 * Maps firm Load proj numbers onto the proj numbers of whichever ia32 node
 * the Load was transformed into (Load, Conv_I2I*, xLoad, vfld, or a node
 * the load was folded into via source address mode).  Exception projs mark
 * the load with an exception label.
 */
4226 static ir_node *gen_Proj_Load(ir_node *node)
4229 ir_node *block = be_transform_node(get_nodes_block(node));
4230 ir_node *pred = get_Proj_pred(node);
4231 ir_graph *irg = current_ir_graph;
4232 dbg_info *dbgi = get_irn_dbg_info(node);
4233 long proj = get_Proj_proj(node);
4235 /* loads might be part of source address mode matches, so we don't
4236 * transform the ProjMs yet (with the exception of loads whose result is
4239 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4242 /* this is needed, because sometimes we have loops that are only
4243 reachable through the ProjM */
4244 be_enqueue_preds(node);
4245 /* do it in 2 steps, to silence firm verifier */
4246 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4247 set_Proj_proj(res, pn_ia32_mem);
4251 /* renumber the proj */
4252 new_pred = be_transform_node(pred);
4253 if (is_ia32_Load(new_pred)) {
4256 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4258 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4259 case pn_Load_X_regular:
4260 return new_rd_Jmp(dbgi, irg, block);
4261 case pn_Load_X_except:
4262 /* This Load might raise an exception. Mark it. */
4263 set_ia32_exc_label(new_pred, 1);
4264 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* load was folded into a Conv during transformation */
4268 } else if (is_ia32_Conv_I2I(new_pred) ||
4269 is_ia32_Conv_I2I8Bit(new_pred)) {
4270 set_irn_mode(new_pred, mode_T);
4271 if (proj == pn_Load_res) {
4272 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4273 } else if (proj == pn_Load_M) {
4274 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4276 } else if (is_ia32_xLoad(new_pred)) {
4279 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4281 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4282 case pn_Load_X_regular:
4283 return new_rd_Jmp(dbgi, irg, block);
4284 case pn_Load_X_except:
4285 /* This Load might raise an exception. Mark it. */
4286 set_ia32_exc_label(new_pred, 1);
4287 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4291 } else if (is_ia32_vfld(new_pred)) {
4294 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4296 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4297 case pn_Load_X_regular:
4298 return new_rd_Jmp(dbgi, irg, block);
4299 case pn_Load_X_except:
4300 /* This Load might raise an exception. Mark it. */
4301 set_ia32_exc_label(new_pred, 1);
/* NOTE(review): uses pn_ia32_xLoad_X_exc in the vfld branch — presumably
 * the exception proj numbers coincide; verify against gen_ia32_new_nodes */
4302 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4307 /* can happen for ProJMs when source address mode happened for the
4310 /* however it should not be the result proj, as that would mean the
4311 load had multiple users and should not have been used for
4313 if (proj != pn_Load_M) {
4314 panic("internal error: transformed node not a Load");
4316 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4319 panic("No idea how to transform proj");
4323 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod all map onto a single ia32 Div/IDiv, which produces
 * both the quotient (div_res) and remainder (mod_res) projs. */
4325 static ir_node *gen_Proj_DivMod(ir_node *node)
4327 ir_node *block = be_transform_node(get_nodes_block(node));
4328 ir_node *pred = get_Proj_pred(node);
4329 ir_node *new_pred = be_transform_node(pred);
4330 ir_graph *irg = current_ir_graph;
4331 dbg_info *dbgi = get_irn_dbg_info(node);
4332 long proj = get_Proj_proj(node);
4334 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* dispatch on the ORIGINAL opcode: the proj numbering differs between
 * Div, Mod and DivMod */
4336 switch (get_irn_opcode(pred)) {
4340 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4342 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4343 case pn_Div_X_regular:
4344 return new_rd_Jmp(dbgi, irg, block);
4345 case pn_Div_X_except:
4346 set_ia32_exc_label(new_pred, 1);
4347 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4355 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4357 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4358 case pn_Mod_X_except:
4359 set_ia32_exc_label(new_pred, 1);
4360 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4368 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4369 case pn_DivMod_res_div:
4370 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4371 case pn_DivMod_res_mod:
4372 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4373 case pn_DivMod_X_regular:
4374 return new_rd_Jmp(dbgi, irg, block);
4375 case pn_DivMod_X_except:
4376 set_ia32_exc_label(new_pred, 1);
4377 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4386 panic("No idea how to transform proj->DivMod");
4390 * Transform and renumber the Projs from a CopyB.
/* CopyB may have been transformed into either the immediate-size variant
 * (CopyB_i) or the general one; pick the matching memory proj number. */
4392 static ir_node *gen_Proj_CopyB(ir_node *node)
4394 ir_node *block = be_transform_node(get_nodes_block(node));
4395 ir_node *pred = get_Proj_pred(node);
4396 ir_node *new_pred = be_transform_node(pred);
4397 ir_graph *irg = current_ir_graph;
4398 dbg_info *dbgi = get_irn_dbg_info(node);
4399 long proj = get_Proj_proj(node);
4402 case pn_CopyB_M_regular:
4403 if (is_ia32_CopyB_i(new_pred)) {
4404 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4405 } else if (is_ia32_CopyB(new_pred)) {
4406 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4413 panic("No idea how to transform proj->CopyB");
4417 * Transform and renumber the Projs from a Quot.
/* A float Quot became either an SSE xDiv or an x87 vfdiv; renumber the
 * memory and result projs accordingly. */
4419 static ir_node *gen_Proj_Quot(ir_node *node)
4421 ir_node *block = be_transform_node(get_nodes_block(node));
4422 ir_node *pred = get_Proj_pred(node);
4423 ir_node *new_pred = be_transform_node(pred);
4424 ir_graph *irg = current_ir_graph;
4425 dbg_info *dbgi = get_irn_dbg_info(node);
4426 long proj = get_Proj_proj(node);
4430 if (is_ia32_xDiv(new_pred)) {
4431 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4432 } else if (is_ia32_vfdiv(new_pred)) {
4433 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4437 if (is_ia32_xDiv(new_pred)) {
4438 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4439 } else if (is_ia32_vfdiv(new_pred)) {
4440 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
/* control-flow projs are not handled (fall through to panic) */
4443 case pn_Quot_X_regular:
4444 case pn_Quot_X_except:
4449 panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call into an ia32 Call.  The call target may be folded
 * into an address mode or immediate; register parameters are routed into
 * EAX/ECX/EDX as demanded by their (limited) register requirements, and the
 * x87 simulator is enabled when the callee returns a float.
 */
4452 static ir_node *gen_be_Call(ir_node *node)
4454 dbg_info *const dbgi = get_irn_dbg_info(node);
4455 ir_graph *const irg = current_ir_graph;
4456 ir_node *const src_block = get_nodes_block(node);
4457 ir_node *const block = be_transform_node(src_block);
4458 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4459 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4460 ir_node *const sp = be_transform_node(src_sp);
4461 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4462 ir_node *const noreg = ia32_new_NoReg_gp(env_cg);
4463 ia32_address_mode_t am;
4464 ia32_address_t *const addr = &am.addr;
4469 ir_node * eax = noreg;
4470 ir_node * ecx = noreg;
4471 ir_node * edx = noreg;
4472 unsigned const pop = be_Call_get_pop(node);
4473 ir_type *const call_tp = be_Call_get_type(node);
4475 /* Run the x87 simulator if the call returns a float value */
4476 if (get_method_n_ress(call_tp) > 0) {
4477 ir_type *const res_type = get_method_res_type(call_tp, 0);
4478 ir_mode *const res_mode = get_type_mode(res_type);
4480 if (res_mode != NULL && mode_is_float(res_mode)) {
4481 env_cg->do_x87_sim = 1;
4485 /* We do not want be_Call direct calls */
4486 assert(be_Call_get_entity(node) == NULL);
4488 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4489 match_am | match_immediate);
/* last input is the fpcw; walk remaining inputs backwards and assign each
 * register parameter to the GP register its requirement demands */
4491 i = get_irn_arity(node) - 1;
4492 fpcw = be_transform_node(get_irn_n(node, i--));
4493 for (; i >= be_pos_Call_first_arg; --i) {
4494 arch_register_req_t const *const req = arch_get_register_req(node, i);
4495 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4497 assert(req->type == arch_register_req_type_limited);
4498 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4500 switch (*req->limited) {
4501 case 1 << REG_EAX: assert(eax == noreg); eax = reg_parm; break;
4502 case 1 << REG_ECX: assert(ecx == noreg); ecx = reg_parm; break;
4503 case 1 << REG_EDX: assert(edx == noreg); edx = reg_parm; break;
4504 default: panic("Invalid GP register for register parameter");
4508 mem = transform_AM_mem(irg, block, src_ptr, src_mem, addr->mem);
4509 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4510 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4511 set_am_attributes(call, &am);
4512 call = fix_mem_proj(call, &am);
/* preserve pinned state so the scheduler cannot move a pinned call */
4514 if (get_irn_pinned(node) == op_pin_state_pinned)
4515 set_irn_pinned(call, op_pin_state_pinned);
4517 SET_IA32_ORIG_NODE(call, node);
4522 * Transform Builtin return_address
/*
 * __builtin_return_address(level): for level > 0 climb the frame chain with
 * ClimbFrame (branch partially hidden in this extract), then load the
 * return address slot of the resulting frame.
 */
4524 static ir_node *gen_return_address(ir_node *node) {
4525 ir_node *param = get_Builtin_param(node, 0);
4526 ir_node *frame = get_Builtin_param(node, 1);
4527 dbg_info *dbgi = get_irn_dbg_info(node);
4528 tarval *tv = get_Const_tarval(param);
4529 unsigned long value = get_tarval_long(tv);
4531 ir_node *block = be_transform_node(get_nodes_block(node));
4532 ir_node *ptr = be_transform_node(frame);
4533 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4537 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4538 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4539 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4542 /* load the return address from this frame */
4543 load = new_bd_ia32_Load(dbgi, block, ptr, noreg, get_irg_no_mem(current_ir_graph));
4545 set_irn_pinned(load, get_irn_pinned(node));
4546 set_ia32_op_type(load, ia32_AddrModeS);
4547 set_ia32_ls_mode(load, mode_Iu);
4549 set_ia32_am_offs_int(load, 0);
4550 set_ia32_use_frame(load);
4551 set_ia32_frame_ent(load, ia32_get_return_address_entity());
/* an unpinned load of a constant frame slot may be rematerialized */
4553 if (get_irn_pinned(node) == op_pin_state_floats) {
4554 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4555 && pn_ia32_vfld_res == pn_ia32_Load_res
4556 && pn_ia32_Load_res == pn_ia32_res);
4557 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4560 SET_IA32_ORIG_NODE(load, node);
4561 return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
4565 * Transform Builtin frame_address
/*
 * __builtin_frame_address(level): same frame-climbing scheme as
 * gen_return_address, but loading the saved frame pointer slot instead.
 */
4567 static ir_node *gen_frame_address(ir_node *node) {
4568 ir_node *param = get_Builtin_param(node, 0);
4569 ir_node *frame = get_Builtin_param(node, 1);
4570 dbg_info *dbgi = get_irn_dbg_info(node);
4571 tarval *tv = get_Const_tarval(param);
4572 unsigned long value = get_tarval_long(tv);
4574 ir_node *block = be_transform_node(get_nodes_block(node));
4575 ir_node *ptr = be_transform_node(frame);
4576 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4581 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4582 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4583 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4586 /* load the return address from this frame */
4587 load = new_bd_ia32_Load(dbgi, block, ptr, noreg, get_irg_no_mem(current_ir_graph));
4589 set_irn_pinned(load, get_irn_pinned(node));
4590 set_ia32_op_type(load, ia32_AddrModeS);
4591 set_ia32_ls_mode(load, mode_Iu);
4593 ent = ia32_get_frame_address_entity();
4595 set_ia32_am_offs_int(load, 0);
4596 set_ia32_use_frame(load);
4597 set_ia32_frame_ent(load, ent);
4599 /* will fail anyway, but gcc does this: */
4600 set_ia32_am_offs_int(load, 0);
/* an unpinned load of a constant frame slot may be rematerialized */
4603 if (get_irn_pinned(node) == op_pin_state_floats) {
4604 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4605 && pn_ia32_vfld_res == pn_ia32_Load_res
4606 && pn_ia32_Load_res == pn_ia32_res);
4607 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4610 SET_IA32_ORIG_NODE(load, node);
4611 return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
4615 * Transform Builtin frame_address
/*
 * NOTE(review): the header comment above looks copy-pasted — this function
 * actually transforms the PREFETCH builtin.  Selects PrefetchW (3DNow!,
 * write hint), an SSE PREFETCHT0/T1/T2/NTA variant chosen by the locality
 * parameter, or plain 3DNow! Prefetch; with no prefetch support the memory
 * edge is simply routed through.
 */
4617 static ir_node *gen_prefetch(ir_node *node) {
4619 ir_node *ptr, *block, *mem, *noreg, *base, *index;
4620 ir_node *param, *new_node;
4623 ia32_address_t addr;
4625 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4626 /* no prefetch at all, route memory */
4627 return be_transform_node(get_Builtin_mem(node));
/* param 1 = read/write hint */
4630 param = get_Builtin_param(node, 1);
4631 tv = get_Const_tarval(param);
4632 rw = get_tarval_long(tv);
4634 /* construct load address */
4635 memset(&addr, 0, sizeof(addr));
4636 ptr = get_Builtin_param(node, 0);
4637 ia32_create_address_mode(&addr, ptr, 0);
4641 noreg = ia32_new_NoReg_gp(env_cg);
4645 base = be_transform_node(base);
4648 if (index == NULL) {
4651 index = be_transform_node(index);
4654 dbgi = get_irn_dbg_info(node);
4655 block = be_transform_node(get_nodes_block(node));
4656 mem = be_transform_node(get_Builtin_mem(node));
4658 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4659 /* we have 3DNow!, this was already checked above */
4660 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4661 } else if (ia32_cg_config.use_sse_prefetch) {
4662 /* note: rw == 1 is IGNORED in that case */
4663 param = get_Builtin_param(node, 2);
4664 tv = get_Const_tarval(param);
4665 locality = get_tarval_long(tv);
4667 /* SSE style prefetch */
/* locality 0..3 selects NTA / T2 / T1 / T0 (case labels hidden here) */
4670 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4673 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4676 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4679 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4683 assert(ia32_cg_config.use_3dnow_prefetch);
4684 /* 3DNow! style prefetch */
4685 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4688 set_irn_pinned(new_node, get_irn_pinned(node));
4689 set_ia32_op_type(new_node, ia32_AddrModeS);
4690 set_ia32_ls_mode(new_node, mode_Bu);
4691 set_address(new_node, &addr);
4693 SET_IA32_ORIG_NODE(new_node, node);
4695 be_dep_on_frame(new_node);
4696 return new_r_Proj(current_ir_graph, block, new_node, mode_M, pn_ia32_Prefetch_M);
/**
 * Common helper for unary bit-scan builtins (Bsf/Bsr/Popcnt): match the
 * single parameter with address-mode support and create the given node.
 * @param node  the Builtin node
 * @param func  constructor of the ia32 node to create
 */
4702 static ir_node *gen_unop_dest(ir_node *node, construct_binop_dest_func *func) {
4703 ir_node *param = get_Builtin_param(node, 0);
4704 dbg_info *dbgi = get_irn_dbg_info(node);
4706 ir_node *block = get_nodes_block(node);
4707 ir_node *new_block = be_transform_node(block);
4709 ia32_address_mode_t am;
4710 ia32_address_t *addr = &am.addr;
4713 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4715 cnt = (*func)(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4716 set_am_attributes(cnt, &am);
4717 set_ia32_ls_mode(cnt, get_irn_mode(param));
4719 SET_IA32_ORIG_NODE(cnt, node);
4720 return fix_mem_proj(cnt, &am);
4724 * Transform builtin ffs.
/*
 * ffs(x) = bsf(x) + 1, with result 0 for x == 0.  The zero case is handled
 * branch-free: Set on the ZF flag, widen, negate to an all-ones mask, OR it
 * over the bsf result, then Lea adds the final +1.
 */
4726 static ir_node *gen_ffs(ir_node *node) {
4727 ir_node *bsf = gen_unop_dest(node, new_bd_ia32_Bsf);
4728 ir_node *real = skip_Proj(bsf);
4729 dbg_info *dbgi = get_irn_dbg_info(real);
4730 ir_node *block = get_nodes_block(real);
4731 ir_node *imm = create_Immediate(NULL, 0, 31);
4732 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4733 ir_node *nomem = new_NoMem();
4734 ir_node *flag, *set, *conv, *neg, *or;
/* need the Bsf in mode_T to get at its flags proj */
4737 if (get_irn_mode(real) != mode_T) {
4738 set_irn_mode(real, mode_T);
4739 bsf = new_r_Proj(current_ir_graph, block, real, mode_Iu, pn_ia32_res);
4742 flag = new_r_Proj(current_ir_graph, block, real, mode_b, pn_ia32_flags);
/* set = (input == 0) */
4745 set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
4746 SET_IA32_ORIG_NODE(set, node);
4749 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg, noreg, nomem, set, mode_Bu);
4750 SET_IA32_ORIG_NODE(conv, node);
/* neg = 0 - set: 0xFFFFFFFF iff input was zero */
4753 neg = new_bd_ia32_Neg(dbgi, block, conv);
4756 or = new_bd_ia32_Or(dbgi, block, noreg, noreg, nomem, bsf, neg);
4757 set_ia32_commutative(or);
/* +1: ffs counts from 1; zero input yields -1 + 1 = 0 */
4760 return new_bd_ia32_Lea(dbgi, block, or, create_Immediate(NULL, 0, 1));
4764 * Transform builtin clz.
/* clz(x) = 31 - bsr(x) = bsr(x) XOR 31 (valid for nonzero x). */
4766 static ir_node *gen_clz(ir_node *node) {
4767 ir_node *bsr = gen_unop_dest(node, new_bd_ia32_Bsr);
4768 ir_node *real = skip_Proj(bsr);
4769 dbg_info *dbgi = get_irn_dbg_info(real);
4770 ir_node *block = get_nodes_block(real);
4771 ir_node *imm = create_Immediate(NULL, 0, 31);
4772 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4774 return new_bd_ia32_Xor(dbgi, block, noreg, noreg, new_NoMem(), bsr, imm);
4778 * Transform builtin ctz.
/* ctz(x) is exactly bsf(x) (valid for nonzero x). */
4780 static ir_node *gen_ctz(ir_node *node) {
4781 return gen_unop_dest(node, new_bd_ia32_Bsf);
4785 * Transform builtin parity.
/*
 * Compare the value against 0 (which sets the parity flag from the low
 * byte of the result), then Set on the parity condition and widen the
 * 8-bit result.
 * NOTE(review): x86 PF only reflects the LOW byte of the comparison
 * result — presumably earlier lowering reduced the operand accordingly;
 * verify against the lowering phase.
 */
4787 static ir_node *gen_parity(ir_node *node) {
4788 ir_node *param = get_Builtin_param(node, 0);
4789 dbg_info *dbgi = get_irn_dbg_info(node);
4791 ir_node *block = get_nodes_block(node);
4793 ir_node *new_block = be_transform_node(block);
4794 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4795 ir_node *imm, *cmp, *new_node;
4797 ia32_address_mode_t am;
4798 ia32_address_t *addr = &am.addr;
4802 match_arguments(&am, block, NULL, param, NULL, match_am);
4803 imm = create_Immediate(NULL, 0, 0);
4804 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4805 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4806 set_am_attributes(cmp, &am);
4807 set_ia32_ls_mode(cmp, mode_Iu);
4809 SET_IA32_ORIG_NODE(cmp, node);
4811 cmp = fix_mem_proj(cmp, &am);
/* materialize the parity flag as a 0/1 byte */
4814 new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
4815 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the byte to a full register */
4818 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg, noreg,
4819 new_NoMem(), new_node, mode_Bu);
4820 SET_IA32_ORIG_NODE(new_node, node);
4825 * Transform builtin popcount
4827 static ir_node *gen_popcount(ir_node *node) {
4828 ir_node *param = get_Builtin_param(node, 0);
4829 dbg_info *dbgi = get_irn_dbg_info(node);
4831 ir_node *block = get_nodes_block(node);
4832 ir_node *new_block = be_transform_node(block);
4834 ir_node *noreg, *nomem, *new_param;
4835 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4837 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4838 if (ia32_cg_config.use_popcnt) {
4839 ia32_address_mode_t am;
4840 ia32_address_t *addr = &am.addr;
4843 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4845 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4846 set_am_attributes(cnt, &am);
4847 set_ia32_ls_mode(cnt, get_irn_mode(param));
4849 SET_IA32_ORIG_NODE(cnt, node);
4850 return fix_mem_proj(cnt, &am);
4853 noreg = ia32_new_NoReg_gp(env_cg);
4854 nomem = new_NoMem();
4855 new_param = be_transform_node(param);
4857 /* do the standard popcount algo */
4859 /* m1 = x & 0x55555555 */
4860 imm = create_Immediate(NULL, 0, 0x55555555);
4861 m1 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, new_param, imm);
4864 simm = create_Immediate(NULL, 0, 1);
4865 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4867 /* m2 = s1 & 0x55555555 */
4868 m2 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, s1, imm);
4871 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4873 /* m4 = m3 & 0x33333333 */
4874 imm = create_Immediate(NULL, 0, 0x33333333);
4875 m4 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, m3, imm);
4878 simm = create_Immediate(NULL, 0, 2);
4879 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4881 /* m5 = s2 & 0x33333333 */
4882 m5 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, s2, imm);
4885 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4887 /* m7 = m6 & 0x0F0F0F0F */
4888 imm = create_Immediate(NULL, 0, 0x0F0F0F0F);
4889 m7 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, m6, imm);
4892 simm = create_Immediate(NULL, 0, 4);
4893 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4895 /* m8 = s3 & 0x0F0F0F0F */
4896 m8 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, s3, imm);
4899 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4901 /* m10 = m9 & 0x00FF00FF */
4902 imm = create_Immediate(NULL, 0, 0x00FF00FF);
4903 m10 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, m9, imm);
4906 simm = create_Immediate(NULL, 0, 8);
4907 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4909 /* m11 = s4 & 0x00FF00FF */
4910 m11 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, s4, imm);
4912 /* m12 = m10 + m11 */
4913 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4915 /* m13 = m12 & 0x0000FFFF */
4916 imm = create_Immediate(NULL, 0, 0x0000FFFF);
4917 m13 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, m12, imm);
4919 /* s5 = m12 >> 16 */
4920 simm = create_Immediate(NULL, 0, 16);
4921 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4923 /* res = m13 + s5 */
4924 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4928 * Transform Builtin node.
4930 static ir_node *gen_Builtin(ir_node *node) {
	/* Central dispatcher: map each ir_builtin_kind to its ia32-specific
	 * transformer; unhandled kinds abort with a panic. */
4931 	ir_builtin_kind kind = get_Builtin_kind(node);
4934 	case ir_bk_return_address:
4935 		return gen_return_address(node);
	/* NOTE(review): "frame_addess" is the (misspelled) enum constant as
	 * declared in the firm headers; keep it in sync with them. */
4936 	case ir_bk_frame_addess:
4937 		return gen_frame_address(node);
4938 	case ir_bk_prefetch:
4939 		return gen_prefetch(node);
4941 		return gen_ffs(node);
4943 		return gen_clz(node);
4945 		return gen_ctz(node);
4947 		return gen_parity(node);
4948 	case ir_bk_popcount:
4949 		return gen_popcount(node);
4951 	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
4955 * Transform Proj(Builtin) node.
4957 static ir_node *gen_Proj_Builtin(ir_node *proj) {
	/* Transform a Proj of a Builtin: value-producing builtins have exactly
	 * one result (pn_Builtin_1_result), prefetch only produces memory
	 * (pn_Builtin_M); the transformed builtin node itself is returned. */
4958 	ir_node         *node     = get_Proj_pred(proj);
4959 	ir_node         *new_node = be_transform_node(node);
4960 	ir_builtin_kind kind      = get_Builtin_kind(node);
4963 	case ir_bk_return_address:
4964 	case ir_bk_frame_addess:
4969 	case ir_bk_popcount:
	/* single-result builtins */
4970 		assert(get_Proj_proj(proj) == pn_Builtin_1_result);
	/* prefetch has only a memory result */
4972 	case ir_bk_prefetch:
4973 		assert(get_Proj_proj(proj) == pn_Builtin_M);
4976 	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
4979 static ir_node *gen_be_IncSP(ir_node *node)
	/* An IncSP adjusts esp with an add/sub and therefore clobbers the
	 * flags; duplicate the node and record that for the allocator. */
4981 	ir_node *res = be_duplicate_node(node);
4982 	arch_irn_add_flags(res, arch_irn_flags_modify_flags);
4988 * Transform the Projs from a be_Call.
4990 static ir_node *gen_Proj_be_Call(ir_node *node)
	/* Transform the Projs of a be_Call: renumber them to the ia32_Call
	 * output scheme, insert the x87->SSE transfer (vfst + xLoad) for float
	 * results when SSE2 is used, and pin fixed output registers. */
4992 	ir_node *block = be_transform_node(get_nodes_block(node));
4993 	ir_node *call = get_Proj_pred(node);
4994 	ir_node *new_call = be_transform_node(call);
4995 	ir_graph *irg = current_ir_graph;
4996 	dbg_info *dbgi = get_irn_dbg_info(node);
4997 	ir_type *method_type = be_Call_get_type(call);
4998 	int n_res = get_method_n_ress(method_type);
4999 	long proj = get_Proj_proj(node);
5000 	ir_mode *mode = get_irn_mode(node);
5004 	/* The following is kinda tricky: If we're using SSE, then we have to
5005 	 * move the result value of the call in floating point registers to an
5006 	 * xmm register, we therefore construct a GetST0 -> xLoad sequence
5007 	 * after the call, we have to make sure to correctly make the
5008 	 * MemProj and the result Proj use these 2 nodes
5010 	if (proj == pn_be_Call_M_regular) {
5011 		// get new node for result, are we doing the sse load/store hack?
5012 		ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
5013 		ir_node *call_res_new;
5014 		ir_node *call_res_pred = NULL;
5016 		if (call_res != NULL) {
5017 			call_res_new = be_transform_node(call_res);
5018 			call_res_pred = get_Proj_pred(call_res_new);
	/* no sse hack active: the memory Proj hangs off the call itself;
	 * otherwise it must come after the xLoad of the transfer sequence */
5021 		if (call_res_pred == NULL || is_ia32_Call(call_res_pred)) {
5022 			return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
5025 			assert(is_ia32_xLoad(call_res_pred));
5026 			return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
	/* float result with SSE2: route st(0) through the stack frame */
5030 	if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
5031 			&& proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
5033 		ir_node *frame = get_irg_frame(irg);
5034 		ir_node *noreg = ia32_new_NoReg_gp(env_cg);
5036 		ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
5039 		/* in case there is no memory output: create one to serialize the copy
5041 			call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
5042 			pn_be_Call_M_regular);
5043 		call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
5044 		pn_be_Call_first_res);
5046 		/* store st(0) onto stack */
5047 		fstp = new_bd_ia32_vfst(dbgi, block, frame, noreg, call_mem,
5049 		set_ia32_op_type(fstp, ia32_AddrModeD);
5050 		set_ia32_use_frame(fstp);
5052 		/* load into SSE register */
5053 		sse_load = new_bd_ia32_xLoad(dbgi, block, frame, noreg, fstp, mode);
5054 		set_ia32_op_type(sse_load, ia32_AddrModeS);
5055 		set_ia32_use_frame(sse_load);
5057 		sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
5063 	/* transform call modes */
5064 	if (mode_is_data(mode)) {
5065 		const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5069 	/* Map from be_Call to ia32_Call proj number */
5070 	if (proj == pn_be_Call_sp) {
5071 		proj = pn_ia32_Call_stack;
5072 	} else if (proj == pn_be_Call_M_regular) {
5073 		proj = pn_ia32_Call_M;
	/* result Projs: find the ia32_Call output whose register constraint
	 * matches this Proj's required register */
5075 		arch_register_req_t const *const req = arch_get_register_req_out(node);
5076 		int const n_outs = arch_irn_get_n_outs(new_call);
5079 		assert(proj >= pn_be_Call_first_res);
5080 		assert(req->type & arch_register_req_type_limited);
5082 		for (i = 0; i < n_outs; ++i) {
5083 			arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);
5085 			if (!(new_req->type & arch_register_req_type_limited) ||
5086 			new_req->cls != req->cls ||
5087 			*new_req->limited != *req->limited)
5096 	res = new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
5098 	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
	/* pin the fixed registers for the stack pointer and fpu control word */
5100 	case pn_ia32_Call_stack:
5101 		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5104 	case pn_ia32_Call_fpcw:
5105 		arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5113 * Transform the Projs from a Cmp.
5115 static ir_node *gen_Proj_Cmp(ir_node *node)
	/* Cmp results (mode_b) must have been lowered to control flow or Set
	 * nodes before the backend runs; reaching this point is a hard error. */
5117 	/* this probably means not all mode_b nodes were lowered... */
5118 	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5123 * Transform the Projs from a Bound.
5125 static ir_node *gen_Proj_Bound(ir_node *node)
	/* Transform the Projs of a Bound node: the bound check itself becomes
	 * a conditional jump, so the X Projs map onto the Jcc outputs; memory
	 * and index pass through unchanged. */
5127 	ir_node *new_node, *block;
5128 	ir_node *pred = get_Proj_pred(node);
5130 	switch (get_Proj_proj(node)) {
5132 		return be_transform_node(get_Bound_mem(pred));
	/* in-bounds continuation == Jcc true exit */
5133 	case pn_Bound_X_regular:
5134 		new_node = be_transform_node(pred);
5135 		block = get_nodes_block(new_node);
5136 		return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
	/* out-of-bounds exception == Jcc false exit */
5137 	case pn_Bound_X_except:
5138 		new_node = be_transform_node(pred);
5139 		block = get_nodes_block(new_node);
5140 		return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
5142 		return be_transform_node(get_Bound_index(pred));
5144 	panic("unsupported Proj from Bound");
5148 static ir_node *gen_Proj_ASM(ir_node *node)
	/* Non-memory Projs of an ASM node keep their position and are simply
	 * duplicated; the memory Proj is renumbered to sit behind all register
	 * outputs of the transformed ASM node.
	 * NOTE(review): the "+ 1" below looks off-by-one for 0-based output
	 * positions -- confirm against the ia32 ASM node's output layout. */
5154 	if (get_irn_mode(node) != mode_M)
5155 		return be_duplicate_node(node);
5157 	pred     = get_Proj_pred(node);
5158 	new_pred = be_transform_node(pred);
5159 	block    = get_nodes_block(new_pred);
5160 	return new_r_Proj(current_ir_graph, block, new_pred, mode_M,
5161 	arch_irn_get_n_outs(new_pred) + 1);
5165 * Transform and potentially renumber Proj nodes.
5167 static ir_node *gen_Proj(ir_node *node)
	/* Transform (and potentially renumber) Proj nodes by dispatching on
	 * the opcode of the predecessor; unhandled cases fall through to a
	 * plain duplication at the end. */
5169 	ir_node *pred = get_Proj_pred(node);
5172 	switch (get_irn_opcode(pred)) {
	/* a Store has only a memory result on ia32 */
5174 		proj = get_Proj_proj(node);
5175 		if (proj == pn_Store_M) {
5176 			return be_transform_node(pred);
5178 		panic("No idea how to transform proj->Store");
5181 		return gen_Proj_Load(node);
5183 		return gen_Proj_ASM(node);
5185 		return gen_Proj_Builtin(node);
5189 		return gen_Proj_DivMod(node);
5191 		return gen_Proj_CopyB(node);
5193 		return gen_Proj_Quot(node);
5195 		return gen_Proj_be_SubSP(node);
5197 		return gen_Proj_be_AddSP(node);
5199 		return gen_Proj_be_Call(node);
5201 		return gen_Proj_Cmp(node);
5203 		return gen_Proj_Bound(node);
5205 		proj = get_Proj_proj(node);
5207 		case pn_Start_X_initial_exec: {
5208 			ir_node *block = get_nodes_block(pred);
5209 			ir_node *new_block = be_transform_node(block);
5210 			dbg_info *dbgi = get_irn_dbg_info(node);
5211 			/* we exchange the ProjX with a jump */
5212 			ir_node *jump = new_rd_Jmp(dbgi, current_ir_graph, new_block);
5217 		case pn_Start_P_tls:
5218 			return gen_Proj_tls(node);
5223 		if (is_ia32_l_FloattoLL(pred)) {
5224 			return gen_Proj_l_FloattoLL(node);
5226 		} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
	/* gp-mode Projs of untransformed predecessors get an Iu Proj */
5230 			ir_mode *mode = get_irn_mode(node);
5231 			if (ia32_mode_needs_gp_reg(mode)) {
5232 				ir_node *new_pred = be_transform_node(pred);
5233 				ir_node *block = be_transform_node(get_nodes_block(node));
5234 				ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
5235 				mode_Iu, get_Proj_proj(node));
	/* keep the original node number for debugging/dumping purposes */
5236 #ifdef DEBUG_libfirm
5237 				new_proj->node_nr = node->node_nr;
5243 	return be_duplicate_node(node);
5247 * Enters all transform functions into the generic pointer
5249 static void register_transformers(void)
	/* Enter all gen_* transform functions into the generic function
	 * pointer of their ir_op; BAD marks nodes that must not occur here. */
5251 	/* first clear the generic function pointer for all ops */
5252 	clear_irp_opcodes_generic_func();
5254 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5255 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5294 	/* transform ops from intrinsic lowering */
5306 	GEN(ia32_l_LLtoFloat);
5307 	GEN(ia32_l_FloattoLL);
5313 	/* we should never see these nodes */
5328 	/* handle builtins */
5331 	/* handle generic backend nodes */
5345 * Pre-transform all unknown and noreg nodes.
5347 static void ia32_pretransform_node(void)
	/* Pre-transform the per-class Unknown and NoReg placeholder nodes so
	 * that later transformations can reference the transformed versions. */
5349 	ia32_code_gen_t *cg = env_cg;
5351 	cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5352 	cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5353 	cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5354 	cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5355 	cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5356 	cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5361 * Walker, checks if all ia32 nodes producing more than one result have their
5362 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
5364 static void add_missing_keep_walker(ir_node *node, void *data)
	/* Walker: for every multi-output ia32 node, record which outputs
	 * already have Projs (bitmask found_projs) and create Proj + be_Keep
	 * nodes for the unused ones so they stay alive through register
	 * allocation; the flags class is exempt. */
5367 	unsigned found_projs = 0;
5368 	const ir_edge_t *edge;
5369 	ir_mode *mode = get_irn_mode(node);
5374 	if (!is_ia32_irn(node))
5377 	n_outs = arch_irn_get_n_outs(node);
5380 	if (is_ia32_SwitchJmp(node))
	/* the bitmask below only holds one bit per output */
5383 	assert(n_outs < (int) sizeof(unsigned) * 8);
5384 	foreach_out_edge(node, edge) {
5385 		ir_node *proj = get_edge_src_irn(edge);
5388 		/* The node could be kept */
	/* memory outputs need no Keep */
5392 		if (get_irn_mode(proj) == mode_M)
5395 		pn = get_Proj_proj(proj);
5396 		assert(pn < n_outs);
5397 		found_projs |= 1 << pn;
5401 	/* are keeps missing? */
5403 	for (i = 0; i < n_outs; ++i) {
5406 		const arch_register_req_t *req;
5407 		const arch_register_class_t *cls;
	/* this output already has a Proj -> nothing to do */
5409 		if (found_projs & (1 << i)) {
5413 		req = get_ia32_out_req(node, i);
	/* flag outputs need no artificial Keep */
5418 		if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
5422 		block = get_nodes_block(node);
5423 		in[0] = new_r_Proj(current_ir_graph, block, node,
5424 		arch_register_class_mode(cls), i);
	/* reuse one Keep per node where possible */
5425 		if (last_keep != NULL) {
5426 			be_Keep_add_node(last_keep, cls, in[0]);
5428 		last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
5429 		if (sched_is_scheduled(node)) {
5430 			sched_add_after(node, last_keep);
5437 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
5440 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
	/* Walk the whole graph and give every multi-output ia32 node the
	 * Projs/Keeps for its unused results (see add_missing_keep_walker). */
5442 	ir_graph *irg = be_get_birg_irg(cg->birg);
5443 	irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5446 /* do the transformation */
5447 void ia32_transform_graph(ia32_code_gen_t *cg)
	/* Entry point of the firm -> ia32 transformation: set up transformer
	 * table, heights and address-mode analysis, run the transformation
	 * with CSE disabled, then tear the helpers down again. */
5451 	register_transformers();
5453 	initial_fpcw = NULL;
5455 	BE_TIMER_PUSH(t_heights);
5456 	heights = heights_new(cg->irg);
5457 	BE_TIMER_POP(t_heights);
	/* precompute nodes that must not be folded into address modes */
5458 	ia32_calculate_non_address_mode_nodes(cg->birg);
5460 	/* the transform phase is not safe for CSE (yet) because several nodes get
5461 	 * attributes set after their creation */
5462 	cse_last = get_opt_cse();
5465 	be_transform_graph(cg->birg, ia32_pretransform_node);
	/* restore the caller's CSE setting */
5467 	set_opt_cse(cse_last);
5469 	ia32_free_non_address_mode_nodes();
5470 	heights_free(heights);
5474 void ia32_init_transform(void)
5476 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");