2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
36 #include "irgraph_t.h"
41 #include "iredges_t.h"
54 #include "../benode_t.h"
55 #include "../besched.h"
57 #include "../beutil.h"
58 #include "../beirg_t.h"
59 #include "../betranshlp.h"
62 #include "bearch_ia32_t.h"
63 #include "ia32_common_transform.h"
64 #include "ia32_nodes_attr.h"
65 #include "ia32_transform.h"
66 #include "ia32_new_nodes.h"
67 #include "ia32_map_regs.h"
68 #include "ia32_dbg_stat.h"
69 #include "ia32_optimize.h"
70 #include "ia32_util.h"
71 #include "ia32_address_mode.h"
72 #include "ia32_architecture.h"
74 #include "gen_ia32_regalloc_if.h"
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
82 #define TP_SFP_SIGN "ia32_sfp_sign"
83 #define TP_DFP_SIGN "ia32_dfp_sign"
84 #define TP_SFP_ABS "ia32_sfp_abs"
85 #define TP_DFP_ABS "ia32_dfp_abs"
86 #define TP_INT_MAX "ia32_int_max"
88 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
89 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
90 #define ENT_SFP_ABS "IA32_SFP_ABS"
91 #define ENT_DFP_ABS "IA32_DFP_ABS"
92 #define ENT_INT_MAX "IA32_INT_MAX"
94 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
95 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
97 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
99 static ir_node *initial_fpcw = NULL;
101 extern ir_op *get_op_Mulh(void);
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
128 static ir_node *create_immediate_or_transform(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
132 dbg_info *dbgi, ir_node *block,
133 ir_node *op, ir_node *orig_node);
135 /** Return non-zero if a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node) {
137 return is_Const(node) && is_Const_null(node);
/* NOTE(review): closing braces of these three predicates (original lines
 * 138/143/148) were lost in this extraction. */
140 /** Return non-zero if a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node) {
142 return is_Const(node) && is_Const_one(node);
145 /** Return non-zero if a node represents the -1 constant (all bits set). */
146 static bool is_Const_Minus_1(ir_node *node) {
147 return is_Const(node) && is_Const_all_one(node);
151 * returns true if constant can be created with a simple float command
/* (the x87 FPU can materialize 0.0 and 1.0 directly via fldz/fld1,
 * so no constant-pool load is needed for those values) */
153 static bool is_simple_x87_Const(ir_node *node)
155 tarval *tv = get_Const_tarval(node);
156 if (tarval_is_null(tv) || tarval_is_one(tv))
/* NOTE(review): the return statements of this function are missing from
 * this extraction (gaps at original lines 157-158, 160-161). */
159 /* TODO: match all the other float constants */
164 * returns true if constant can be created with a simple float command
/* (SSE can materialize 0.0/1.0 without a memory load; a double whose
 * low 32 bits are zero can be built from a 32-bit immediate) */
166 static bool is_simple_sse_Const(ir_node *node)
168 tarval *tv = get_Const_tarval(node);
169 ir_mode *mode = get_tarval_mode(tv);
174 if (tarval_is_null(tv) || tarval_is_one(tv))
177 if (mode == mode_D) {
/* assemble the low 32 bits of the double little-endian, byte by byte */
178 unsigned val = get_tarval_sub_bits(tv, 0) |
179 (get_tarval_sub_bits(tv, 1) << 8) |
180 (get_tarval_sub_bits(tv, 2) << 16) |
181 (get_tarval_sub_bits(tv, 3) << 24);
183 /* lower 32bit are zero, really a 32bit constant */
/* NOTE(review): the "if (val == 0) return true;" logic and final returns
 * appear to be missing from this extraction. */
187 /* TODO: match all the other float constants */
192 * Transforms a Const.
/* Float constants are turned into SSE/x87 loads (or cheaper register
 * constructions where possible); integer constants become ia32_Const.
 * NOTE(review): several declaration lines (res, load, floatent, ...) and
 * closing braces are missing from this extraction. */
194 static ir_node *gen_Const(ir_node *node) {
195 ir_graph *irg = current_ir_graph;
196 ir_node *old_block = get_nodes_block(node);
197 ir_node *block = be_transform_node(old_block);
198 dbg_info *dbgi = get_irn_dbg_info(node);
199 ir_mode *mode = get_irn_mode(node);
201 assert(is_Const(node));
203 if (mode_is_float(mode)) {
205 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
206 ir_node *nomem = new_NoMem();
210 if (ia32_cg_config.use_sse2) {
211 tarval *tv = get_Const_tarval(node);
212 if (tarval_is_null(tv)) {
/* 0.0: xorps reg,reg — no memory access needed */
213 load = new_rd_ia32_xZero(dbgi, irg, block);
214 set_ia32_ls_mode(load, mode);
216 } else if (tarval_is_one(tv)) {
/* 1.0: build all-ones, then shift left/right to leave exactly the
 * bit pattern of 1.0f (shift by 26) resp. 1.0 (shift by 55) */
217 int cnst = mode == mode_F ? 26 : 55;
218 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
219 ir_node *imm2 = create_Immediate(NULL, 0, 2);
220 ir_node *pslld, *psrld;
222 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
223 set_ia32_ls_mode(load, mode);
224 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
225 set_ia32_ls_mode(pslld, mode);
226 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
227 set_ia32_ls_mode(psrld, mode);
229 } else if (mode == mode_F) {
230 /* we can place any 32bit constant by using a movd gp, sse */
231 unsigned val = get_tarval_sub_bits(tv, 0) |
232 (get_tarval_sub_bits(tv, 1) << 8) |
233 (get_tarval_sub_bits(tv, 2) << 16) |
234 (get_tarval_sub_bits(tv, 3) << 24);
235 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
236 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
237 set_ia32_ls_mode(load, mode);
240 if (mode == mode_D) {
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
/* NOTE(review): the "if (val == 0)" guard for this path seems to be
 * missing from this extraction (gap at original line 245). */
246 ir_node *imm32 = create_Immediate(NULL, 0, 32);
247 ir_node *cnst, *psllq;
249 /* fine, lower 32bit are zero, produce 32bit value */
250 val = get_tarval_sub_bits(tv, 4) |
251 (get_tarval_sub_bits(tv, 5) << 8) |
252 (get_tarval_sub_bits(tv, 6) << 16) |
253 (get_tarval_sub_bits(tv, 7) << 24);
254 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
255 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
256 set_ia32_ls_mode(load, mode);
/* shift the upper 32 bits into place */
257 psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
258 set_ia32_ls_mode(psllq, mode);
/* fallback: place the constant into a float entity and load it */
263 floatent = create_float_const_entity(node);
265 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
267 set_ia32_op_type(load, ia32_AddrModeS);
268 set_ia32_am_sc(load, floatent);
/* constant loads may be rematerialized instead of spilled */
269 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
270 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0/1.0, otherwise load from entity */
273 if (is_Const_null(node)) {
274 load = new_rd_ia32_vfldz(dbgi, irg, block);
276 set_ia32_ls_mode(load, mode);
277 } else if (is_Const_one(node)) {
278 load = new_rd_ia32_vfld1(dbgi, irg, block);
280 set_ia32_ls_mode(load, mode);
282 floatent = create_float_const_entity(node);
284 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
285 set_ia32_op_type(load, ia32_AddrModeS);
286 set_ia32_am_sc(load, floatent);
287 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
288 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
289 /* take the mode from the entity */
290 set_ia32_ls_mode(load, get_type_mode(get_entity_type(floatent)));
294 /* Const Nodes before the initial IncSP are a bad idea, because
295 * they could be spilled and we have no SP ready at that point yet.
296 * So add a dependency to the initial frame pointer calculation to
297 * avoid that situation.
299 if (get_irg_start_block(irg) == block) {
300 add_irn_dep(load, get_irg_frame(irg));
303 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
305 } else { /* non-float mode */
307 tarval *tv = get_Const_tarval(node);
/* normalize to 32-bit unsigned so get_tarval_long() below is valid */
310 tv = tarval_convert_to(tv, mode_Iu);
312 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
314 panic("couldn't convert constant tarval (%+F)", node);
316 val = get_tarval_long(tv);
318 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
319 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/* same start-block spill problem as for the float case above */
322 if (get_irg_start_block(irg) == block) {
323 add_irn_dep(cnst, get_irg_frame(irg));
331 * Transforms a SymConst.
/* Address-of-entity constants: float modes load through the entity's
 * address, integer modes become an ia32_Const carrying the entity.
 * NOTE(review): declarations of cnst/entity and several braces are
 * missing from this extraction. */
333 static ir_node *gen_SymConst(ir_node *node) {
334 ir_graph *irg = current_ir_graph;
335 ir_node *old_block = get_nodes_block(node);
336 ir_node *block = be_transform_node(old_block);
337 dbg_info *dbgi = get_irn_dbg_info(node);
338 ir_mode *mode = get_irn_mode(node);
341 if (mode_is_float(mode)) {
342 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
343 ir_node *nomem = new_NoMem();
345 if (ia32_cg_config.use_sse2)
346 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
348 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
/* only entity addresses are supported by this backend */
354 if(get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
361 /* Const Nodes before the initial IncSP are a bad idea, because
362 * they could be spilled and we have no SP ready at that point yet
364 if (get_irg_start_block(irg) == block) {
365 add_irn_dep(cnst, get_irg_frame(irg));
368 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
373 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Lazily creates (and caches) a global read-only entity holding one of
 * the well-known bit patterns (sign masks, abs masks, INT_MAX) that the
 * backend needs for float negate/abs lowering.
 * NOTE(review): several struct-field lines and local declarations (mode,
 * tv, tp, ent, cnst, rem) are missing from this extraction. */
374 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
375 static const struct {
377 const char *ent_name;
378 const char *cnst_str;
381 } names [ia32_known_const_max] = {
382 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
383 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
384 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
385 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
386 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
388 static ir_entity *ent_cache[ia32_known_const_max];
390 const char *tp_name, *ent_name, *cnst_str;
398 ent_name = names[kct].ent_name;
/* create the entity only on first request, then serve from the cache */
399 if (! ent_cache[kct]) {
400 tp_name = names[kct].tp_name;
401 cnst_str = names[kct].cnst_str;
/* mode selector from the table: 0 = 32-bit, 1 = 64-bit, 2 = float */
403 switch (names[kct].mode) {
404 case 0: mode = mode_Iu; break;
405 case 1: mode = mode_Lu; break;
406 default: mode = mode_F; break;
408 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
409 tp = new_type_primitive(new_id_from_str(tp_name), mode);
410 /* set the specified alignment */
411 set_type_alignment_bytes(tp, names[kct].align);
413 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
415 set_entity_ld_ident(ent, get_entity_ident(ent));
416 set_entity_visibility(ent, visibility_local);
417 set_entity_variability(ent, variability_constant);
418 set_entity_allocation(ent, allocation_static);
420 /* we create a new entity here: It's initialization must resist on the
422 rem = current_ir_graph;
/* constant initializers must be built in the const-code irg */
423 current_ir_graph = get_const_code_irg();
424 cnst = new_Const(mode, tv);
425 current_ir_graph = rem;
427 set_atomic_ent_value(ent, cnst);
429 /* cache the entry */
430 ent_cache[kct] = ent;
433 return ent_cache[kct];
437 * return true if the node is a Proj(Load) and could be used in source address
438 * mode for another node. Will return only true if the @p other node is not
439 * dependent on the memory of the Load (for binary operations use the other
440 * input here, for unary operations use NULL).
/* NOTE(review): the local declarations (load, pn) and several return
 * statements are missing from this extraction. */
442 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
443 ir_node *other, ir_node *other2, match_flags_t flags)
448 /* float constants are always available */
449 if (is_Const(node)) {
450 ir_mode *mode = get_irn_mode(node);
451 if (mode_is_float(mode)) {
452 if (ia32_cg_config.use_sse2) {
453 if (is_simple_sse_Const(node))
456 if (is_simple_x87_Const(node))
/* multi-user constants are better kept in a register */
459 if (get_irn_n_edges(node) > 1)
467 load = get_Proj_pred(node);
468 pn = get_Proj_proj(node);
469 if (!is_Load(load) || pn != pn_Load_res)
/* AM folding only works inside a single block */
471 if (get_nodes_block(load) != block)
473 /* we only use address mode if we're the only user of the load */
474 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
476 /* in some edge cases with address mode we might reach the load normally
477 * and through some AM sequence, if it is already materialized then we
478 * can't create an AM node from it */
479 if (be_is_transformed(node))
482 /* don't do AM if other node inputs depend on the load (via mem-proj) */
483 if (other != NULL && prevents_AM(block, load, other))
486 if (other2 != NULL && prevents_AM(block, load, other2))
/** Bundled result of operand matching: address parts, operand nodes and
 *  mode flags for constructing an ia32 node.
 *  NOTE(review): most field lines (addr, ls_mode, mem_proj, pinned,
 *  new_op1/new_op2) are missing from this extraction. */
492 typedef struct ia32_address_mode_t ia32_address_mode_t;
493 struct ia32_address_mode_t {
498 ia32_op_type_t op_type;
502 unsigned commutative : 1;
503 unsigned ins_permuted : 1;
/** Fill @p addr from pointer @p ptr and memory input @p mem, transforming
 *  the base/index inputs and substituting NoReg where absent. */
506 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
510 /* construct load address */
511 memset(addr, 0, sizeof(addr[0]));
512 ia32_create_address_mode(addr, ptr, /*force=*/0);
514 noreg_gp = ia32_new_NoReg_gp(env_cg);
515 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
516 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
517 addr->mem = be_transform_node(mem);
/** Fill the address part of @p am for @p node, which is either a float
 *  Const (loaded from a generated entity) or a Proj(Load) whose address
 *  is folded into the consuming instruction.
 *  NOTE(review): local declarations (load, ptr, mem, new_mem) and an
 *  early return are missing from this extraction. */
520 static void build_address(ia32_address_mode_t *am, ir_node *node)
522 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
523 ia32_address_t *addr = &am->addr;
529 if (is_Const(node)) {
/* float constant: address the generated constant-pool entity */
530 ir_entity *entity = create_float_const_entity(node);
531 addr->base = noreg_gp;
532 addr->index = noreg_gp;
533 addr->mem = new_NoMem();
534 addr->symconst_ent = entity;
536 am->ls_mode = get_type_mode(get_entity_type(entity));
537 am->pinned = op_pin_state_floats;
/* Proj(Load) case: take address, mode and mem-proj from the Load */
541 load = get_Proj_pred(node);
542 ptr = get_Load_ptr(load);
543 mem = get_Load_mem(load);
544 new_mem = be_transform_node(mem);
545 am->pinned = get_irn_pinned(load);
546 am->ls_mode = get_Load_mode(load);
547 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
550 /* construct load address */
551 ia32_create_address_mode(addr, ptr, /*force=*/0);
553 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
554 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/** Copy all address attributes from @p addr onto the ia32 node @p node. */
558 static void set_address(ir_node *node, const ia32_address_t *addr)
560 set_ia32_am_scale(node, addr->scale);
561 set_ia32_am_sc(node, addr->symconst_ent);
562 set_ia32_am_offs_int(node, addr->offset);
563 if(addr->symconst_sign)
564 set_ia32_am_sc_sign(node);
/* NOTE(review): the "if (addr->use_frame)" guard for the next two lines
 * appears to be missing from this extraction. */
566 set_ia32_use_frame(node);
567 set_ia32_frame_ent(node, addr->frame_entity);
571 * Apply attributes of a given address mode to a node.
573 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
575 set_address(node, &am->addr);
577 set_ia32_op_type(node, am->op_type);
578 set_ia32_ls_mode(node, am->ls_mode);
579 if (am->pinned == op_pin_state_pinned) {
580 /* beware: some nodes are already pinned and did not allow to change the state */
581 if (get_irn_pinned(node) != op_pin_state_pinned)
582 set_irn_pinned(node, op_pin_state_pinned);
/* NOTE(review): the "if (am->commutative)" guard for the next line seems
 * to be missing from this extraction. */
585 set_ia32_commutative(node);
589 * Check, if a given node is a Down-Conv, ie. a integer Conv
590 * from a mode with a mode with more bits to a mode with lesser bits.
591 * Moreover, we return only true if the node has not more than 1 user.
593 * @param node the node
594 * @return non-zero if node is a Down-Conv
596 static int is_downconv(const ir_node *node)
604 /* we only want to skip the conv when we're the only user
605 * (not optimal but for now...)
607 if(get_irn_n_edges(node) > 1)
610 src_mode = get_irn_mode(get_Conv_op(node));
611 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the target must be narrower */
612 return ia32_mode_needs_gp_reg(src_mode)
613 && ia32_mode_needs_gp_reg(dest_mode)
614 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
617 /* Skip all Down-Conv's on a given node and return the resulting node. */
618 ir_node *ia32_skip_downconv(ir_node *node) {
619 while (is_downconv(node))
620 node = get_Conv_op(node);
/* NOTE(review): the trailing "return node;" was lost in this extraction. */
/** Widen @p node to 32 bit with an I2I Conv (sign- or zero-extending
 *  depending on the signedness of its mode).
 *  NOTE(review): the tgt_mode selection lines are missing from this
 *  extraction — presumably mode_Is for signed, mode_Iu otherwise. */
625 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
627 ir_mode *mode = get_irn_mode(node);
632 if(mode_is_signed(mode)) {
637 block = get_nodes_block(node);
638 dbgi = get_irn_dbg_info(node);
640 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
644 * matches operands of a node into ia32 addressing/operand modes. This covers
645 * usage of source address mode, immediates, operations with non 32-bit modes,
647 * The resulting data is filled into the @p am struct. block is the block
648 * of the node whose arguments are matched. op1, op2 are the first and second
649 * input that are matched (op1 may be NULL). other_op is another unrelated
650 * input that is not matched! but which is needed sometimes to check if AM
651 * for op1/op2 is legal.
652 * @p flags describes the supported modes of the operation in detail.
/* NOTE(review): multiple lines (use_am/use_immediate declarations, some
 * branch bodies and braces) are missing from this extraction. */
654 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
655 ir_node *op1, ir_node *op2, ir_node *other_op,
658 ia32_address_t *addr = &am->addr;
659 ir_mode *mode = get_irn_mode(op2);
660 int mode_bits = get_mode_size_bits(mode);
661 ir_node *noreg_gp, *new_op1, *new_op2;
663 unsigned commutative;
664 int use_am_and_immediates;
667 memset(am, 0, sizeof(am[0]));
/* decode the supported-feature flags once up front */
669 commutative = (flags & match_commutative) != 0;
670 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
671 use_am = (flags & match_am) != 0;
672 use_immediate = (flags & match_immediate) != 0;
673 assert(!use_am_and_immediates || use_immediate);
676 assert(!commutative || op1 != NULL);
677 assert(use_am || !(flags & match_8bit_am));
678 assert(use_am || !(flags & match_16bit_am));
/* narrow modes only get AM when the matching flag allows it */
680 if (mode_bits == 8) {
681 if (!(flags & match_8bit_am))
683 /* we don't automatically add upconvs yet */
684 assert((flags & match_mode_neutral) || (flags & match_8bit));
685 } else if (mode_bits == 16) {
686 if (!(flags & match_16bit_am))
688 /* we don't automatically add upconvs yet */
689 assert((flags & match_mode_neutral) || (flags & match_16bit));
692 /* we can simply skip downconvs for mode neutral nodes: the upper bits
693 * can be random for these operations */
694 if (flags & match_mode_neutral) {
695 op2 = ia32_skip_downconv(op2);
697 op1 = ia32_skip_downconv(op1);
701 /* match immediates. firm nodes are normalized: constants are always on the
704 if (!(flags & match_try_am) && use_immediate) {
705 new_op2 = try_create_Immediate(op2, 0);
708 noreg_gp = ia32_new_NoReg_gp(env_cg);
/* first choice: fold op2's load into the instruction (source AM) */
709 if (new_op2 == NULL &&
710 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
711 build_address(am, op2);
712 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
713 if (mode_is_float(mode)) {
714 new_op2 = ia32_new_NoReg_vfp(env_cg);
718 am->op_type = ia32_AddrModeS;
/* second choice: commutative op — fold op1's load, swapping operands */
719 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
721 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
723 build_address(am, op1);
725 if (mode_is_float(mode)) {
726 noreg = ia32_new_NoReg_vfp(env_cg);
731 if (new_op2 != NULL) {
734 new_op1 = be_transform_node(op2);
/* record the swap so flag users can compensate */
736 am->ins_permuted = 1;
738 am->op_type = ia32_AddrModeS;
/* fallback: plain register/immediate operands, no address mode */
740 am->op_type = ia32_Normal;
742 if (flags & match_try_am) {
748 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
750 new_op2 = be_transform_node(op2);
752 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
/* make sure all address parts are filled with at least NoReg/NoMem */
754 if (addr->base == NULL)
755 addr->base = noreg_gp;
756 if (addr->index == NULL)
757 addr->index = noreg_gp;
758 if (addr->mem == NULL)
759 addr->mem = new_NoMem();
761 am->new_op1 = new_op1;
762 am->new_op2 = new_op2;
763 am->commutative = commutative;
/** Mark @p old_node visited and register @p new_node as its transformed
 *  replacement in the backend transformation map. */
766 static void set_transformed_and_mark(ir_node *const old_node, ir_node *const new_node)
768 mark_irn_visited(old_node);
769 be_set_transformed_node(old_node, new_node);
/** If @p am folded a Load with a live memory Proj, retarget that Proj to
 *  @p node: the node becomes mode_T and a result Proj is returned instead.
 *  Returns @p node unchanged when no mem-proj fixup is needed. */
772 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
777 if (am->mem_proj == NULL)
780 /* we have to create a mode_T so the old MemProj can attach to us */
781 mode = get_irn_mode(node);
782 load = get_Proj_pred(am->mem_proj);
784 set_transformed_and_mark(load, node);
786 if (mode != mode_T) {
787 set_irn_mode(node, mode_T);
788 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
795 * Construct a standard binary operation, set AM and immediate if required.
797 * @param node The original node for which the binop is created
798 * @param op1 The first operand
799 * @param op2 The second operand
800 * @param func The node constructor function
801 * @return The constructed ia32 node.
803 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
804 construct_binop_func *func, match_flags_t flags)
807 ir_node *block, *new_block, *new_node;
808 ia32_address_mode_t am;
809 ia32_address_t *addr = &am.addr;
811 block = get_nodes_block(node);
/* fold operands into immediates / source address mode where allowed */
812 match_arguments(&am, block, op1, op2, NULL, flags);
814 dbgi = get_irn_dbg_info(node);
815 new_block = be_transform_node(block);
816 new_node = func(dbgi, current_ir_graph, new_block,
817 addr->base, addr->index, addr->mem,
818 am.new_op1, am.new_op2);
819 set_am_attributes(new_node, &am);
820 /* we can't use source address mode anymore when using immediates */
821 if (!(flags & match_am_and_immediates) &&
822 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
823 set_ia32_am_support(new_node, ia32_am_none);
824 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
826 new_node = fix_mem_proj(new_node, &am);
/* Generic input positions for lowered flag-consuming binops; the
 * compile-time asserts below pin them to the Adc/Sbb node layouts so
 * gen_binop_flags() can address inputs uniformly.
 * NOTE(review): the enum header and first enumerator line are missing
 * from this extraction. */
833 n_ia32_l_binop_right,
834 n_ia32_l_binop_eflags
836 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
837 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
838 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
839 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
840 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
841 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
844 * Construct a binary operation which also consumes the eflags.
846 * @param node The node to transform
847 * @param func The node constructor function
848 * @param flags The match flags
849 * @return The constructor ia32 node
851 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
854 ir_node *src_block = get_nodes_block(node);
855 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
856 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
857 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
859 ir_node *block, *new_node, *new_eflags;
860 ia32_address_mode_t am;
861 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded load */
863 match_arguments(&am, src_block, op1, op2, eflags, flags);
865 dbgi = get_irn_dbg_info(node);
866 block = be_transform_node(src_block);
867 new_eflags = be_transform_node(eflags);
868 new_node = func(dbgi, current_ir_graph, block, addr->base, addr->index,
869 addr->mem, am.new_op1, am.new_op2, new_eflags);
870 set_am_attributes(new_node, &am);
871 /* we can't use source address mode anymore when using immediates */
872 if (!(flags & match_am_and_immediates) &&
873 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
874 set_ia32_am_support(new_node, ia32_am_none);
875 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
877 new_node = fix_mem_proj(new_node, &am);
/** Return the (lazily transformed, cached) initial x87 FPU control word
 *  node; every x87 arithmetic node takes it as an extra input. */
882 static ir_node *get_fpcw(void)
885 if (initial_fpcw != NULL)
888 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
889 &ia32_fp_cw_regs[REG_FPCW]);
890 initial_fpcw = be_transform_node(fpcw);
896 * Construct a standard binary operation, set AM and immediate if required.
/* x87 variant: the constructor additionally receives the FPU control
 * word (see get_fpcw()). */
898 * @param op1 The first operand
899 * @param op2 The second operand
900 * @param func The node constructor function
901 * @return The constructed ia32 node.
903 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
904 construct_binop_float_func *func,
907 ir_mode *mode = get_irn_mode(node);
909 ir_node *block, *new_block, *new_node;
910 ia32_address_mode_t am;
911 ia32_address_t *addr = &am.addr;
913 /* cannot use address mode with long double on x87 */
914 if (get_mode_size_bits(mode) > 64)
917 block = get_nodes_block(node);
918 match_arguments(&am, block, op1, op2, NULL, flags);
920 dbgi = get_irn_dbg_info(node);
921 new_block = be_transform_node(block);
922 new_node = func(dbgi, current_ir_graph, new_block,
923 addr->base, addr->index, addr->mem,
924 am.new_op1, am.new_op2, get_fpcw());
925 set_am_attributes(new_node, &am);
927 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
929 new_node = fix_mem_proj(new_node, &am);
935 * Construct a shift/rotate binary operation, sets AM and immediate if required.
937 * @param op1 The first operand
938 * @param op2 The second operand
939 * @param func The node constructor function
940 * @return The constructed ia32 node.
942 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
943 construct_shift_func *func,
947 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
949 assert(! mode_is_float(get_irn_mode(node)));
950 assert(flags & match_immediate);
951 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* the shifted value: skip downconvs when mode-neutral, otherwise widen
 * narrow values to 32 bit first */
953 if (flags & match_mode_neutral) {
954 op1 = ia32_skip_downconv(op1);
955 new_op1 = be_transform_node(op1);
956 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
957 new_op1 = create_upconv(op1, node);
959 new_op1 = be_transform_node(op1);
962 /* the shift amount can be any mode that is bigger than 5 bits, since all
963 * other bits are ignored anyway */
964 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
965 ir_node *const op = get_Conv_op(op2);
966 if (mode_is_float(get_irn_mode(op)))
969 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
971 new_op2 = create_immediate_or_transform(op2, 0);
973 dbgi = get_irn_dbg_info(node);
974 block = get_nodes_block(node);
975 new_block = be_transform_node(block);
976 new_node = func(dbgi, current_ir_graph, new_block, new_op1, new_op2);
977 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
979 /* lowered shift instruction may have a dependency operand, handle it here */
980 if (get_irn_arity(node) == 3) {
981 /* we have a dependency */
982 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
983 add_irn_dep(new_node, new_dep);
991 * Construct a standard unary operation, set AM and immediate if required.
993 * @param op The operand
994 * @param func The node constructor function
995 * @return The constructed ia32 node.
997 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1001 ir_node *block, *new_block, *new_op, *new_node;
1003 assert(flags == 0 || flags == match_mode_neutral)
1004 if (flags & match_mode_neutral) {
1005 op = ia32_skip_downconv(op);
1008 new_op = be_transform_node(op);
1009 dbgi = get_irn_dbg_info(node);
1010 block = get_nodes_block(node);
1011 new_block = be_transform_node(block);
1012 new_node = func(dbgi, current_ir_graph, new_block, new_op);
1014 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* NOTE(review): the trailing "return new_node;" was lost in this
 * extraction. */
/** Build an ia32 Lea from a precomputed address, substituting NoReg for
 *  missing base/index and transforming present ones. */
1019 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1020 ia32_address_t *addr)
1022 ir_node *base, *index, *res;
1026 base = ia32_new_NoReg_gp(env_cg);
1028 base = be_transform_node(base);
1031 index = addr->index;
1032 if (index == NULL) {
1033 index = ia32_new_NoReg_gp(env_cg);
1035 index = be_transform_node(index);
1038 res = new_rd_ia32_Lea(dbgi, current_ir_graph, block, base, index);
1039 set_address(res, addr);
1045 * Returns non-zero if a given address mode has a symbolic or
1046 * numerical offset != 0.
/* (also true when a frame entity or frame usage is involved) */
1048 static int am_has_immediates(const ia32_address_t *addr)
1050 return addr->offset != 0 || addr->symconst_ent != NULL
1051 || addr->frame_entity || addr->use_frame;
1055 * Creates an ia32 Add.
1057 * @return the created ia32 Add node
/* Strategy (see numbered comment below): try to express the whole Add as
 * an address computation (Lea / Const), otherwise fall back to a real
 * Add with possible source address mode. */
1059 static ir_node *gen_Add(ir_node *node) {
1060 ir_mode *mode = get_irn_mode(node);
1061 ir_node *op1 = get_Add_left(node);
1062 ir_node *op2 = get_Add_right(node);
1064 ir_node *block, *new_block, *new_node, *add_immediate_op;
1065 ia32_address_t addr;
1066 ia32_address_mode_t am;
1068 if (mode_is_float(mode)) {
1069 if (ia32_cg_config.use_sse2)
1070 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1071 match_commutative | match_am);
1073 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1074 match_commutative | match_am);
1077 ia32_mark_non_am(node);
1079 op2 = ia32_skip_downconv(op2);
1080 op1 = ia32_skip_downconv(op1);
1084 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1085 * 1. Add with immediate -> Lea
1086 * 2. Add with possible source address mode -> Add
1087 * 3. Otherwise -> Lea
1089 memset(&addr, 0, sizeof(addr));
1090 ia32_create_address_mode(&addr, node, /*force=*/1);
1091 add_immediate_op = NULL;
1093 dbgi = get_irn_dbg_info(node);
1094 block = get_nodes_block(node);
1095 new_block = be_transform_node(block);
/* case 0: everything folded into immediates — emit a single Const */
1098 if(addr.base == NULL && addr.index == NULL) {
1099 ir_graph *irg = current_ir_graph;
1100 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1101 addr.symconst_sign, addr.offset);
1102 add_irn_dep(new_node, get_irg_frame(irg));
1103 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1106 /* add with immediate? */
1107 if(addr.index == NULL) {
1108 add_immediate_op = addr.base;
1109 } else if(addr.base == NULL && addr.scale == 0) {
1110 add_immediate_op = addr.index;
1113 if(add_immediate_op != NULL) {
1114 if(!am_has_immediates(&addr)) {
1115 #ifdef DEBUG_libfirm
1116 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
/* Add x,0 degenerates to x itself */
1119 return be_transform_node(add_immediate_op);
/* case 1: one register + immediates — a Lea covers it */
1122 new_node = create_lea_from_address(dbgi, new_block, &addr);
1123 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1127 /* test if we can use source address mode */
1128 match_arguments(&am, block, op1, op2, NULL, match_commutative
1129 | match_mode_neutral | match_am | match_immediate | match_try_am);
1131 /* construct an Add with source address mode */
1132 if (am.op_type == ia32_AddrModeS) {
1133 ir_graph *irg = current_ir_graph;
1134 ia32_address_t *am_addr = &am.addr;
1135 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1136 am_addr->index, am_addr->mem, am.new_op1,
1138 set_am_attributes(new_node, &am);
1139 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1141 new_node = fix_mem_proj(new_node, &am);
1146 /* otherwise construct a lea */
1147 new_node = create_lea_from_address(dbgi, new_block, &addr);
1148 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1153 * Creates an ia32 Mul.
1155 * @return the created ia32 Mul node
1157 static ir_node *gen_Mul(ir_node *node) {
1158 ir_node *op1 = get_Mul_left(node);
1159 ir_node *op2 = get_Mul_right(node);
1160 ir_mode *mode = get_irn_mode(node);
1162 if (mode_is_float(mode)) {
1163 if (ia32_cg_config.use_sse2)
1164 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1165 match_commutative | match_am);
1167 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1168 match_commutative | match_am);
/* integer multiply: IMul supports AM, immediates and both combined */
1170 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1171 match_commutative | match_am | match_mode_neutral |
1172 match_immediate | match_am_and_immediates);
1176 * Creates an ia32 Mulh.
1177 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1178 * this result while Mul returns the lower 32 bit.
1180 * @return the created ia32 Mulh node
1182 static ir_node *gen_Mulh(ir_node *node)
1184 ir_node *block = get_nodes_block(node);
1185 ir_node *new_block = be_transform_node(block);
1186 dbg_info *dbgi = get_irn_dbg_info(node);
1187 ir_node *op1 = get_Mulh_left(node);
1188 ir_node *op2 = get_Mulh_right(node);
1189 ir_mode *mode = get_irn_mode(node);
1190 construct_binop_func *func;
1192 ir_node *proj_res_high;
/* signed uses one-operand IMul, unsigned uses Mul */
1194 func = mode_is_signed(mode) ? new_rd_ia32_IMul1OP : new_rd_ia32_Mul;
1195 new_node = gen_binop(node, op1, op2, func, match_commutative | match_am);
/* both constructors share the same high-result proj number */
1197 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
1198 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1199 mode_Iu, pn_ia32_IMul1OP_res_high);
1200 return proj_res_high;
/**
 * Creates an ia32 And.
 *
 * @return The created ia32 And node
 */
static ir_node *gen_And(ir_node *node) {
    ir_node *op1 = get_And_left(node);
    ir_node *op2 = get_And_right(node);
    assert(! mode_is_float(get_irn_mode(node)));

    /* is it a zero extension? And with 0xFF/0xFFFF masks is expressed as
     * an 8/16 bit -> 32 bit conversion instead */
    if (is_Const(op2)) {
        tarval *tv = get_Const_tarval(op2);
        long v = get_tarval_long(tv);

        if (v == 0xFF || v == 0xFFFF) {
            dbg_info *dbgi = get_irn_dbg_info(node);
            ir_node *block = get_nodes_block(node);
            assert(v == 0xFFFF);
            res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
    /* generic case: plain ia32 And */
    return gen_binop(node, op1, op2, new_rd_ia32_And,
                     match_commutative | match_mode_neutral | match_am
/**
 * Creates an ia32 Or.
 *
 * @return The created ia32 Or node
 */
static ir_node *gen_Or(ir_node *node) {
    ir_node *op1 = get_Or_left(node);
    ir_node *op2 = get_Or_right(node);

    /* only integer Or is handled here (asserted) */
    assert (! mode_is_float(get_irn_mode(node)));
    return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
                     | match_mode_neutral | match_am | match_immediate);
/**
 * Creates an ia32 Eor (ia32 Xor).
 *
 * @return The created ia32 Eor node
 */
static ir_node *gen_Eor(ir_node *node) {
    ir_node *op1 = get_Eor_left(node);
    ir_node *op2 = get_Eor_right(node);

    /* only integer Eor is handled here (asserted) */
    assert(! mode_is_float(get_irn_mode(node)));
    return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
                     | match_mode_neutral | match_am | match_immediate);
/**
 * Creates an ia32 Sub.
 *
 * @return The created ia32 Sub node
 */
static ir_node *gen_Sub(ir_node *node) {
    ir_node *op1 = get_Sub_left(node);
    ir_node *op2 = get_Sub_right(node);
    ir_mode *mode = get_irn_mode(node);

    /* float subtract: SSE2 xSub or x87 vfsub */
    if (mode_is_float(mode)) {
        if (ia32_cg_config.use_sse2)
            return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
        return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,

    /* a Sub with a constant right operand is reported as a missed
     * normalisation (it could presumably be an Add of the negated
     * constant — see the warning text) */
    if (is_Const(op2)) {
        ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",

    /* note: Sub is not commutative, so no match_commutative here */
    return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
                     | match_am | match_immediate);
/**
 * Build the memory input for a node using address mode: combine the
 * transformed source memory with the memory consumed by the address mode,
 * while avoiding a memory loop through the consumed value.
 *
 * @param irg      the graph the new nodes are created in
 * @param block    the (transformed) block for newly created Sync nodes
 * @param src_val  the value operand that may alias the consumed memory
 * @param src_mem  the original (untransformed) memory input
 * @param am_mem   the memory consumed by the address mode (may be NoMem)
 */
static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
                                 ir_node *const src_val,
                                 ir_node *const src_mem,
                                 ir_node *const am_mem)
    if (is_NoMem(am_mem)) {
        /* address mode consumes no memory: just use the source memory */
        return be_transform_node(src_mem);
    } else if (is_Proj(src_val) &&
               get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
        /* avoid memory loop */
    } else if (is_Proj(src_val) && is_Sync(src_mem)) {
        /* rebuild the Sync: copy all predecessors except the one that
         * would close a loop through the consumed value */
        ir_node *const ptr_pred = get_Proj_pred(src_val);
        int const arity = get_Sync_n_preds(src_mem);
        NEW_ARR_A(ir_node*, ins, arity + 1);
        for (i = arity - 1; i >= 0; --i) {
            ir_node *const pred = get_Sync_pred(src_mem, i);
            /* avoid memory loop */
            if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
            ins[n++] = be_transform_node(pred);
        return new_r_Sync(irg, block, n, ins);
    /* general case: synchronise source memory and address mode memory */
    ins[0] = be_transform_node(src_mem);
    return new_r_Sync(irg, block, 2, ins);
/**
 * Generates an ia32 DivMod with additional infrastructure for the
 * register allocator if needed.
 *
 * Shared by Div, Mod and DivMod: only the operand accessors differ per
 * opcode; the generated ia32 node is IDiv (signed) or Div (unsigned).
 */
static ir_node *create_Div(ir_node *node)
    ir_graph *irg = current_ir_graph;
    dbg_info *dbgi = get_irn_dbg_info(node);
    ir_node *block = get_nodes_block(node);
    ir_node *new_block = be_transform_node(block);
    ir_node *sign_extension;
    ia32_address_mode_t am;
    ia32_address_t *addr = &am.addr;

    /* the upper bits have random contents for smaller modes */
    switch (get_irn_opcode(node)) {
    /* Div */
    op1 = get_Div_left(node);
    op2 = get_Div_right(node);
    mem = get_Div_mem(node);
    mode = get_Div_resmode(node);
    /* Mod */
    op1 = get_Mod_left(node);
    op2 = get_Mod_right(node);
    mem = get_Mod_mem(node);
    mode = get_Mod_resmode(node);
    /* DivMod */
    op1 = get_DivMod_left(node);
    op2 = get_DivMod_right(node);
    mem = get_DivMod_mem(node);
    mode = get_DivMod_resmode(node);
    panic("invalid divmod node %+F", node);

    match_arguments(&am, block, op1, op2, NULL, match_am);

    /* Beware: We don't need a Sync, if the memory predecessor of the Div node
       is the memory of the consumed address. We can have only the second op as address
       in Div nodes, so check only op2. */
    new_mem = transform_AM_mem(irg, block, op2, mem, addr->mem);

    if (mode_is_signed(mode)) {
        /* signed divide: sign-extend the dividend (Cltd) into the high half */
        ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
        add_irn_dep(produceval, get_irg_frame(irg));
        sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
        new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
                                    addr->index, new_mem, am.new_op2,
                                    am.new_op1, sign_extension);
        /* unsigned divide: high half is simply zero */
        sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
        add_irn_dep(sign_extension, get_irg_frame(irg));
        new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
                                   addr->index, new_mem, am.new_op2,
                                   am.new_op1, sign_extension);

    /* keep the pinned state of the original node */
    set_irn_pinned(new_node, get_irn_pinned(node));

    set_am_attributes(new_node, &am);
    SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));

    new_node = fix_mem_proj(new_node, &am);
/** Transforms a Mod node via the common Div/Mod/DivMod code. */
static ir_node *gen_Mod(ir_node *node) {
    return create_Div(node);
/** Transforms a Div node via the common Div/Mod/DivMod code. */
static ir_node *gen_Div(ir_node *node) {
    return create_Div(node);
/** Transforms a DivMod node via the common Div/Mod/DivMod code. */
static ir_node *gen_DivMod(ir_node *node) {
    return create_Div(node);
/**
 * Creates an ia32 floating Div.
 *
 * @return The created ia32 xDiv node
 */
static ir_node *gen_Quot(ir_node *node)
    ir_node *op1 = get_Quot_left(node);
    ir_node *op2 = get_Quot_right(node);

    /* SSE2 xDiv when available, x87 vfdiv otherwise */
    if (ia32_cg_config.use_sse2) {
        return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
    return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
/**
 * Creates an ia32 Shl.
 *
 * @return The created ia32 Shl node
 */
static ir_node *gen_Shl(ir_node *node) {
    ir_node *left = get_Shl_left(node);
    ir_node *right = get_Shl_right(node);

    /* left shift may be mode-neutral: bits shifted out at the top are
     * irrelevant */
    return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
                           match_mode_neutral | match_immediate);
/**
 * Creates an ia32 Shr.
 *
 * @return The created ia32 Shr node
 */
static ir_node *gen_Shr(ir_node *node) {
    ir_node *left = get_Shr_left(node);
    ir_node *right = get_Shr_right(node);

    /* note: unlike Shl, no match_mode_neutral here */
    return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
/**
 * Creates an ia32 Sar.
 *
 * Recognises two special patterns first: a constant arithmetic shift of a
 * mode_Is value that amounts to a sign extension (emitted as Cltd), and the
 * Shl;Shrs pair by the same 16/24 amount, which is an 8/16 bit sign
 * extension (emitted as a conversion).
 *
 * @return The created ia32 Shrs node
 */
static ir_node *gen_Shrs(ir_node *node) {
    ir_node *left = get_Shrs_left(node);
    ir_node *right = get_Shrs_right(node);
    ir_mode *mode = get_irn_mode(node);

    if(is_Const(right) && mode == mode_Is) {
        tarval *tv = get_Const_tarval(right);
        long val = get_tarval_long(tv);
        /* this is a sign extension */
        ir_graph *irg = current_ir_graph;
        dbg_info *dbgi = get_irn_dbg_info(node);
        ir_node *block = be_transform_node(get_nodes_block(node));
        ir_node *new_op = be_transform_node(op);
        ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
        add_irn_dep(pval, get_irg_frame(irg));
        return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);

    /* 8 or 16 bit sign extension? Shl;Shrs with identical constant shift
     * amounts of 16 or 24 is a sign extension from 16 or 8 bit */
    if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
        ir_node *shl_left = get_Shl_left(left);
        ir_node *shl_right = get_Shl_right(left);
        if(is_Const(shl_right)) {
            tarval *tv1 = get_Const_tarval(right);
            tarval *tv2 = get_Const_tarval(shl_right);
            if(tv1 == tv2 && tarval_is_long(tv1)) {
                long val = get_tarval_long(tv1);
                if(val == 16 || val == 24) {
                    dbg_info *dbgi = get_irn_dbg_info(node);
                    ir_node *block = get_nodes_block(node);
                    res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
    /* generic case: plain arithmetic shift right */
    return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
/**
 * Creates an ia32 Rol.
 *
 * @param op1 The first operator
 * @param op2 The second operator
 * @return The created ia32 RotL node
 */
static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2) {
    return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
/**
 * Creates an ia32 Ror.
 * NOTE: There is no RotR with immediate because this would always be a RotL
 * "imm-mode_size_bits" which can be pre-calculated.
 *
 * @param op1 The first operator
 * @param op2 The second operator
 * @return The created ia32 RotR node
 */
static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2) {
    return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
/**
 * Creates an ia32 RotR or RotL (depending on the found pattern).
 *
 * @return The created ia32 RotL or RotR node
 */
static ir_node *gen_Rotl(ir_node *node) {
    ir_node *rotate = NULL;
    ir_node *op1 = get_Rotl_left(node);
    ir_node *op2 = get_Rotl_right(node);

    /* Firm has only RotL, so we are looking for a right (op2)
       operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
       that means we can create a RotR instead of an Add and a RotL */
    ir_node *left = get_Add_left(add);
    ir_node *right = get_Add_right(add);
    if (is_Const(right)) {
        tarval *tv = get_Const_tarval(right);
        ir_mode *mode = get_irn_mode(node);
        long bits = get_mode_size_bits(mode);

        /* amount has the shape (-e + bits): rotate right by e instead */
        if (is_Minus(left) &&
            tarval_is_long(tv) &&
            get_tarval_long(tv) == bits &&
            DB((dbg, LEVEL_1, "RotL into RotR ... "));
            rotate = gen_Ror(node, op1, get_Minus_op(left));

    /* pattern not matched: emit a plain RotL */
    if (rotate == NULL) {
        rotate = gen_Rol(node, op1, op2);
/**
 * Transforms a Minus node.
 *
 * Floats: SSE2 negates by xor-ing the sign bit (via a known constant in
 * memory), x87 uses vfchs. Integers use a plain Neg.
 *
 * @return The created ia32 Minus node
 */
static ir_node *gen_Minus(ir_node *node)
    ir_node *op = get_Minus_op(node);
    ir_node *block = be_transform_node(get_nodes_block(node));
    ir_graph *irg = current_ir_graph;
    dbg_info *dbgi = get_irn_dbg_info(node);
    ir_mode *mode = get_irn_mode(node);

    if (mode_is_float(mode)) {
        ir_node *new_op = be_transform_node(op);
        if (ia32_cg_config.use_sse2) {
            /* TODO: non-optimal... if we have many xXors, then we should
             * rather create a load for the const and use that instead of
             * several AM nodes... */
            ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
            ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
            ir_node *nomem = new_rd_NoMem(irg);

            /* xor with the sign-bit constant (SSIGN/DSIGN) flips the sign */
            new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
                                        nomem, new_op, noreg_xmm);

            size = get_mode_size_bits(mode);
            ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
            set_ia32_am_sc(new_node, ent);
            set_ia32_op_type(new_node, ia32_AddrModeS);
            set_ia32_ls_mode(new_node, mode);
            /* x87: change-sign instruction */
            new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
        /* integer negate */
        new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);

    SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Transforms a Not node.
 *
 * @return The created ia32 Not node
 */
static ir_node *gen_Not(ir_node *node) {
    ir_node *op = get_Not_op(node);

    assert(get_irn_mode(node) != mode_b); /* should be lowered already */
    assert (! mode_is_float(get_irn_mode(node)));

    return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
/**
 * Transforms an Abs node.
 *
 * Floats: SSE2 masks off the sign bit (And with SABS/DABS constant), x87
 * uses vfabs. Integers use the branch-free sequence
 * sign = x >> 31; abs = (x ^ sign) - sign.
 *
 * @return The created ia32 Abs node
 */
static ir_node *gen_Abs(ir_node *node)
    ir_node *block = get_nodes_block(node);
    ir_node *new_block = be_transform_node(block);
    ir_node *op = get_Abs_op(node);
    ir_graph *irg = current_ir_graph;
    dbg_info *dbgi = get_irn_dbg_info(node);
    ir_mode *mode = get_irn_mode(node);
    ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
    ir_node *nomem = new_NoMem();

    if (mode_is_float(mode)) {
        new_op = be_transform_node(op);

        if (ia32_cg_config.use_sse2) {
            /* And with the abs-mask constant clears the sign bit */
            ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
            new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
                                        nomem, new_op, noreg_fp);

            size = get_mode_size_bits(mode);
            ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
            set_ia32_am_sc(new_node, ent);

            SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));

            set_ia32_op_type(new_node, ia32_AddrModeS);
            set_ia32_ls_mode(new_node, mode);
            new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
            SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
        ir_node *xor, *pval, *sign_extension;

        /* smaller modes are first widened to 32 bit */
        if (get_mode_size_bits(mode) == 32) {
            new_op = be_transform_node(op);
            new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);

        /* sign_extension = new_op >> 31 (all-ones if negative) */
        pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
        sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,

        add_irn_dep(pval, get_irg_frame(irg));
        SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));

        xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
                              nomem, new_op, sign_extension);
        SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));

        new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
                                   nomem, xor, sign_extension);
        SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
 *
 * @param cmp the Cmp node whose block (and debug info) is reused
 * @param x   the value to test
 * @param n   the bit number
 */
static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n) {
    dbg_info *dbgi = get_irn_dbg_info(cmp);
    ir_node *block = get_nodes_block(cmp);
    ir_node *new_block = be_transform_node(block);
    ir_node *op1 = be_transform_node(x);
    ir_node *op2 = be_transform_node(n);

    return new_rd_ia32_Bt(dbgi, current_ir_graph, new_block, op1, op2);
/**
 * Transform a node returning a "flag" result.
 *
 * Recognises the x & (1 << n) test pattern on either And operand and emits
 * a Bt instead of a Cmp; a plain mode_b value is tested against itself.
 *
 * @param node the node to transform
 * @param pnc_out the compare mode to use
 */
static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
    /* we have a Cmp as input */
    if (is_Proj(node)) {
        ir_node *pred = get_Proj_pred(node);
        pn_Cmp pnc = get_Proj_proj(node);
        if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
            ir_node *l = get_Cmp_left(pred);
            ir_node *r = get_Cmp_right(pred);
            ir_node *la = get_And_left(l);
            ir_node *ra = get_And_right(l);
            /* bit-test pattern with the Shl on the left And operand */
            ir_node *c = get_Shl_left(la);
            if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
                /* (1 << n) & ra) */
                ir_node *n = get_Shl_right(la);
                flags = gen_bt(pred, ra, n);
                /* we must generate a Jc/Jnc jump */
                pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
            *pnc_out = ia32_pn_Cmp_unsigned | pnc;
            /* same pattern, Shl on the right And operand */
            ir_node *c = get_Shl_left(ra);
            if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
                /* la & (1 << n)) */
                ir_node *n = get_Shl_right(ra);
                flags = gen_bt(pred, la, n);
                /* we must generate a Jc/Jnc jump */
                pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
            *pnc_out = ia32_pn_Cmp_unsigned | pnc;
        /* no bt pattern: just transform the Cmp */
        flags = be_transform_node(pred);

    /* a mode_b value, we have to compare it against 0 */
    dbgi = get_irn_dbg_info(node);
    new_block = be_transform_node(get_nodes_block(node));
    new_op = be_transform_node(node);
    noreg = ia32_new_NoReg_gp(env_cg);
    nomem = new_NoMem();
    /* Test of a value against itself sets ZF iff the value is zero */
    flags = new_rd_ia32_Test(dbgi, current_ir_graph, new_block, noreg, noreg, nomem,
                             new_op, new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
    *pnc_out = pn_Cmp_Lg;
/**
 * Transforms a Load.
 *
 * @return the created ia32 Load node
 */
static ir_node *gen_Load(ir_node *node) {
    ir_node *old_block = get_nodes_block(node);
    ir_node *block = be_transform_node(old_block);
    ir_node *ptr = get_Load_ptr(node);
    ir_node *mem = get_Load_mem(node);
    ir_node *new_mem = be_transform_node(mem);
    ir_graph *irg = current_ir_graph;
    dbg_info *dbgi = get_irn_dbg_info(node);
    ir_node *noreg = ia32_new_NoReg_gp(env_cg);
    ir_mode *mode = get_Load_mode(node);
    ia32_address_t addr;

    /* construct load address */
    memset(&addr, 0, sizeof(addr));
    ia32_create_address_mode(&addr, ptr, /*force=*/0);
    base = be_transform_node(base);
    index = be_transform_node(index);

    if (mode_is_float(mode)) {
        /* float load: SSE2 xLoad or x87 vfld */
        if (ia32_cg_config.use_sse2) {
            new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
            res_mode = mode_xmm;
            new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
            res_mode = mode_vfp;
        assert(mode != mode_b);

        /* create a conv node with address mode for smaller modes */
        if(get_mode_size_bits(mode) < 32) {
            new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
                                            new_mem, noreg, mode);
            new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);

    set_irn_pinned(new_node, get_irn_pinned(node));
    set_ia32_op_type(new_node, ia32_AddrModeS);
    set_ia32_ls_mode(new_node, mode);
    set_address(new_node, &addr);

    /* a non-pinned load is marked rematerialisable for the register
     * allocator */
    if(get_irn_pinned(node) == op_pin_state_floats) {
        add_ia32_flags(new_node, arch_irn_flags_rematerializable);

    /* make sure we are scheduled behind the initial IncSP/Barrier
     * to avoid spills being placed before it
     */
    if (block == get_irg_start_block(irg)) {
        add_irn_dep(new_node, get_irg_frame(irg));

    SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Check whether a Load (behind the Proj @p node) may be folded into a
 * destination-address-mode operation of a Store at @p ptr/@p mem.
 *
 * @param block  block of the Store
 * @param node   Proj of the candidate Load value
 * @param mem    memory input of the Store
 * @param ptr    address of the Store
 * @param other  the other operand of the operation (may be NULL)
 * @return non-zero if destination address mode may be used
 */
static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
                       ir_node *ptr, ir_node *other)
    /* we only use address mode if we're the only user of the load */
    if (get_irn_n_edges(node) > 1)

    load = get_Proj_pred(node);
    /* load must be in the same block as the store */
    if (get_nodes_block(load) != block)

    /* store should have the same pointer as the load */
    if (get_Load_ptr(load) != ptr)

    /* don't do AM if other node inputs depend on the load (via mem-proj) */
    if (other != NULL &&
        get_nodes_block(other) == block &&
        heights_reachable_in_block(heights, other, load)) {

    /* Sync memory: every other predecessor must be independent of the load */
    for (i = get_Sync_n_preds(mem) - 1; i >= 0; --i) {
        ir_node *const pred = get_Sync_pred(mem, i);
        if (is_Proj(pred) && get_Proj_pred(pred) == load)
        if (get_nodes_block(pred) == block &&
            heights_reachable_in_block(heights, pred, load)) {

    /* Store should be attached to the load */
    if (!is_Proj(mem) || get_Proj_pred(mem) != load)
/**
 * Create a binary operation with destination address mode
 * (op [mem], reg/imm) if the operand situation allows it.
 *
 * @param node     the operation node (e.g. Add) feeding a Store
 * @param op1,op2  the two operands
 * @param mem      memory input of the Store
 * @param ptr      address of the Store
 * @param mode     mode of the stored value
 * @param func     constructor for the >=16 bit variant
 * @param func8bit constructor for the 8 bit variant
 * @param flags    matching constraints; must include match_dest_am and
 *                 match_immediate
 * @return the created destination-AM node, or NULL-path if not applicable
 */
static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
                              ir_node *mem, ir_node *ptr, ir_mode *mode,
                              construct_binop_dest_func *func,
                              construct_binop_dest_func *func8bit,
                              match_flags_t flags)
    ir_node *src_block = get_nodes_block(node);
    ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
    ir_graph *irg = current_ir_graph;
    ia32_address_mode_t am;
    ia32_address_t *addr = &am.addr;
    memset(&am, 0, sizeof(am));

    assert(flags & match_dest_am);
    assert(flags & match_immediate); /* there is no destam node without... */
    commutative = (flags & match_commutative) != 0;

    /* fold the load behind op1, or (if commutative) behind op2 */
    if(use_dest_am(src_block, op1, mem, ptr, op2)) {
        build_address(&am, op1);
        new_op = create_immediate_or_transform(op2, 0);
    } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
        build_address(&am, op2);
        new_op = create_immediate_or_transform(op1, 0);

    /* fill unused address parts with NoReg/NoMem defaults */
    if(addr->base == NULL)
        addr->base = noreg_gp;
    if(addr->index == NULL)
        addr->index = noreg_gp;
    if(addr->mem == NULL)
        addr->mem = new_NoMem();

    dbgi = get_irn_dbg_info(node);
    block = be_transform_node(src_block);
    new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);

    /* 8 bit operations need the dedicated 8 bit constructor */
    if(get_mode_size_bits(mode) == 8) {
        new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
        new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem,

    set_address(new_node, addr);
    set_ia32_op_type(new_node, ia32_AddrModeD);
    set_ia32_ls_mode(new_node, mode);
    SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));

    /* redirect the consumed load's memory Proj to the new node */
    set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
    mem_proj = be_transform_node(am.mem_proj);
    set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Create a unary operation with destination address mode (op [mem]) if the
 * operand situation allows it.
 *
 * @param node the operation node feeding a Store
 * @param op   the operand (a loaded value)
 * @param mem  memory input of the Store
 * @param ptr  address of the Store
 * @param mode mode of the stored value
 * @param func constructor for the destination-AM node
 */
static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
                             ir_node *ptr, ir_mode *mode,
                             construct_unop_dest_func *func)
    ir_graph *irg = current_ir_graph;
    ir_node *src_block = get_nodes_block(node);
    ia32_address_mode_t am;
    ia32_address_t *addr = &am.addr;
    memset(&am, 0, sizeof(am));

    /* bail out if the load cannot be folded */
    if(!use_dest_am(src_block, op, mem, ptr, NULL))

    build_address(&am, op);

    dbgi = get_irn_dbg_info(node);
    block = be_transform_node(src_block);
    new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
    new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem);
    set_address(new_node, addr);
    set_ia32_op_type(new_node, ia32_AddrModeD);
    set_ia32_ls_mode(new_node, mode);
    SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));

    /* redirect the consumed load's memory Proj to the new node */
    set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
    mem_proj = be_transform_node(am.mem_proj);
    set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Try to turn a Store(Mux(sel, 0/1, 1/0)) into an ia32 SetMem
 * (conditional set directly to memory). Only works for 8 bit values whose
 * Mux arms are the constants 0 and 1.
 *
 * @return the created SetMem node, or NULL-path if the pattern is absent
 */
static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
    ir_mode *mode = get_irn_mode(node);
    ir_node *mux_true = get_Mux_true(node);
    ir_node *mux_false = get_Mux_false(node);
    ia32_address_t addr;

    /* Set only produces an 8 bit value */
    if(get_mode_size_bits(mode) != 8)

    /* arms must be 1/0 (plain) or 0/1 (negated condition) */
    if(is_Const_1(mux_true) && is_Const_0(mux_false)) {
    } else if(is_Const_0(mux_true) && is_Const_1(mux_false)) {

    build_address_ptr(&addr, ptr, mem);

    irg = current_ir_graph;
    dbgi = get_irn_dbg_info(node);
    block = get_nodes_block(node);
    new_block = be_transform_node(block);
    cond = get_Mux_sel(node);
    flags = get_flags_node(cond, &pnc);
    new_mem = be_transform_node(mem);
    new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
                                  addr.index, addr.mem, flags, pnc, negated);
    set_address(new_node, &addr);
    set_ia32_op_type(new_node, ia32_AddrModeD);
    set_ia32_ls_mode(new_node, mode);
    SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Try to transform a Store into a destination-address-mode operation
 * (e.g. add [mem], reg) by folding the stored value's computation into the
 * store. Dispatches on the opcode of the stored value.
 *
 * @return the created destination-AM node, or NULL-path if not possible
 */
static ir_node *try_create_dest_am(ir_node *node) {
    ir_node *val = get_Store_value(node);
    ir_node *mem = get_Store_mem(node);
    ir_node *ptr = get_Store_ptr(node);
    ir_mode *mode = get_irn_mode(val);
    unsigned bits = get_mode_size_bits(mode);

    /* handle only GP modes for now... */
    if(!ia32_mode_needs_gp_reg(mode))

    /* store must be the only user of the val node */
    if(get_irn_n_edges(val) > 1)

    /* skip pointless convs */
    ir_node *conv_op = get_Conv_op(val);
    ir_mode *pred_mode = get_irn_mode(conv_op);
    if (!ia32_mode_needs_gp_reg(pred_mode))
    if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {

    /* value must be in the same block */
    if(get_nodes_block(node) != get_nodes_block(val))

    switch (get_irn_opcode(val)) {
    /* Add: +1/-1 become Inc/Dec, otherwise AddMem */
    op1 = get_Add_left(val);
    op2 = get_Add_right(val);
    if(is_Const_1(op2)) {
        new_node = dest_am_unop(val, op1, mem, ptr, mode,
                                new_rd_ia32_IncMem);
    } else if(is_Const_Minus_1(op2)) {
        new_node = dest_am_unop(val, op1, mem, ptr, mode,
                                new_rd_ia32_DecMem);
        new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                                 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
                                 match_dest_am | match_commutative |
    /* Sub */
    op1 = get_Sub_left(val);
    op2 = get_Sub_right(val);
    if (is_Const(op2)) {
        ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
    new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                             new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
                             match_dest_am | match_immediate |
    /* And */
    op1 = get_And_left(val);
    op2 = get_And_right(val);
    new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                             new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
                             match_dest_am | match_commutative |
    /* Or */
    op1 = get_Or_left(val);
    op2 = get_Or_right(val);
    new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                             new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
                             match_dest_am | match_commutative |
    /* Eor */
    op1 = get_Eor_left(val);
    op2 = get_Eor_right(val);
    new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                             new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
                             match_dest_am | match_commutative |
    /* Shl */
    op1 = get_Shl_left(val);
    op2 = get_Shl_right(val);
    new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                             new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
                             match_dest_am | match_immediate);
    /* Shr */
    op1 = get_Shr_left(val);
    op2 = get_Shr_right(val);
    new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                             new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
                             match_dest_am | match_immediate);
    /* Shrs */
    op1 = get_Shrs_left(val);
    op2 = get_Shrs_right(val);
    new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                             new_rd_ia32_SarMem, new_rd_ia32_SarMem,
                             match_dest_am | match_immediate);
    /* Rotl */
    op1 = get_Rotl_left(val);
    op2 = get_Rotl_right(val);
    new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
                             new_rd_ia32_RolMem, new_rd_ia32_RolMem,
                             match_dest_am | match_immediate);
    /* TODO: match ROR patterns... */
    /* Mux: may become a SetMem */
    new_node = try_create_SetMem(val, ptr, mem);
    /* Minus */
    op1 = get_Minus_op(val);
    new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
    /* Not */
    /* should be lowered already */
    assert(mode != mode_b);
    op1 = get_Not_op(val);
    new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);

    /* inherit the pinned state from the Store */
    if(new_node != NULL) {
        if(get_irn_pinned(new_node) != op_pin_state_pinned &&
           get_irn_pinned(node) == op_pin_state_pinned) {
            set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Check whether @p node is a Conv from a float mode to a signed 32 bit
 * GP integer mode.
 *
 * @return non-zero for a float -> signed int32 conversion
 */
static int is_float_to_int32_conv(const ir_node *node)
    ir_mode *mode = get_irn_mode(node);

    if(get_mode_size_bits(mode) != 32 || !ia32_mode_needs_gp_reg(mode))
    /* don't report unsigned as conv to 32bit, because we really need to do
     * a vfist with 64bit signed in this case */
    if(!mode_is_signed(mode))

    conv_op = get_Conv_op(node);
    conv_mode = get_irn_mode(conv_op);

    if(!mode_is_float(conv_mode))
/**
 * Transform a Store(floatConst).
 *
 * The float constant is stored as one or more 32 bit integer immediate
 * stores (built from the tarval's raw bytes); multiple stores are joined
 * with a Sync.
 *
 * @return the created ia32 Store node (or Sync of Stores)
 */
static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
    ir_mode *mode = get_irn_mode(cns);
    unsigned size = get_mode_size_bytes(mode);
    tarval *tv = get_Const_tarval(cns);
    ir_node *block = get_nodes_block(node);
    ir_node *new_block = be_transform_node(block);
    ir_node *ptr = get_Store_ptr(node);
    ir_node *mem = get_Store_mem(node);
    ir_graph *irg = current_ir_graph;
    dbg_info *dbgi = get_irn_dbg_info(node);
    ia32_address_t addr;

    /* only whole 32 bit words are emitted */
    assert(size % 4 == 0);

    build_address_ptr(&addr, ptr, mem);

    /* assemble a 32 bit little-endian word from the tarval bytes */
    get_tarval_sub_bits(tv, ofs) |
    (get_tarval_sub_bits(tv, ofs + 1) << 8) |
    (get_tarval_sub_bits(tv, ofs + 2) << 16) |
    (get_tarval_sub_bits(tv, ofs + 3) << 24);
    ir_node *imm = create_Immediate(NULL, 0, val);

    ir_node *new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
                                          addr.index, addr.mem, imm);

    set_irn_pinned(new_node, get_irn_pinned(node));
    set_ia32_op_type(new_node, ia32_AddrModeD);
    set_ia32_ls_mode(new_node, mode_Iu);
    set_address(new_node, &addr);
    SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));

    ins[i++] = new_node;
    } while (size != 0);

    /* several partial stores are combined with a Sync */
    return i == 1 ? ins[0] : new_rd_Sync(dbgi, irg, new_block, i, ins);
/**
 * Generate a vfist or vfisttp instruction.
 *
 * @param fist  out-parameter receiving the created store node
 * @return the memory result of the created instruction
 */
static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
                          ir_node *mem, ir_node *val, ir_node **fist)
    if (ia32_cg_config.use_fisttp) {
        /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
           if other users exists */
        const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
        ir_node *vfisttp = new_rd_ia32_vfisttp(dbgi, irg, block, base, index, mem, val);
        ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
        /* keep the res Proj alive so the popped value stays available */
        be_new_Keep(reg_class, irg, block, 1, &value);
        new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
        /* plain vfist needs the FPU control word set to truncation mode */
        ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
        new_node = new_rd_ia32_vfist(dbgi, irg, block, base, index, mem, val, trunc_mode);
/**
 * Transforms a normal Store.
 *
 * Tries destination address mode first; otherwise emits an xStore/vfst for
 * floats, a vfist for float->int32 conversions, or a plain (8 bit or word)
 * integer Store.
 *
 * @return the created ia32 Store node
 */
static ir_node *gen_normal_Store(ir_node *node)
    ir_node *val = get_Store_value(node);
    ir_mode *mode = get_irn_mode(val);
    ir_node *block = get_nodes_block(node);
    ir_node *new_block = be_transform_node(block);
    ir_node *ptr = get_Store_ptr(node);
    ir_node *mem = get_Store_mem(node);
    ir_graph *irg = current_ir_graph;
    dbg_info *dbgi = get_irn_dbg_info(node);
    ir_node *noreg = ia32_new_NoReg_gp(env_cg);
    ir_node *new_val, *new_node, *store;
    ia32_address_t addr;

    /* check for destination address mode */
    new_node = try_create_dest_am(node);
    if (new_node != NULL)

    /* construct store address */
    memset(&addr, 0, sizeof(addr));
    ia32_create_address_mode(&addr, ptr, /*force=*/0);

    if (addr.base == NULL) {
    addr.base = be_transform_node(addr.base);

    if (addr.index == NULL) {
    addr.index = be_transform_node(addr.index);

    addr.mem = be_transform_node(mem);

    if (mode_is_float(mode)) {
        /* Convs (and strict-Convs) before stores are unnecessary if the mode
           is the same */
        while (is_Conv(val) && mode == get_irn_mode(val)) {
            ir_node *op = get_Conv_op(val);
            if (!mode_is_float(get_irn_mode(op)))
        new_val = be_transform_node(val);
        if (ia32_cg_config.use_sse2) {
            new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
                                          addr.index, addr.mem, new_val);
            new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
                                        addr.index, addr.mem, new_val, mode);
    } else if (!ia32_cg_config.use_sse2 && is_float_to_int32_conv(val)) {
        /* store the conversion result directly with a vfist */
        val = get_Conv_op(val);

        /* TODO: is this optimisation still necessary at all (middleend)? */
        /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
        while (is_Conv(val)) {
            ir_node *op = get_Conv_op(val);
            if (!mode_is_float(get_irn_mode(op)))
            if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
        new_val = be_transform_node(val);
        new_node = gen_vfist(dbgi, irg, new_block, addr.base, addr.index, addr.mem, new_val, &store);
        /* integer store: 8 bit values need the dedicated Store8Bit node */
        new_val = create_immediate_or_transform(val, 0);
        assert(mode != mode_b);

        if (get_mode_size_bits(mode) == 8) {
            new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
                                             addr.index, addr.mem, new_val);
            new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
                                         addr.index, addr.mem, new_val);

    set_irn_pinned(store, get_irn_pinned(node));
    set_ia32_op_type(store, ia32_AddrModeD);
    set_ia32_ls_mode(store, mode);

    set_address(store, &addr);
    SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
/**
 * Transforms a Store.
 *
 * Float constants that are not representable as simple SSE/x87 constants
 * are stored as raw integer words; everything else goes through the normal
 * Store path.
 *
 * @return the created ia32 Store node
 */
static ir_node *gen_Store(ir_node *node)
    ir_node *val = get_Store_value(node);
    ir_mode *mode = get_irn_mode(val);

    if (mode_is_float(mode) && is_Const(val)) {
        /* we are storing a floating point constant */
        if (ia32_cg_config.use_sse2) {
            transform = !is_simple_sse_Const(val);
            transform = !is_simple_x87_Const(val);
        return gen_float_const_Store(node, val);
    return gen_normal_Store(node);
2490  * Transforms a Switch.
2492  * @return the created ia32 SwitchJmp node
2494 static ir_node *create_Switch(ir_node *node)
2496 ir_graph *irg = current_ir_graph;
2497 dbg_info *dbgi = get_irn_dbg_info(node);
2498 ir_node *block = be_transform_node(get_nodes_block(node));
2499 ir_node *sel = get_Cond_selector(node);
2500 ir_node *new_sel = be_transform_node(sel);
2501 int switch_min = INT_MAX;
2502 int switch_max = INT_MIN;
2503 long default_pn = get_Cond_defaultProj(node);
2505 const ir_edge_t *edge;
2507 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2509 /* determine the smallest switch case value */
2510 foreach_out_edge(node, edge) {
2511 ir_node *proj = get_edge_src_irn(edge);
2512 long pn = get_Proj_proj(proj);
/* the default Proj does not contribute to the case-value range */
2513 if(pn == default_pn)
/* refuse absurdly large case ranges (would create a huge jump table) */
2522 if((unsigned) (switch_max - switch_min) > 256000) {
2523 panic("Size of switch %+F bigger than 256000", node);
2526 if (switch_min != 0) {
2527 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2529 /* if smallest switch case is not 0 we need an additional sub */
/* Lea with negative offset rebases the selector so cases start at 0 */
2530 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2531 add_ia32_am_offs_int(new_sel, -switch_min);
2532 set_ia32_op_type(new_sel, ia32_AddrModeS);
2534 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2537 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2538 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2544  * Transform a Cond node.
2546 static ir_node *gen_Cond(ir_node *node) {
2547 ir_node *block = get_nodes_block(node);
2548 ir_node *new_block = be_transform_node(block);
2549 ir_graph *irg = current_ir_graph;
2550 dbg_info *dbgi = get_irn_dbg_info(node);
2551 ir_node *sel = get_Cond_selector(node);
2552 ir_mode *sel_mode = get_irn_mode(sel);
2553 ir_node *flags = NULL;
/* a non-mode_b selector means this is a Switch, not an if/else Cond */
2557 if (sel_mode != mode_b) {
2558 return create_Switch(node);
2561 /* we get flags from a Cmp */
2562 flags = get_flags_node(sel, &pnc);
/* conditional jump consuming the flags produced above */
2564 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2565 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Transform a be_Copy: duplicate the node and normalize the mode of
 * gp-register values to mode_Iu (all integer ops work on 32bit regs).
 */
2570 static ir_node *gen_be_Copy(ir_node *node)
2572 ir_node *new_node = be_duplicate_node(node);
2573 ir_mode *mode = get_irn_mode(new_node);
2575 if (ia32_mode_needs_gp_reg(mode)) {
2576 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare for a Cmp node.
 * Uses fucomi when available; otherwise falls back to
 * ftst/fucom + fnstsw followed by sahf to move the FPU status
 * word into the CPU flags.
 */
2582 static ir_node *create_Fucom(ir_node *node)
2584 ir_graph *irg = current_ir_graph;
2585 dbg_info *dbgi = get_irn_dbg_info(node);
2586 ir_node *block = get_nodes_block(node);
2587 ir_node *new_block = be_transform_node(block);
2588 ir_node *left = get_Cmp_left(node);
2589 ir_node *new_left = be_transform_node(left);
2590 ir_node *right = get_Cmp_right(node);
2594 if(ia32_cg_config.use_fucomi) {
2595 new_right = be_transform_node(right);
2596 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2598 set_ia32_commutative(new_node);
2599 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* ftst compares against 0 implicitly, saving the materialization of
 * the right operand */
2601 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2602 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2605 new_right = be_transform_node(right);
2606 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2610 set_ia32_commutative(new_node);
2612 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* transfer fnstsw result (in ax) into the eflags register */
2614 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2615 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Create an SSE2 float compare (ucomi) for a Cmp node.
 * Operands may be folded as address-mode (memory) operands.
 */
2621 static ir_node *create_Ucomi(ir_node *node)
2623 ir_graph *irg = current_ir_graph;
2624 dbg_info *dbgi = get_irn_dbg_info(node);
2625 ir_node *src_block = get_nodes_block(node);
2626 ir_node *new_block = be_transform_node(src_block);
2627 ir_node *left = get_Cmp_left(node);
2628 ir_node *right = get_Cmp_right(node);
2630 ia32_address_mode_t am;
2631 ia32_address_t *addr = &am.addr;
2633 match_arguments(&am, src_block, left, right, NULL,
2634 match_commutative | match_am);
2636 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2637 addr->mem, am.new_op1, am.new_op2,
2639 set_am_attributes(new_node, &am);
2641 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reroute the memory Proj if an address-mode operand was folded */
2643 new_node = fix_mem_proj(new_node, &am);
2649  * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2650  * to fold an and into a test node
 *
 * @return true if every user of the Cmp only tests for (in)equality
2652 static bool can_fold_test_and(ir_node *node)
2654 const ir_edge_t *edge;
2656 /* we can only have Eq and Lg projs */
2657 foreach_out_edge(node, edge) {
2658 ir_node *proj = get_edge_src_irn(edge);
2659 pn_Cmp pnc = get_Proj_proj(proj);
2660 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2668  * returns true if it is assured, that the upper bits of a node are "clean"
2669  * which means for a 16 or 8 bit value, that the upper bits in the register
2670  * are 0 for unsigned and a copy of the sign bit for signed modes
2673 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2675 assert(ia32_mode_needs_gp_reg(mode));
/* for >= 32bit values the whole register is significant, nothing to check */
2676 if (get_mode_size_bits(mode) >= 32)
/* look through Projs at their producer */
2679 if (is_Proj(transformed_node))
2680 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
/* a conversion to a smaller-or-equal mode of matching signedness produces
 * clean upper bits by construction */
2682 if (is_ia32_Conv_I2I(transformed_node)
2683 || is_ia32_Conv_I2I8Bit(transformed_node)) {
2684 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2685 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2687 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* a logical shift right by enough bits zeroes the upper bits */
2693 if (is_ia32_Shr(transformed_node) && !mode_is_signed(mode)) {
2694 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2695 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2696 const ia32_immediate_attr_t *attr
2697 = get_ia32_immediate_attr_const(right);
2698 if (attr->symconst == 0
2699 && (unsigned) attr->offset >= (32 - get_mode_size_bits(mode))) {
2703 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
/* an And with a mask that fits into the mode clears the upper bits */
2706 if (is_ia32_And(transformed_node) && !mode_is_signed(mode)) {
2707 ir_node *right = get_irn_n(transformed_node, n_ia32_And_right);
2708 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2709 const ia32_immediate_attr_t *attr
2710 = get_ia32_immediate_attr_const(right);
2711 if (attr->symconst == 0
2712 && (unsigned) attr->offset
2713 <= (0xffffffff >> (32 - get_mode_size_bits(mode)))) {
2720 /* TODO recurse on Or, Xor, ... if appropriate? */
/* immediates: check that the value is a proper (sign-)extension of the
 * small mode */
2722 if (is_ia32_Immediate(transformed_node)
2723 || is_ia32_Const(transformed_node)) {
2724 const ia32_immediate_attr_t *attr
2725 = get_ia32_immediate_attr_const(transformed_node);
2726 if (mode_is_signed(mode)) {
2727 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
/* all bits above the sign bit must equal the sign bit */
2728 if (shifted == 0 || shifted == -1)
2731 unsigned long shifted = (unsigned long) attr->offset;
2732 shifted >>= get_mode_size_bits(mode);
2742  * Generate code for a Cmp.
2744 static ir_node *gen_Cmp(ir_node *node)
2746 ir_graph *irg = current_ir_graph;
2747 dbg_info *dbgi = get_irn_dbg_info(node);
2748 ir_node *block = get_nodes_block(node);
2749 ir_node *new_block = be_transform_node(block);
2750 ir_node *left = get_Cmp_left(node);
2751 ir_node *right = get_Cmp_right(node);
2752 ir_mode *cmp_mode = get_irn_mode(left);
2754 ia32_address_mode_t am;
2755 ia32_address_t *addr = &am.addr;
/* float compares are handled by the SSE2 or x87 specific helpers */
2758 if(mode_is_float(cmp_mode)) {
2759 if (ia32_cg_config.use_sse2) {
2760 return create_Ucomi(node);
2762 return create_Fucom(node);
2766 assert(ia32_mode_needs_gp_reg(cmp_mode));
2768 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2769 cmp_unsigned = !mode_is_signed(cmp_mode);
/* the And must have a single user (this Cmp) so it can be consumed */
2770 if (is_Const_0(right) &&
2772 get_irn_n_edges(left) == 1 &&
2773 can_fold_test_and(node)) {
2774 /* Test(and_left, and_right) */
2775 ir_node *and_left = get_And_left(left);
2776 ir_node *and_right = get_And_right(left);
2778 /* matze: code here used mode instead of cmp_mode, I think it is always
2779  * the same as cmp_mode, but I leave this here to see if this is really
2782 assert(get_irn_mode(and_left) == cmp_mode);
2784 match_arguments(&am, block, and_left, and_right, NULL,
2786 match_am | match_8bit_am | match_16bit_am |
2787 match_am_and_immediates | match_immediate |
2788 match_8bit | match_16bit);
2790 /* use 32bit compare mode if possible since the opcode is smaller */
2791 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2792 upper_bits_clean(am.new_op2, cmp_mode)) {
2793 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2796 if (get_mode_size_bits(cmp_mode) == 8) {
2797 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2798 addr->index, addr->mem, am.new_op1,
2799 am.new_op2, am.ins_permuted,
2802 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2803 addr->index, addr->mem, am.new_op1,
2804 am.new_op2, am.ins_permuted,
2808 /* Cmp(left, right) */
2809 match_arguments(&am, block, left, right, NULL,
2810 match_commutative | match_am | match_8bit_am |
2811 match_16bit_am | match_am_and_immediates |
2812 match_immediate | match_8bit | match_16bit);
2813 /* use 32bit compare mode if possible since the opcode is smaller */
2814 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2815 upper_bits_clean(am.new_op2, cmp_mode)) {
2816 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2819 if (get_mode_size_bits(cmp_mode) == 8) {
2820 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2821 addr->index, addr->mem, am.new_op1,
2822 am.new_op2, am.ins_permuted,
2825 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2826 addr->index, addr->mem, am.new_op1,
2827 am.new_op2, am.ins_permuted, cmp_unsigned);
2830 set_am_attributes(new_node, &am);
2831 set_ia32_ls_mode(new_node, cmp_mode);
2833 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reroute the memory Proj if an address-mode operand was folded */
2835 new_node = fix_mem_proj(new_node, &am);
/**
 * Create a CMov (conditional move) consuming already-produced flags.
 * Only valid for gp-register values and when the CPU supports cmov.
 */
2840 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2843 ir_graph *irg = current_ir_graph;
2844 dbg_info *dbgi = get_irn_dbg_info(node);
2845 ir_node *block = get_nodes_block(node);
2846 ir_node *new_block = be_transform_node(block);
2847 ir_node *val_true = get_Mux_true(node);
2848 ir_node *val_false = get_Mux_false(node);
2850 match_flags_t match_flags;
2851 ia32_address_mode_t am;
2852 ia32_address_t *addr;
2854 assert(ia32_cg_config.use_cmov);
2855 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2859 match_flags = match_commutative | match_am | match_16bit_am |
/* note: operand order is (false, true); ins_permuted records a swap */
2862 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2864 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2865 addr->mem, am.new_op1, am.new_op2, new_flags,
2866 am.ins_permuted, pnc);
2867 set_am_attributes(new_node, &am);
2869 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2871 new_node = fix_mem_proj(new_node, &am);
2877  * Creates a ia32 Setcc instruction.
2879 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2880 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2883 ir_graph *irg = current_ir_graph;
2884 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2885 ir_node *nomem = new_NoMem();
2886 ir_mode *mode = get_irn_mode(orig_node);
2889 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2890 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2892 /* we might need to conv the result up */
/* setcc only writes an 8bit register; zero-extend (mode_Bu) to the
 * requested width if it is wider */
2893 if (get_mode_size_bits(mode) > 8) {
2894 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2895 nomem, new_node, mode_Bu);
2896 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2903  * Create instruction for an unsigned Difference or Zero.
 * Implements Mux(a >=u b, a - b, 0) branch-free as (a - b) & ~borrow-mask:
 * sbb of a register with itself yields 0 or -1 from the carry flag.
2905 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b) {
2906 ir_graph *irg = current_ir_graph;
2907 ir_mode *mode = get_irn_mode(psi);
2908 ir_node *new_node, *sub, *sbb, *eflags, *block, *noreg, *tmpreg, *nomem;
2911 new_node = gen_binop(psi, a, b, new_rd_ia32_Sub,
2912 match_mode_neutral | match_am | match_immediate | match_two_users);
2914 block = get_nodes_block(new_node);
/* locate the Sub node and make sure we have Projs for both the result
 * and the flags output */
2916 if (is_Proj(new_node)) {
2917 sub = get_Proj_pred(new_node);
2918 assert(is_ia32_Sub(sub));
2921 set_irn_mode(sub, mode_T);
2922 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
2924 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
2926 dbgi = get_irn_dbg_info(psi);
2927 noreg = ia32_new_NoReg_gp(env_cg);
/* ProduceVal gives an arbitrary register value; sbb r,r on it depends
 * only on the carry flag, producing 0 (no borrow) or -1 (borrow) */
2928 tmpreg = new_rd_ia32_ProduceVal(dbgi, irg, block);
2929 nomem = new_NoMem();
2930 sbb = new_rd_ia32_Sbb(dbgi, irg, block, noreg, noreg, nomem, tmpreg, tmpreg, eflags);
2932 new_node = new_rd_ia32_And(dbgi, irg, block, noreg, noreg, nomem, new_node, sbb);
2933 set_ia32_commutative(new_node);
2938  * Transforms a Mux node into CMov.
2940  * @return The transformed node.
2942 static ir_node *gen_Mux(ir_node *node)
2944 dbg_info *dbgi = get_irn_dbg_info(node);
2945 ir_node *block = get_nodes_block(node);
2946 ir_node *new_block = be_transform_node(block);
2947 ir_node *mux_true = get_Mux_true(node);
2948 ir_node *mux_false = get_Mux_false(node);
2949 ir_node *cond = get_Mux_sel(node);
2950 ir_mode *mode = get_irn_mode(node);
2953 assert(get_irn_mode(cond) == mode_b);
2955 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
/* float Mux: only the SSE2 min/max patterns are supported */
2956 if (mode_is_float(mode)) {
2957 ir_node *cmp = get_Proj_pred(cond);
2958 ir_node *cmp_left = get_Cmp_left(cmp);
2959 ir_node *cmp_right = get_Cmp_right(cmp);
2960 pn_Cmp pnc = get_Proj_proj(cond);
2962 if (ia32_cg_config.use_sse2) {
2963 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
2964 if (cmp_left == mux_true && cmp_right == mux_false) {
2965 /* Mux(a <= b, a, b) => MIN */
2966 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
2967 match_commutative | match_am | match_two_users);
2968 } else if (cmp_left == mux_false && cmp_right == mux_true) {
2969 /* Mux(a <= b, b, a) => MAX */
2970 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
2971 match_commutative | match_am | match_two_users);
2973 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
2974 if (cmp_left == mux_true && cmp_right == mux_false) {
2975 /* Mux(a >= b, a, b) => MAX */
2976 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
2977 match_commutative | match_am | match_two_users);
2978 } else if (cmp_left == mux_false && cmp_right == mux_true) {
2979 /* Mux(a >= b, b, a) => MIN */
2980 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
2981 match_commutative | match_am | match_two_users);
2985 panic("cannot transform floating point Mux");
2991 assert(ia32_mode_needs_gp_reg(mode));
2993 if (is_Proj(cond)) {
2994 ir_node *cmp = get_Proj_pred(cond);
2996 ir_node *cmp_left = get_Cmp_left(cmp);
2997 ir_node *cmp_right = get_Cmp_right(cmp);
2998 pn_Cmp pnc = get_Proj_proj(cond);
3000 /* check for unsigned Doz first */
3001 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3002 is_Const_0(mux_false) && is_Sub(mux_true) &&
3003 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3004 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3005 return create_Doz(node, cmp_left, cmp_right);
3006 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3007 is_Const_0(mux_true) && is_Sub(mux_false) &&
3008 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3009 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3010 return create_Doz(node, cmp_left, cmp_right);
3015 flags = get_flags_node(cond, &pnc);
/* Mux of the constants 0/1 can be done with a single setcc */
3017 if (is_Const(mux_true) && is_Const(mux_false)) {
3018 /* both are const, good */
3019 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3020 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_permuted=*/0);
3021 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3022 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_permuted=*/1);
3024 /* Not that simple. */
/* general case: conditional move */
3029 new_node = create_CMov(node, cond, flags, pnc);
3037  * Create a conversion from x87 state register to general purpose.
 * Implemented as fist (store to the stack frame) followed by an
 * integer load of the result.
3039 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
3040 ir_node *block = be_transform_node(get_nodes_block(node));
3041 ir_node *op = get_Conv_op(node);
3042 ir_node *new_op = be_transform_node(op);
3043 ia32_code_gen_t *cg = env_cg;
3044 ir_graph *irg = current_ir_graph;
3045 dbg_info *dbgi = get_irn_dbg_info(node);
3046 ir_node *noreg = ia32_new_NoReg_gp(cg);
3047 ir_mode *mode = get_irn_mode(node);
3048 ir_node *fist, *load, *mem;
3050 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg, new_NoMem(), new_op, &fist);
3051 set_irn_pinned(fist, op_pin_state_floats);
3052 set_ia32_use_frame(fist);
3053 set_ia32_op_type(fist, ia32_AddrModeD);
3055 assert(get_mode_size_bits(mode) <= 32);
3056 /* exception we can only store signed 32 bit integers, so for unsigned
3057 we store a 64bit (signed) integer and load the lower bits */
3058 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3059 set_ia32_ls_mode(fist, mode_Ls);
3061 set_ia32_ls_mode(fist, mode_Is);
3063 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* load the integer result back from the stack frame */
3066 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, mem);
3068 set_irn_pinned(load, op_pin_state_floats);
3069 set_ia32_use_frame(load);
3070 set_ia32_op_type(load, ia32_AddrModeS);
3071 set_ia32_ls_mode(load, mode_Is);
/* request a stack entity wide enough for the value spilled by fist */
3072 if(get_ia32_ls_mode(fist) == mode_Ls) {
3073 ia32_attr_t *attr = get_ia32_attr(load);
3074 attr->data.need_64bit_stackent = 1;
3076 ia32_attr_t *attr = get_ia32_attr(load);
3077 attr->data.need_32bit_stackent = 1;
3079 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
3081 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3085  * Creates a x87 strict Conv by placing a Store and a Load
 * (the round trip through memory forces rounding to the target precision,
 * since x87 registers always hold 80bit extended precision).
3087 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3089 ir_node *block = get_nodes_block(node);
3090 ir_graph *irg = current_ir_graph;
3091 dbg_info *dbgi = get_irn_dbg_info(node);
3092 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3093 ir_node *nomem = new_NoMem();
3094 ir_node *frame = get_irg_frame(irg);
3095 ir_node *store, *load;
3098 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
3100 set_ia32_use_frame(store);
3101 set_ia32_op_type(store, ia32_AddrModeD);
3102 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
3104 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
3106 set_ia32_use_frame(load);
3107 set_ia32_op_type(load, ia32_AddrModeS);
3108 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3110 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
3115  * Create a conversion from general purpose to x87 register
 * (store the integer to the stack frame, then fild it).
3117 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
3118 ir_node *src_block = get_nodes_block(node);
3119 ir_node *block = be_transform_node(src_block);
3120 ir_graph *irg = current_ir_graph;
3121 dbg_info *dbgi = get_irn_dbg_info(node);
3122 ir_node *op = get_Conv_op(node);
3123 ir_node *new_op = NULL;
3127 ir_mode *store_mode;
3133 /* fild can use source AM if the operand is a signed 32bit integer */
3134 if (src_mode == mode_Is) {
3135 ia32_address_mode_t am;
3137 match_arguments(&am, src_block, NULL, op, NULL,
3138 match_am | match_try_am);
3139 if (am.op_type == ia32_AddrModeS) {
3140 ia32_address_t *addr = &am.addr;
3142 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
3143 addr->index, addr->mem);
3144 new_node = new_r_Proj(irg, block, fild, mode_vfp,
3147 set_am_attributes(fild, &am);
3148 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3150 fix_mem_proj(fild, &am);
/* fall-through path: the operand was not matched as AM above */
3155 if(new_op == NULL) {
3156 new_op = be_transform_node(op);
3159 noreg = ia32_new_NoReg_gp(env_cg);
3160 nomem = new_NoMem();
3161 mode = get_irn_mode(op);
3163 /* first convert to 32 bit signed if necessary */
3164 src_bits = get_mode_size_bits(src_mode);
3165 if (src_bits == 8) {
3166 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
3168 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3170 } else if (src_bits < 32) {
3171 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
3173 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3177 assert(get_mode_size_bits(mode) == 32);
/* spill the (possibly widened) integer to the stack frame */
3180 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
3183 set_ia32_use_frame(store);
3184 set_ia32_op_type(store, ia32_AddrModeD);
3185 set_ia32_ls_mode(store, mode_Iu);
3187 /* exception for 32bit unsigned, do a 64bit spill+load */
/* fild only reads signed integers: store a zero high word so the 64bit
 * value is interpreted correctly */
3188 if(!mode_is_signed(mode)) {
3191 ir_node *zero_const = create_Immediate(NULL, 0, 0);
3193 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
3194 get_irg_frame(irg), noreg, nomem,
3197 set_ia32_use_frame(zero_store);
3198 set_ia32_op_type(zero_store, ia32_AddrModeD);
3199 add_ia32_am_offs_int(zero_store, 4);
3200 set_ia32_ls_mode(zero_store, mode_Iu);
/* Sync both stores so the fild depends on low and high word */
3205 store = new_rd_Sync(dbgi, irg, block, 2, in);
3206 store_mode = mode_Ls;
3208 store_mode = mode_Is;
3212 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
3214 set_ia32_use_frame(fild);
3215 set_ia32_op_type(fild, ia32_AddrModeS);
3216 set_ia32_ls_mode(fild, store_mode);
3218 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3224  * Create a conversion from one integer mode into another one
 * (emits a Conv_I2I/Conv_I2I8Bit, or nothing if the upper bits are
 * already known to be clean).
3226 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3227 dbg_info *dbgi, ir_node *block, ir_node *op,
3230 ir_graph *irg = current_ir_graph;
3231 int src_bits = get_mode_size_bits(src_mode);
3232 int tgt_bits = get_mode_size_bits(tgt_mode);
3233 ir_node *new_block = be_transform_node(block);
3235 ir_mode *smaller_mode;
3237 ia32_address_mode_t am;
3238 ia32_address_t *addr = &am.addr;
/* only the smaller of the two modes is relevant for the conversion */
3241 if (src_bits < tgt_bits) {
3242 smaller_mode = src_mode;
3243 smaller_bits = src_bits;
3245 smaller_mode = tgt_mode;
3246 smaller_bits = tgt_bits;
3249 #ifdef DEBUG_libfirm
3251 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3256 match_arguments(&am, block, NULL, op, NULL,
3257 match_8bit | match_16bit |
3258 match_am | match_8bit_am | match_16bit_am);
/* if the upper bits are already clean the conversion is a no-op */
3260 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3261 /* unnecessary conv. in theory it shouldn't have been AM */
3262 assert(is_ia32_NoReg_GP(addr->base));
3263 assert(is_ia32_NoReg_GP(addr->index));
3264 assert(is_NoMem(addr->mem));
3265 assert(am.addr.offset == 0);
3266 assert(am.addr.symconst_ent == NULL);
3270 if (smaller_bits == 8) {
3271 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3272 addr->index, addr->mem, am.new_op2,
3275 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3276 addr->index, addr->mem, am.new_op2,
3279 set_am_attributes(new_node, &am);
3280 /* match_arguments assume that out-mode = in-mode, this isn't true here
3282 set_ia32_ls_mode(new_node, smaller_mode);
3283 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3284 new_node = fix_mem_proj(new_node, &am);
3289  * Transforms a Conv node.
3291  * @return The created ia32 Conv node
3293 static ir_node *gen_Conv(ir_node *node) {
3294 ir_node *block = get_nodes_block(node);
3295 ir_node *new_block = be_transform_node(block);
3296 ir_node *op = get_Conv_op(node);
3297 ir_node *new_op = NULL;
3298 ir_graph *irg = current_ir_graph;
3299 dbg_info *dbgi = get_irn_dbg_info(node);
3300 ir_mode *src_mode = get_irn_mode(op);
3301 ir_mode *tgt_mode = get_irn_mode(node);
3302 int src_bits = get_mode_size_bits(src_mode);
3303 int tgt_bits = get_mode_size_bits(tgt_mode);
3304 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3305 ir_node *nomem = new_rd_NoMem(irg);
3306 ir_node *res = NULL;
3308 if (src_mode == mode_b) {
3309 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3310 /* nothing to do, we already model bools as 0/1 ints */
3311 return be_transform_node(op);
/* same-mode Conv: only strict x87 conversions are kept */
3314 if (src_mode == tgt_mode) {
3315 if (get_Conv_strict(node)) {
3316 if (ia32_cg_config.use_sse2) {
3317 /* when we are in SSE mode, we can kill all strict no-op conversion */
3318 return be_transform_node(op);
3321 /* this should be optimized already, but who knows... */
3322 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3323 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3324 return be_transform_node(op);
3328 if (mode_is_float(src_mode)) {
3329 new_op = be_transform_node(op);
3330 /* we convert from float ... */
3331 if (mode_is_float(tgt_mode)) {
/* non-strict E->D narrowing is a no-op for the x87 (values live in
 * 80bit registers anyway) */
3332 if(src_mode == mode_E && tgt_mode == mode_D
3333 && !get_Conv_strict(node)) {
3334 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3339 if (ia32_cg_config.use_sse2) {
3340 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3341 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3343 set_ia32_ls_mode(res, tgt_mode);
3345 if(get_Conv_strict(node)) {
3346 res = gen_x87_strict_conv(tgt_mode, new_op);
3347 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3350 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3355 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3356 if (ia32_cg_config.use_sse2) {
3357 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3359 set_ia32_ls_mode(res, src_mode);
3361 return gen_x87_fp_to_gp(node);
3365 /* we convert from int ... */
3366 if (mode_is_float(tgt_mode)) {
3368 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3369 if (ia32_cg_config.use_sse2) {
3370 new_op = be_transform_node(op);
3371 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3373 set_ia32_ls_mode(res, tgt_mode);
3375 res = gen_x87_gp_to_fp(node, src_mode);
3376 if(get_Conv_strict(node)) {
3377 /* The strict-Conv is only necessary, if the int mode has more bits
3378  * than the float mantissa */
3379 size_t int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3380 size_t float_mantissa;
3381 /* FIXME There is no way to get the mantissa size of a mode */
3382 switch (get_mode_size_bits(tgt_mode)) {
3383 case 32: float_mantissa = 23 + 1; break; // + 1 for implicit 1
3384 case 64: float_mantissa = 52 + 1; break;
3386 case 96: float_mantissa = 64; break;
3387 default: float_mantissa = 0; break;
3389 if (float_mantissa < int_mantissa) {
3390 res = gen_x87_strict_conv(tgt_mode, res);
3391 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3396 } else if(tgt_mode == mode_b) {
3397 /* mode_b lowering already took care that we only have 0/1 values */
3398 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3399 src_mode, tgt_mode));
3400 return be_transform_node(op);
/* int -> int conversion */
3403 if (src_bits == tgt_bits) {
3404 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3405 src_mode, tgt_mode));
3406 return be_transform_node(op);
3409 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Try to encode the node as an ia32 Immediate (respecting the given
 * constraint type); fall back to the normal transformation otherwise.
 */
3417 static ir_node *create_immediate_or_transform(ir_node *node,
3418 char immediate_constraint_type)
3420 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3421 if (new_node == NULL) {
3422 new_node = be_transform_node(node);
3428  * Transforms a FrameAddr into an ia32 Add.
 * (Implemented as a Lea relative to the frame pointer; the concrete
 * frame entity offset is fixed up after stack-frame layout.)
3430 static ir_node *gen_be_FrameAddr(ir_node *node) {
3431 ir_node *block = be_transform_node(get_nodes_block(node));
3432 ir_node *op = be_get_FrameAddr_frame(node);
3433 ir_node *new_op = be_transform_node(op);
3434 ir_graph *irg = current_ir_graph;
3435 dbg_info *dbgi = get_irn_dbg_info(node);
3436 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3439 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3440 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3441 set_ia32_use_frame(new_node);
3443 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3449  * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3451 static ir_node *gen_be_Return(ir_node *node) {
3452 ir_graph *irg = current_ir_graph;
3453 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3454 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3455 ir_entity *ent = get_irg_entity(irg);
3456 ir_type *tp = get_entity_type(ent);
3461 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3462 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3465 int pn_ret_val, pn_ret_mem, arity, i;
3467 assert(ret_val != NULL);
/* the special handling is only needed for SSE floating point returns;
 * everything else is a plain duplication */
3468 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3469 return be_duplicate_node(node);
3472 res_type = get_method_res_type(tp, 0);
3474 if (! is_Primitive_type(res_type)) {
3475 return be_duplicate_node(node);
3478 mode = get_type_mode(res_type);
3479 if (! mode_is_float(mode)) {
3480 return be_duplicate_node(node);
3483 assert(get_method_n_ress(tp) == 1);
3485 pn_ret_val = get_Proj_proj(ret_val);
3486 pn_ret_mem = get_Proj_proj(ret_mem);
3488 /* get the Barrier */
3489 barrier = get_Proj_pred(ret_val);
3491 /* get result input of the Barrier */
3492 ret_val = get_irn_n(barrier, pn_ret_val);
3493 new_ret_val = be_transform_node(ret_val);
3495 /* get memory input of the Barrier */
3496 ret_mem = get_irn_n(barrier, pn_ret_mem);
3497 new_ret_mem = be_transform_node(ret_mem);
3499 frame = get_irg_frame(irg);
3501 dbgi = get_irn_dbg_info(barrier);
3502 block = be_transform_node(get_nodes_block(barrier));
3504 noreg = ia32_new_NoReg_gp(env_cg);
3506 /* store xmm0 onto stack */
3507 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3508 new_ret_mem, new_ret_val);
3509 set_ia32_ls_mode(sse_store, mode);
3510 set_ia32_op_type(sse_store, ia32_AddrModeD);
3511 set_ia32_use_frame(sse_store);
3513 /* load into x87 register */
3514 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3515 set_ia32_op_type(fld, ia32_AddrModeS);
3516 set_ia32_use_frame(fld);
3518 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3519 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3521 /* create a new barrier */
/* rebuild the Barrier with the value/memory inputs replaced by the
 * x87 load result and its memory Proj */
3522 arity = get_irn_arity(barrier);
3523 in = alloca(arity * sizeof(in[0]));
3524 for (i = 0; i < arity; ++i) {
3527 if (i == pn_ret_val) {
3529 } else if (i == pn_ret_mem) {
3532 ir_node *in = get_irn_n(barrier, i);
3533 new_in = be_transform_node(in);
3538 new_barrier = new_ir_node(dbgi, irg, block,
3539 get_irn_op(barrier), get_irn_mode(barrier),
3541 copy_node_attr(barrier, new_barrier);
3542 be_duplicate_deps(barrier, new_barrier);
3543 set_transformed_and_mark(barrier, new_barrier);
3545 /* transform normally */
3546 return be_duplicate_node(node);
3550  * Transform a be_AddSP into an ia32_SubSP.
 * (The x86 stack grows downwards, so enlarging the stack frame means
 * subtracting from esp.)
3552 static ir_node *gen_be_AddSP(ir_node *node)
3554 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3555 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3557 return gen_binop(node, sp, sz, new_rd_ia32_SubSP,
3558 match_am | match_immediate);
3562  * Transform a be_SubSP into an ia32_AddSP
 * (The x86 stack grows downwards, so shrinking the stack frame means
 * adding to esp.)
3564 static ir_node *gen_be_SubSP(ir_node *node)
3566 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3567 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3569 return gen_binop(node, sp, sz, new_rd_ia32_AddSP,
3570 match_am | match_immediate);
3574  * Change some phi modes
 * (normalizes the Phi's mode to the register class it will live in).
3576 static ir_node *gen_Phi(ir_node *node) {
3577 ir_node *block = be_transform_node(get_nodes_block(node));
3578 ir_graph *irg = current_ir_graph;
3579 dbg_info *dbgi = get_irn_dbg_info(node);
3580 ir_mode *mode = get_irn_mode(node);
3583 if(ia32_mode_needs_gp_reg(mode)) {
3584 /* we shouldn't have any 64bit stuff around anymore */
3585 assert(get_mode_size_bits(mode) <= 32);
3586 /* all integer operations are on 32bit registers now */
3588 } else if(mode_is_float(mode)) {
3589 if (ia32_cg_config.use_sse2) {
3596 /* phi nodes allow loops, so we use the old arguments for now
3597  * and fix this later */
3598 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3599 get_irn_in(node) + 1);
3600 copy_node_attr(node, phi);
3601 be_duplicate_deps(node, phi);
/* enqueue predecessors so their transformation fixes the Phi inputs */
3603 be_set_transformed_node(node, phi);
3604 be_enqueue_preds(node);
/**
 * Transform an IJmp (indirect/computed jump). The jump target must be a
 * pointer value; it may be matched as address mode, 8/16-bit value or
 * immediate.
 */
3612 static ir_node *gen_IJmp(ir_node *node)
3614 ir_node *block = get_nodes_block(node);
3615 ir_node *new_block = be_transform_node(block);
3616 dbg_info *dbgi = get_irn_dbg_info(node);
3617 ir_node *op = get_IJmp_target(node);
3619 ia32_address_mode_t am;
3620 ia32_address_t *addr = &am.addr;
3622 assert(get_irn_mode(op) == mode_P);
3624 match_arguments(&am, block, NULL, op, NULL,
3625 match_am | match_8bit_am | match_16bit_am |
3626 match_immediate | match_8bit | match_16bit);
3628 new_node = new_rd_ia32_IJmp(dbgi, current_ir_graph, new_block,
3629 addr->base, addr->index, addr->mem,
3631 set_am_attributes(new_node, &am);
3632 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* if a load was folded into the jump, route its memory Proj correctly */
3634 new_node = fix_mem_proj(new_node, &am);
3640 * Transform a Bound node.
/**
 * Transform a Bound node. Only the lower-bound == 0 form (typical for
 * Java array checks) is supported: index and upper bound are compared
 * with one unsigned Sub+Jcc — an unsigned "index < upper" also rejects
 * negative indices.
 */
3642 static ir_node *gen_Bound(ir_node *node)
3645 ir_node *lower = get_Bound_lower(node);
3646 dbg_info *dbgi = get_irn_dbg_info(node);
3648 if (is_Const_0(lower)) {
3649 /* typical case for Java */
3650 ir_node *sub, *res, *flags, *block;
3651 ir_graph *irg = current_ir_graph;
3653 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3654 new_rd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3656 block = get_nodes_block(res);
/* gen_binop may return the Sub itself or a result Proj of it; make sure
 * we end up with a mode_T Sub and a result Proj in `res` */
3657 if (! is_Proj(res)) {
3659 set_irn_mode(sub, mode_T);
3660 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3662 sub = get_Proj_pred(res);
/* branch on the Sub's flags: unsigned less-than => in bounds */
3664 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3665 new_node = new_rd_ia32_Jcc(dbgi, irg, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3666 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3668 panic("generic Bound not supported in ia32 Backend");
/* Transform a lowered ShlDep into a real Shl; only the val/count inputs
 * are used here — the extra "Dep" input presumably just ordered the
 * lowering and is dropped (TODO confirm). */
3674 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3676 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3677 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3679 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
3680 match_immediate | match_mode_neutral);
/* Transform a lowered ShrDep into a real Shr (see gen_ia32_l_ShlDep). */
3683 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3685 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3686 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3687 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/* Transform a lowered SarDep into a real Sar (see gen_ia32_l_ShlDep). */
3691 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3693 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3694 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3695 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/* Transform a lowered Add (low word of a 64-bit add) into an ia32_Add.
 * The Add is forced to mode_T so its flags output can be Proj'd —
 * presumably consumed by the matching l_Adc carry chain (TODO confirm). */
3699 static ir_node *gen_ia32_l_Add(ir_node *node) {
3700 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3701 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3702 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
3703 match_commutative | match_am | match_immediate |
3704 match_mode_neutral);
/* gen_binop may hand back a result Proj; step back to the Add itself */
3706 if(is_Proj(lowered)) {
3707 lowered = get_Proj_pred(lowered);
3709 assert(is_ia32_Add(lowered));
3710 set_irn_mode(lowered, mode_T);
/* Transform a lowered Adc (add with carry, high word of a 64-bit add)
 * into an ia32_Adc; gen_binop_flags wires up the incoming flags/carry. */
3716 static ir_node *gen_ia32_l_Adc(ir_node *node)
3718 return gen_binop_flags(node, new_rd_ia32_Adc,
3719 match_commutative | match_am | match_immediate |
3720 match_mode_neutral);
3724 * Transforms an ia32_l_Mul into a "real" ia32_Mul node.
3726 * @return the created ia32 Mul node
/* Transform a lowered Mul into a real ia32_Mul (unsigned widening
 * multiply; produces a low/high result pair). */
3728 static ir_node *gen_ia32_l_Mul(ir_node *node) {
3729 ir_node *left = get_binop_left(node);
3730 ir_node *right = get_binop_right(node);
3732 return gen_binop(node, left, right, new_rd_ia32_Mul,
3733 match_commutative | match_am | match_mode_neutral);
3737 * Transforms an ia32_l_IMul into a "real" ia32_IMul1OP node.
3739 * @return the created ia32 IMul1OP node
/* Transform a lowered IMul into a real ia32_IMul1OP (one-operand signed
 * widening multiply; produces a low/high result pair). */
3741 static ir_node *gen_ia32_l_IMul(ir_node *node) {
3742 ir_node *left = get_binop_left(node);
3743 ir_node *right = get_binop_right(node);
3745 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
3746 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered Sub (low word of a 64-bit sub) into an ia32_Sub;
 * forced to mode_T so the flags (borrow) output can be Proj'd for the
 * matching l_Sbb — analogous to gen_ia32_l_Add. */
3749 static ir_node *gen_ia32_l_Sub(ir_node *node) {
3750 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3751 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3752 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
3753 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a result Proj; step back to the Sub itself */
3755 if(is_Proj(lowered)) {
3756 lowered = get_Proj_pred(lowered);
3758 assert(is_ia32_Sub(lowered));
3759 set_irn_mode(lowered, mode_T);
/* Transform a lowered Sbb (subtract with borrow, high word of a 64-bit
 * sub) into an ia32_Sbb; gen_binop_flags wires up the incoming flags. */
3765 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
3766 return gen_binop_flags(node, new_rd_ia32_Sbb,
3767 match_am | match_immediate | match_mode_neutral);
3771 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3772 * op1 - target to be shifted
3773 * op2 - contains bits to be shifted into target
3775 * Only op3 can be an immediate.
/**
 * Common transformation of l_ShlD/l_ShrD into ShlD/ShrD.
 *
 * @param node   the l_ShlD or l_ShrD node being transformed
 * @param high   value to be shifted
 * @param low    value supplying the bits shifted in
 * @param count  shift amount (may become an immediate)
 */
3777 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3778 ir_node *low, ir_node *count)
3780 ir_node *block = get_nodes_block(node);
3781 ir_node *new_block = be_transform_node(block);
3782 ir_graph *irg = current_ir_graph;
3783 dbg_info *dbgi = get_irn_dbg_info(node);
3784 ir_node *new_high = be_transform_node(high);
3785 ir_node *new_low = be_transform_node(low);
3789 /* the shift amount can be any mode that is bigger than 5 bits, since all
3790 * other bits are ignored anyway */
/* skip single-user Convs on the count: only the low 5 bits matter */
3791 while (is_Conv(count) &&
3792 get_irn_n_edges(count) == 1 &&
3793 mode_is_int(get_irn_mode(count))) {
3794 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3795 count = get_Conv_op(count);
3797 new_count = create_immediate_or_transform(count, 0);
3799 if (is_ia32_l_ShlD(node)) {
3800 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
3803 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
3806 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Transform a lowered ShlD (64-bit left shift helper) into an ia32_ShlD. */
3811 static ir_node *gen_ia32_l_ShlD(ir_node *node)
3813 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
3814 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
3815 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
3816 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transform a lowered ShrD (64-bit right shift helper) into an ia32_ShrD. */
3819 static ir_node *gen_ia32_l_ShrD(ir_node *node)
3821 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
3822 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
3823 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
3824 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform a lowered long-long -> float conversion: spill both 32-bit
 * halves to the frame, serialize the stores with a Sync, then fild the
 * combined 64-bit integer into an x87 register.
 */
3827 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
3828 ir_node *src_block = get_nodes_block(node);
3829 ir_node *block = be_transform_node(src_block);
3830 ir_graph *irg = current_ir_graph;
3831 dbg_info *dbgi = get_irn_dbg_info(node);
3832 ir_node *frame = get_irg_frame(irg);
3833 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3834 ir_node *nomem = new_NoMem();
3835 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
3836 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
3837 ir_node *new_val_low = be_transform_node(val_low);
3838 ir_node *new_val_high = be_transform_node(val_high);
3843 ir_node *store_high;
/* fild interprets the 64-bit slot as signed; unsigned sources would need
 * an extra correction step which is not implemented */
3845 if(!mode_is_signed(get_irn_mode(val_high))) {
3846 panic("unsigned long long -> float not supported yet (%+F)", node);
3850 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
3852 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
3854 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
3855 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
3857 set_ia32_use_frame(store_low);
3858 set_ia32_use_frame(store_high);
3859 set_ia32_op_type(store_low, ia32_AddrModeD);
3860 set_ia32_op_type(store_high, ia32_AddrModeD);
/* low half unsigned, high half signed (carries the sign of the value) */
3861 set_ia32_ls_mode(store_low, mode_Iu);
3862 set_ia32_ls_mode(store_high, mode_Is);
/* little endian: the high word lives 4 bytes above the low word */
3863 add_ia32_am_offs_int(store_high, 4);
/* both stores must complete before the fild reads the slot */
3867 sync = new_rd_Sync(dbgi, irg, block, 2, in);
3870 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
3872 set_ia32_use_frame(fild);
3873 set_ia32_op_type(fild, ia32_AddrModeS);
3874 set_ia32_ls_mode(fild, mode_Ls);
3876 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3878 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Transform a lowered float -> long-long conversion: fist(p) the x87
 * value as a 64-bit integer into a frame slot; the two 32-bit halves are
 * read back by gen_Proj_l_FloattoLL.
 */
3881 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
3882 ir_node *src_block = get_nodes_block(node);
3883 ir_node *block = be_transform_node(src_block);
3884 ir_graph *irg = current_ir_graph;
3885 dbg_info *dbgi = get_irn_dbg_info(node);
3886 ir_node *frame = get_irg_frame(irg);
3887 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3888 ir_node *nomem = new_NoMem();
3889 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
3890 ir_node *new_val = be_transform_node(val);
3891 ir_node *fist, *mem;
3893 mem = gen_vfist(dbgi, irg, block, frame, noreg, nomem, new_val, &fist);
3894 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
3895 set_ia32_use_frame(fist);
3896 set_ia32_op_type(fist, ia32_AddrModeD);
3897 set_ia32_ls_mode(fist, mode_Ls);
3903 * the BAD transformer.
/* Catch-all transformer for ops that must not reach the backend;
 * aborts compilation with a diagnostic. */
3905 static ir_node *bad_transform(ir_node *node) {
3906 panic("No transform function for %+F available.", node);
/**
 * Transform a Proj of an l_FloattoLL: read one 32-bit half back from the
 * 64-bit frame slot the fist wrote; the high half sits at offset 4
 * (little endian).
 */
3910 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
3911 ir_graph *irg = current_ir_graph;
3912 ir_node *block = be_transform_node(get_nodes_block(node));
3913 ir_node *pred = get_Proj_pred(node);
3914 ir_node *new_pred = be_transform_node(pred);
3915 ir_node *frame = get_irg_frame(irg);
3916 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3917 dbg_info *dbgi = get_irn_dbg_info(node);
3918 long pn = get_Proj_proj(node);
3923 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
3924 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3925 set_ia32_use_frame(load);
3926 set_ia32_op_type(load, ia32_AddrModeS);
3927 set_ia32_ls_mode(load, mode_Iu);
3928 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
3929 * 32 bit from it with this particular load */
3930 attr = get_ia32_attr(load);
3931 attr->data.need_64bit_stackent = 1;
3933 if (pn == pn_ia32_l_FloattoLL_res_high) {
3934 add_ia32_am_offs_int(load, 4);
3936 assert(pn == pn_ia32_l_FloattoLL_res_low);
3939 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3945 * Transform the Projs of an AddSP.
/**
 * Transform the Projs of a be_AddSP. Since be_AddSP was transformed into
 * an ia32_SubSP (stack grows downwards), the Projs are renumbered to the
 * SubSP output positions.
 */
3947 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
3948 ir_node *block = be_transform_node(get_nodes_block(node));
3949 ir_node *pred = get_Proj_pred(node);
3950 ir_node *new_pred = be_transform_node(pred);
3951 ir_graph *irg = current_ir_graph;
3952 dbg_info *dbgi = get_irn_dbg_info(node);
3953 long proj = get_Proj_proj(node);
3955 if (proj == pn_be_AddSP_sp) {
3956 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
3957 pn_ia32_SubSP_stack);
/* the new stack pointer must live in ESP */
3958 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
3960 } else if(proj == pn_be_AddSP_res) {
3961 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
3962 pn_ia32_SubSP_addr);
3963 } else if (proj == pn_be_AddSP_M) {
3964 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
3967 panic("No idea how to transform proj->AddSP");
3971 * Transform the Projs of a SubSP.
/**
 * Transform the Projs of a be_SubSP. Mirror of gen_Proj_be_AddSP: the
 * be_SubSP became an ia32_AddSP, so Projs map to AddSP outputs.
 */
3973 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
3974 ir_node *block = be_transform_node(get_nodes_block(node));
3975 ir_node *pred = get_Proj_pred(node);
3976 ir_node *new_pred = be_transform_node(pred);
3977 ir_graph *irg = current_ir_graph;
3978 dbg_info *dbgi = get_irn_dbg_info(node);
3979 long proj = get_Proj_proj(node);
3981 if (proj == pn_be_SubSP_sp) {
3982 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
3983 pn_ia32_AddSP_stack);
/* the new stack pointer must live in ESP */
3984 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
3986 } else if (proj == pn_be_SubSP_M) {
3987 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
3990 panic("No idea how to transform proj->SubSP");
3994 * Transform and renumber the Projs from a Load.
/**
 * Transform and renumber the Projs from a Load, dispatching on what the
 * Load was turned into (gp Load, Conv with folded load, SSE xLoad, or
 * x87 vfld) and picking the matching output number and mode.
 */
3996 static ir_node *gen_Proj_Load(ir_node *node) {
3998 ir_node *block = be_transform_node(get_nodes_block(node));
3999 ir_node *pred = get_Proj_pred(node);
4000 ir_graph *irg = current_ir_graph;
4001 dbg_info *dbgi = get_irn_dbg_info(node);
4002 long proj = get_Proj_proj(node);
4004 /* loads might be part of source address mode matches, so we don't
4005 * transform the ProjMs yet (with the exception of loads whose result is
4008 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4011 /* this is needed, because sometimes we have loops that are only
4012 reachable through the ProjM */
4013 be_enqueue_preds(node);
4014 /* do it in 2 steps, to silence firm verifier */
4015 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4016 set_Proj_proj(res, pn_ia32_mem);
4020 /* renumber the proj */
4021 new_pred = be_transform_node(pred);
4022 if (is_ia32_Load(new_pred)) {
4025 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4027 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4028 case pn_Load_X_regular:
4029 return new_rd_Jmp(dbgi, irg, block);
4030 case pn_Load_X_except:
4031 /* This Load might raise an exception. Mark it. */
4032 set_ia32_exc_label(new_pred, 1);
4033 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4037 } else if (is_ia32_Conv_I2I(new_pred) ||
4038 is_ia32_Conv_I2I8Bit(new_pred)) {
/* the load got folded into a conversion: give the Conv a tuple mode so
 * both result and memory can be Proj'd off it */
4039 set_irn_mode(new_pred, mode_T);
4040 if (proj == pn_Load_res) {
4041 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4042 } else if (proj == pn_Load_M) {
4043 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4045 } else if (is_ia32_xLoad(new_pred)) {
4048 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4050 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4051 case pn_Load_X_regular:
4052 return new_rd_Jmp(dbgi, irg, block);
4053 case pn_Load_X_except:
4054 /* This Load might raise an exception. Mark it. */
4055 set_ia32_exc_label(new_pred, 1);
4056 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4060 } else if (is_ia32_vfld(new_pred)) {
4063 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4065 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4066 case pn_Load_X_regular:
4067 return new_rd_Jmp(dbgi, irg, block);
4068 case pn_Load_X_except:
4069 /* This Load might raise an exception. Mark it. */
4070 set_ia32_exc_label(new_pred, 1);
/* fixed copy-paste bug: this is the vfld branch, so the exception Proj
 * must use pn_ia32_vfld_X_exc, not the xLoad constant */
4071 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4076 /* can happen for ProjMs when source address mode happened for the
4079 /* however it should not be the result proj, as that would mean the
4080 load had multiple users and should not have been used for
4082 if (proj != pn_Load_M) {
4083 panic("internal error: transformed node not a Load");
4085 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4088 panic("No idea how to transform proj");
4092 * Transform and renumber the Projs from a DivMod like instruction.
/**
 * Transform and renumber the Projs from a Div/Mod/DivMod: all three firm
 * ops were lowered to a single ia32 Div/IDiv with separate div and mod
 * result outputs.
 */
4094 static ir_node *gen_Proj_DivMod(ir_node *node) {
4095 ir_node *block = be_transform_node(get_nodes_block(node));
4096 ir_node *pred = get_Proj_pred(node);
4097 ir_node *new_pred = be_transform_node(pred);
4098 ir_graph *irg = current_ir_graph;
4099 dbg_info *dbgi = get_irn_dbg_info(node);
4100 long proj = get_Proj_proj(node);
4102 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* dispatch on which firm op the Proj originally came from */
4104 switch (get_irn_opcode(pred)) {
4108 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4110 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4111 case pn_Div_X_regular:
4112 return new_rd_Jmp(dbgi, irg, block);
4113 case pn_Div_X_except:
4114 set_ia32_exc_label(new_pred, 1);
4115 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4123 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4125 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4126 case pn_Mod_X_except:
4127 set_ia32_exc_label(new_pred, 1);
4128 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4136 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4137 case pn_DivMod_res_div:
4138 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4139 case pn_DivMod_res_mod:
4140 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4141 case pn_DivMod_X_regular:
4142 return new_rd_Jmp(dbgi, irg, block);
4143 case pn_DivMod_X_except:
4144 set_ia32_exc_label(new_pred, 1);
4145 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4154 panic("No idea how to transform proj->DivMod");
4158 * Transform and renumber the Projs from a CopyB.
/**
 * Transform and renumber the Projs from a CopyB; the CopyB may have been
 * lowered to the immediate-size (CopyB_i) or the generic variant.
 */
4160 static ir_node *gen_Proj_CopyB(ir_node *node) {
4161 ir_node *block = be_transform_node(get_nodes_block(node));
4162 ir_node *pred = get_Proj_pred(node);
4163 ir_node *new_pred = be_transform_node(pred);
4164 ir_graph *irg = current_ir_graph;
4165 dbg_info *dbgi = get_irn_dbg_info(node);
4166 long proj = get_Proj_proj(node);
4169 case pn_CopyB_M_regular:
4170 if (is_ia32_CopyB_i(new_pred)) {
4171 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4172 } else if (is_ia32_CopyB(new_pred)) {
4173 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4180 panic("No idea how to transform proj->CopyB");
4184 * Transform and renumber the Projs from a Quot.
/**
 * Transform and renumber the Projs from a Quot (float division), which
 * was lowered either to an SSE xDiv or an x87 vfdiv.
 */
4186 static ir_node *gen_Proj_Quot(ir_node *node) {
4187 ir_node *block = be_transform_node(get_nodes_block(node));
4188 ir_node *pred = get_Proj_pred(node);
4189 ir_node *new_pred = be_transform_node(pred);
4190 ir_graph *irg = current_ir_graph;
4191 dbg_info *dbgi = get_irn_dbg_info(node);
4192 long proj = get_Proj_proj(node);
4196 if (is_ia32_xDiv(new_pred)) {
4197 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4198 } else if (is_ia32_vfdiv(new_pred)) {
4199 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4203 if (is_ia32_xDiv(new_pred)) {
4204 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4205 } else if (is_ia32_vfdiv(new_pred)) {
4206 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
/* float division does not trap here; control-flow Projs unsupported */
4209 case pn_Quot_X_regular:
4210 case pn_Quot_X_except:
4215 panic("No idea how to transform proj->Quot");
/* Duplicate a be_Call; a call clobbers the flags, and a float return
 * value requires running the x87 simulator afterwards. */
4218 static ir_node *gen_be_Call(ir_node *node) {
4219 ir_node *res = be_duplicate_node(node);
4222 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4224 /* Run the x87 simulator if the call returns a float value */
4225 call_tp = be_Call_get_type(node);
4226 if (get_method_n_ress(call_tp) > 0) {
4227 ir_type *const res_type = get_method_res_type(call_tp, 0);
4228 ir_mode *const res_mode = get_type_mode(res_type);
4230 if (res_mode != NULL && mode_is_float(res_mode)) {
4231 env_cg->do_x87_sim = 1;
/* Duplicate a be_IncSP; the emitted add/sub on ESP modifies the flags. */
4238 static ir_node *gen_be_IncSP(ir_node *node) {
4239 ir_node *res = be_duplicate_node(node);
4240 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4246 * Transform the Projs from a be_Call.
/**
 * Transform the Projs from a be_Call. With SSE2 a float return value is
 * passed in st(0) by the ABI, so a vfst (spill st0 to the frame) plus an
 * xLoad sequence is built after the call and the result/memory Projs are
 * rerouted to that sequence.
 */
4248 static ir_node *gen_Proj_be_Call(ir_node *node) {
4249 ir_node *block = be_transform_node(get_nodes_block(node));
4250 ir_node *call = get_Proj_pred(node);
4251 ir_node *new_call = be_transform_node(call);
4252 ir_graph *irg = current_ir_graph;
4253 dbg_info *dbgi = get_irn_dbg_info(node);
4254 ir_type *method_type = be_Call_get_type(call);
4255 int n_res = get_method_n_ress(method_type);
4256 long proj = get_Proj_proj(node);
4257 ir_mode *mode = get_irn_mode(node);
4259 const arch_register_class_t *cls;
4261 /* The following is kinda tricky: If we're using SSE, then we have to
4262 * move the result value of the call in floating point registers to an
4263 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4264 * after the call, we have to make sure to correctly make the
4265 * MemProj and the result Proj use these 2 nodes
4267 if (proj == pn_be_Call_M_regular) {
4268 // get new node for result, are we doing the sse load/store hack?
4269 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4270 ir_node *call_res_new;
4271 ir_node *call_res_pred = NULL;
4273 if (call_res != NULL) {
4274 call_res_new = be_transform_node(call_res);
4275 call_res_pred = get_Proj_pred(call_res_new);
/* no sse hack in effect: the memory Proj stays on the call itself */
4278 if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
4279 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4280 pn_be_Call_M_regular);
/* sse hack: memory must come after the xLoad that reads the spill */
4282 assert(is_ia32_xLoad(call_res_pred));
4283 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
4287 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4288 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4290 ir_node *frame = get_irg_frame(irg);
4291 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4293 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4296 /* in case there is no memory output: create one to serialize the copy
4298 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4299 pn_be_Call_M_regular);
4300 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4301 pn_be_Call_first_res);
4303 /* store st(0) onto stack */
4304 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4306 set_ia32_op_type(fstp, ia32_AddrModeD);
4307 set_ia32_use_frame(fstp);
4309 /* load into SSE register */
4310 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4312 set_ia32_op_type(sse_load, ia32_AddrModeS);
4313 set_ia32_use_frame(sse_load);
4315 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4321 /* transform call modes */
4322 if (mode_is_data(mode)) {
4323 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
4327 return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4331 * Transform the Projs from a Cmp.
/* Cmp Projs must have been eliminated by the mode_b lowering before the
 * backend runs; reaching this transformer is a pipeline error. */
4333 static ir_node *gen_Proj_Cmp(ir_node *node)
4335 /* this probably means not all mode_b nodes were lowered... */
4336 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4341 * Transform the Projs from a Bound.
/**
 * Transform the Projs from a Bound. Bound became a Jcc (see gen_Bound):
 * the regular exit maps to the Jcc true edge, the exception exit to the
 * false edge, memory and result pass straight through to the Bound's
 * own operands.
 */
4343 static ir_node *gen_Proj_Bound(ir_node *node)
4345 ir_node *new_node, *block;
4346 ir_node *pred = get_Proj_pred(node);
4348 switch (get_Proj_proj(node)) {
4350 return be_transform_node(get_Bound_mem(pred));
4351 case pn_Bound_X_regular:
4352 new_node = be_transform_node(pred);
4353 block = get_nodes_block(new_node);
4354 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
4355 case pn_Bound_X_except:
4356 new_node = be_transform_node(pred);
4357 block = get_nodes_block(new_node);
4358 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
4360 return be_transform_node(get_Bound_index(pred));
4362 panic("unsupported Proj from Bound");
/* Transform a Proj of an ASM node; only the memory Proj needs
 * renumbering (it comes after all register results), everything else
 * is duplicated as-is. */
4366 static ir_node *gen_Proj_ASM(ir_node *node)
4372 if (get_irn_mode(node) != mode_M)
4373 return be_duplicate_node(node);
4375 pred = get_Proj_pred(node);
4376 new_pred = be_transform_node(pred);
4377 block = get_nodes_block(new_pred);
/* memory output is the last output, after the n register results */
4378 return new_r_Proj(current_ir_graph, block, new_pred, mode_M,
4379 get_ia32_n_res(new_pred) + 1);
4383 * Transform and potentially renumber Proj nodes.
/**
 * Generic Proj transformer: dispatches on the predecessor's opcode to
 * the specialized gen_Proj_* helpers; falls back to duplication with a
 * gp-mode fix-up for everything else.
 */
4385 static ir_node *gen_Proj(ir_node *node) {
4386 ir_node *pred = get_Proj_pred(node);
4389 switch (get_irn_opcode(pred)) {
4391 proj = get_Proj_proj(node);
4392 if (proj == pn_Store_M) {
4393 return be_transform_node(pred);
4395 panic("No idea how to transform proj->Store");
4398 return gen_Proj_Load(node);
4400 return gen_Proj_ASM(node);
4404 return gen_Proj_DivMod(node);
4406 return gen_Proj_CopyB(node);
4408 return gen_Proj_Quot(node);
4410 return gen_Proj_be_SubSP(node);
4412 return gen_Proj_be_AddSP(node);
4414 return gen_Proj_be_Call(node);
4416 return gen_Proj_Cmp(node);
4418 return gen_Proj_Bound(node);
4420 proj = get_Proj_proj(node);
4421 if (proj == pn_Start_X_initial_exec) {
4422 ir_node *block = get_nodes_block(pred);
4423 dbg_info *dbgi = get_irn_dbg_info(node);
4426 /* we exchange the ProjX with a jump */
4427 block = be_transform_node(block);
4428 jump = new_rd_Jmp(dbgi, current_ir_graph, block);
4431 if (node == be_get_old_anchor(anchor_tls)) {
4432 return gen_Proj_tls(node);
4437 if (is_ia32_l_FloattoLL(pred)) {
4438 return gen_Proj_l_FloattoLL(node);
4440 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* default: duplicate the Proj, but force gp values into mode_Iu */
4444 ir_mode *mode = get_irn_mode(node);
4445 if (ia32_mode_needs_gp_reg(mode)) {
4446 ir_node *new_pred = be_transform_node(pred);
4447 ir_node *block = be_transform_node(get_nodes_block(node));
4448 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
4449 mode_Iu, get_Proj_proj(node));
4450 #ifdef DEBUG_libfirm
/* keep the original node number for readable debug dumps */
4451 new_proj->node_nr = node->node_nr;
4457 return be_duplicate_node(node);
4461 * Enters all transform functions into the generic pointer
/**
 * Enter all transform functions into the ops' generic function pointers,
 * so be_transform_graph can dispatch per opcode. GEN registers a real
 * transformer, BAD registers bad_transform for ops that must not occur.
 */
4463 static void register_transformers(void)
4467 /* first clear the generic function pointer for all ops */
4468 clear_irp_opcodes_generic_func();
4470 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4471 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
4509 /* transform ops from intrinsic lowering */
4521 GEN(ia32_l_LLtoFloat);
4522 GEN(ia32_l_FloattoLL);
4528 /* we should never see these nodes */
4543 /* handle generic backend nodes */
/* Mulh is an optional op; fetch it dynamically before registering */
4552 op_Mulh = get_op_Mulh();
4561 * Pre-transform all unknown and noreg nodes.
/**
 * Pre-transform all unknown and noreg nodes so the cached pointers in
 * the code generator refer to nodes of the transformed graph.
 */
4563 static void ia32_pretransform_node(void *arch_cg) {
4564 ia32_code_gen_t *cg = arch_cg;
4566 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
4567 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
4568 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
4569 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
4570 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
4571 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
4576 * Walker, checks if all ia32 nodes producing more than one result have their
4577 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
/**
 * Walker: for each ia32 node with multiple results, collect which outputs
 * already have Projs (bitset in found_projs) and create Proj+be_Keep for
 * the missing ones, so the register allocator sees every written register.
 */
4579 static void add_missing_keep_walker(ir_node *node, void *data)
4582 unsigned found_projs = 0;
4583 const ir_edge_t *edge;
4584 ir_mode *mode = get_irn_mode(node);
4589 if(!is_ia32_irn(node))
4592 n_outs = get_ia32_n_res(node);
4595 if(is_ia32_SwitchJmp(node))
/* found_projs is a bitset, so all outputs must fit into one unsigned */
4598 assert(n_outs < (int) sizeof(unsigned) * 8);
4599 foreach_out_edge(node, edge) {
4600 ir_node *proj = get_edge_src_irn(edge);
4601 int pn = get_Proj_proj(proj);
/* memory outputs need no keep */
4603 if (get_irn_mode(proj) == mode_M)
4606 assert(pn < n_outs);
4607 found_projs |= 1 << pn;
4611 /* are keeps missing? */
4613 for(i = 0; i < n_outs; ++i) {
4616 const arch_register_req_t *req;
4617 const arch_register_class_t *cls;
4619 if(found_projs & (1 << i)) {
4623 req = get_ia32_out_req(node, i);
/* flags outputs need no keep either */
4628 if(cls == &ia32_reg_classes[CLASS_ia32_flags]) {
4632 block = get_nodes_block(node);
4633 in[0] = new_r_Proj(current_ir_graph, block, node,
4634 arch_register_class_mode(cls), i);
/* reuse one Keep per node; append further values to it */
4635 if(last_keep != NULL) {
4636 be_Keep_add_node(last_keep, cls, in[0]);
4638 last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
4639 if(sched_is_scheduled(node)) {
4640 sched_add_after(node, last_keep);
4647 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/* Walk the whole graph and add Proj/Keep nodes for unused outputs of
 * multi-result ia32 nodes (see add_missing_keep_walker). */
4650 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
4652 ir_graph *irg = be_get_birg_irg(cg->birg);
4653 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
4656 /* do the transformation */
/* do the transformation */
/* Entry point of the firm -> ia32 transformation: registers the per-op
 * transformers, builds the heights and non-address-mode analyses, runs
 * the generic be_transform_graph driver with CSE disabled, and frees the
 * analyses again. */
4657 void ia32_transform_graph(ia32_code_gen_t *cg) {
4659 ir_graph *irg = cg->irg;
4661 register_transformers();
4663 initial_fpcw = NULL;
4665 BE_TIMER_PUSH(t_heights);
4666 heights = heights_new(irg);
4667 BE_TIMER_POP(t_heights);
4668 ia32_calculate_non_address_mode_nodes(cg->birg);
4670 /* the transform phase is not safe for CSE (yet) because several nodes get
4671 * attributes set after their creation */
4672 cse_last = get_opt_cse();
4675 be_transform_graph(cg->birg, ia32_pretransform_node, cg);
/* restore the caller's CSE setting */
4677 set_opt_cse(cse_last);
4679 ia32_free_non_address_mode_nodes();
4680 heights_free(heights);
4684 void ia32_init_transform(void)
4686 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");