2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
36 #include "irgraph_t.h"
41 #include "iredges_t.h"
54 #include "../benode_t.h"
55 #include "../besched.h"
57 #include "../beutil.h"
58 #include "../beirg_t.h"
59 #include "../betranshlp.h"
62 #include "bearch_ia32_t.h"
63 #include "ia32_common_transform.h"
64 #include "ia32_nodes_attr.h"
65 #include "ia32_transform.h"
66 #include "ia32_new_nodes.h"
67 #include "ia32_map_regs.h"
68 #include "ia32_dbg_stat.h"
69 #include "ia32_optimize.h"
70 #include "ia32_util.h"
71 #include "ia32_address_mode.h"
72 #include "ia32_architecture.h"
74 #include "gen_ia32_regalloc_if.h"
/* Bit patterns (as strings) for the FP sign-bit and abs masks and the
 * integer maximum; materialized as global data entities by
 * ia32_gen_fp_known_const() further below. */
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
/* names of the primitive types created for the constants above */
82 #define TP_SFP_SIGN "ia32_sfp_sign"
83 #define TP_DFP_SIGN "ia32_dfp_sign"
84 #define TP_SFP_ABS "ia32_sfp_abs"
85 #define TP_DFP_ABS "ia32_dfp_abs"
86 #define TP_INT_MAX "ia32_int_max"
/* linker-visible names of the entities holding the constants */
88 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
89 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
90 #define ENT_SFP_ABS "IA32_SFP_ABS"
91 #define ENT_DFP_ABS "IA32_DFP_ABS"
92 #define ENT_INT_MAX "IA32_INT_MAX"
/* shorthand for the register-class modes of the x87 (vfp) and SSE (xmm)
 * register classes */
94 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
95 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* debug module handle (only present in debug builds) */
97 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* cached transformed node of the initial x87 FPU control word; filled
 * lazily by get_fpcw() */
99 static ir_node *initial_fpcw = NULL;
101 extern ir_op *get_op_Mulh(void);
/* Constructor-function typedefs: each matches the signature of a family of
 * generated new_rd_ia32_* node constructors so gen_binop() and friends can
 * be parameterized over the concrete opcode. */
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
/* binop that additionally consumes an eflags input (Adc/Sbb) */
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
/* shift/rotate: no address-mode inputs, just the two operands */
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
113 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
114 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
/* x87 float binop: consumes the FPU control word as an extra input */
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
/* forward declarations for helpers defined later in this file */
128 static ir_node *create_immediate_or_transform(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
132 dbg_info *dbgi, ir_node *block,
133 ir_node *op, ir_node *orig_node);
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node) {
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node) {
142 return is_Const(node) && is_Const_one(node);
145 /** Return non-zero is a node represents the -1 constant. */
146 static bool is_Const_Minus_1(ir_node *node) {
147 return is_Const(node) && is_Const_all_one(node);
151 * returns true if constant can be created with a simple float command
153 static bool is_simple_x87_Const(ir_node *node)
155 tarval *tv = get_Const_tarval(node);
156 if (tarval_is_null(tv) || tarval_is_one(tv))
159 /* TODO: match all the other float constants */
164 * returns true if constant can be created with a simple float command
166 static bool is_simple_sse_Const(ir_node *node)
168 tarval *tv = get_Const_tarval(node);
169 ir_mode *mode = get_tarval_mode(tv);
174 if (tarval_is_null(tv) || tarval_is_one(tv))
177 if (mode == mode_D) {
178 unsigned val = get_tarval_sub_bits(tv, 0) |
179 (get_tarval_sub_bits(tv, 1) << 8) |
180 (get_tarval_sub_bits(tv, 2) << 16) |
181 (get_tarval_sub_bits(tv, 3) << 24);
183 /* lower 32bit are zero, really a 32bit constant */
187 /* TODO: match all the other float constants */
/* NOTE(review): this copy appears truncated (line-number prefixes baked in,
 * several lines such as closing braces and else-branches missing) — verify
 * the exact control flow against upstream libFirm. */
192 * Transforms a Const.
/* Transforms a firm Const into an ia32 constant:
 * - SSE floats: xZero/xAllOnes+shift tricks, movd from a GP Const, or an
 *   xLoad from a constant-pool entity
 * - x87 floats: vfldz/vfld1 or a vfld from a constant-pool entity
 * - integers: an ia32_Const immediate */
194 static ir_node *gen_Const(ir_node *node) {
195 ir_graph *irg = current_ir_graph;
196 ir_node *old_block = get_nodes_block(node);
197 ir_node *block = be_transform_node(old_block);
198 dbg_info *dbgi = get_irn_dbg_info(node);
199 ir_mode *mode = get_irn_mode(node);
201 assert(is_Const(node));
203 if (mode_is_float(mode)) {
205 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
206 ir_node *nomem = new_NoMem();
210 if (ia32_cg_config.use_sse2) {
211 tarval *tv = get_Const_tarval(node);
/* 0.0: a single xorps (xZero) suffices */
212 if (tarval_is_null(tv)) {
213 load = new_rd_ia32_xZero(dbgi, irg, block);
214 set_ia32_ls_mode(load, mode);
/* 1.0: build from all-ones via pslld/psrld; shift counts 26/55 carve out
 * the exponent bit pattern of 1.0f / 1.0 respectively */
216 } else if (tarval_is_one(tv)) {
217 int cnst = mode == mode_F ? 26 : 55;
218 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
219 ir_node *imm2 = create_Immediate(NULL, 0, 2);
220 ir_node *pslld, *psrld;
222 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
223 set_ia32_ls_mode(load, mode);
224 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
225 set_ia32_ls_mode(pslld, mode);
226 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
227 set_ia32_ls_mode(psrld, mode);
229 } else if (mode == mode_F) {
230 /* we can place any 32bit constant by using a movd gp, sse */
231 unsigned val = get_tarval_sub_bits(tv, 0) |
232 (get_tarval_sub_bits(tv, 1) << 8) |
233 (get_tarval_sub_bits(tv, 2) << 16) |
234 (get_tarval_sub_bits(tv, 3) << 24);
235 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
236 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
237 set_ia32_ls_mode(load, mode);
/* double whose low 32 bits are zero: load the high word via movd and
 * shift it into place with psllq */
240 if (mode == mode_D) {
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
246 ir_node *imm32 = create_Immediate(NULL, 0, 32);
247 ir_node *cnst, *psllq;
249 /* fine, lower 32bit are zero, produce 32bit value */
250 val = get_tarval_sub_bits(tv, 4) |
251 (get_tarval_sub_bits(tv, 5) << 8) |
252 (get_tarval_sub_bits(tv, 6) << 16) |
253 (get_tarval_sub_bits(tv, 7) << 24);
254 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
255 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
256 set_ia32_ls_mode(load, mode);
257 psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
258 set_ia32_ls_mode(psllq, mode);
/* general case: load from a constant-pool entity */
263 floatent = create_float_const_entity(node);
265 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
267 set_ia32_op_type(load, ia32_AddrModeS);
268 set_ia32_am_sc(load, floatent);
269 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
270 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz / fld1 for 0.0 and 1.0, otherwise vfld from entity */
273 if (is_Const_null(node)) {
274 load = new_rd_ia32_vfldz(dbgi, irg, block);
276 set_ia32_ls_mode(load, mode);
277 } else if (is_Const_one(node)) {
278 load = new_rd_ia32_vfld1(dbgi, irg, block);
280 set_ia32_ls_mode(load, mode);
282 floatent = create_float_const_entity(node);
284 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
285 set_ia32_op_type(load, ia32_AddrModeS);
286 set_ia32_am_sc(load, floatent);
287 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
288 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
289 /* take the mode from the entity */
290 set_ia32_ls_mode(load, get_type_mode(get_entity_type(floatent)));
294 /* Const Nodes before the initial IncSP are a bad idea, because
295 * they could be spilled and we have no SP ready at that point yet.
296 * So add a dependency to the initial frame pointer calculation to
297 * avoid that situation.
299 if (get_irg_start_block(irg) == block) {
300 add_irn_dep(load, get_irg_frame(irg));
303 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
305 } else { /* non-float mode */
307 tarval *tv = get_Const_tarval(node);
/* normalize to unsigned 32bit so get_tarval_long below is well-defined */
310 tv = tarval_convert_to(tv, mode_Iu);
312 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
314 panic("couldn't convert constant tarval (%+F)", node);
316 val = get_tarval_long(tv);
318 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
319 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/* same spill-before-IncSP precaution as in the float branch above */
322 if (get_irg_start_block(irg) == block) {
323 add_irn_dep(cnst, get_irg_frame(irg));
/* NOTE(review): this copy appears truncated — verify control flow against
 * upstream libFirm. */
331 * Transforms a SymConst.
/* Transforms a firm SymConst: floats become a load from the entity's
 * address, integers become an ia32_Const carrying the entity. Only
 * symconst_addr_ent is supported. */
333 static ir_node *gen_SymConst(ir_node *node) {
334 ir_graph *irg = current_ir_graph;
335 ir_node *old_block = get_nodes_block(node);
336 ir_node *block = be_transform_node(old_block);
337 dbg_info *dbgi = get_irn_dbg_info(node);
338 ir_mode *mode = get_irn_mode(node);
341 if (mode_is_float(mode)) {
342 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
343 ir_node *nomem = new_NoMem();
345 if (ia32_cg_config.use_sse2)
346 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
348 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
354 if(get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
/* entity address as immediate: offset 0, no sign */
358 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
361 /* Const Nodes before the initial IncSP are a bad idea, because
362 * they could be spilled and we have no SP ready at that point yet
364 if (get_irg_start_block(irg) == block) {
365 add_irn_dep(cnst, get_irg_frame(irg));
368 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/* NOTE(review): this copy appears truncated — verify against upstream. */
373 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Lazily creates (and caches) a global, constant, statically allocated
 * entity holding one of the well-known bit patterns (sign mask, abs mask,
 * int max) defined by the SFP_/DFP_ macros at the top of this file. */
374 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
375 static const struct {
377 const char *ent_name;
378 const char *cnst_str;
381 } names [ia32_known_const_max] = {
382 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
383 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
384 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
385 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
386 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
/* one cached entity per known-constant kind */
388 static ir_entity *ent_cache[ia32_known_const_max];
390 const char *tp_name, *ent_name, *cnst_str;
398 ent_name = names[kct].ent_name;
399 if (! ent_cache[kct]) {
400 tp_name = names[kct].tp_name;
401 cnst_str = names[kct].cnst_str;
/* mode selector from the table: 0 = 32bit, 1 = 64bit, other = float */
403 switch (names[kct].mode) {
404 case 0: mode = mode_Iu; break;
405 case 1: mode = mode_Lu; break;
406 default: mode = mode_F; break;
408 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
409 tp = new_type_primitive(new_id_from_str(tp_name), mode);
410 /* set the specified alignment */
411 set_type_alignment_bytes(tp, names[kct].align);
413 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
415 set_entity_ld_ident(ent, get_entity_ident(ent));
416 set_entity_visibility(ent, visibility_local);
417 set_entity_variability(ent, variability_constant);
418 set_entity_allocation(ent, allocation_static);
420 /* we create a new entity here: It's initialization must resist on the
/* initializer must live in the const-code irg, so switch graphs briefly */
422 rem = current_ir_graph;
423 current_ir_graph = get_const_code_irg();
424 cnst = new_Const(mode, tv);
425 current_ir_graph = rem;
427 set_atomic_ent_value(ent, cnst);
429 /* cache the entry */
430 ent_cache[kct] = ent;
433 return ent_cache[kct];
/* NOTE(review): this copy appears truncated — verify against upstream. */
437 * return true if the node is a Proj(Load) and could be used in source address
438 * mode for another node. Will return only true if the @p other node is not
439 * dependent on the memory of the Load (for binary operations use the other
440 * input here, for unary operations use NULL).
442 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
443 ir_node *other, ir_node *other2, match_flags_t flags)
448 /* float constants are always available */
449 if (is_Const(node)) {
450 ir_mode *mode = get_irn_mode(node);
451 if (mode_is_float(mode)) {
/* only "simple" constants qualify (see is_simple_*_Const above) */
452 if (ia32_cg_config.use_sse2) {
453 if (is_simple_sse_Const(node))
456 if (is_simple_x87_Const(node))
459 if (get_irn_n_edges(node) > 1)
/* general case: node must be the result Proj of a Load in this block */
467 load = get_Proj_pred(node);
468 pn = get_Proj_proj(node);
469 if (!is_Load(load) || pn != pn_Load_res)
471 if (get_nodes_block(load) != block)
473 /* we only use address mode if we're the only user of the load */
474 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
476 /* in some edge cases with address mode we might reach the load normally
477 * and through some AM sequence, if it is already materialized then we
478 * can't create an AM node from it */
479 if (be_is_transformed(node))
482 /* don't do AM if other node inputs depend on the load (via mem-proj) */
483 if (other != NULL && prevents_AM(block, load, other))
486 if (other2 != NULL && prevents_AM(block, load, other2))
/* Result of match_arguments(): the chosen addressing mode plus the
 * transformed operands and bookkeeping flags. (This copy appears truncated;
 * additional fields such as addr/ls_mode/new_op1/new_op2 are referenced
 * elsewhere in the file.) */
492 typedef struct ia32_address_mode_t ia32_address_mode_t;
493 struct ia32_address_mode_t {
498 ia32_op_type_t op_type;
/* operation is commutative (operands may be swapped) */
502 unsigned commutative : 1;
/* operands were swapped during matching */
503 unsigned ins_permuted : 1;
506 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
510 /* construct load address */
511 memset(addr, 0, sizeof(addr[0]));
512 ia32_create_address_mode(addr, ptr, /*force=*/0);
514 noreg_gp = ia32_new_NoReg_gp(env_cg);
515 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
516 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
517 addr->mem = be_transform_node(mem);
/* NOTE(review): this copy appears truncated — verify against upstream. */
/* Fills the address-mode struct for a node used as source address-mode
 * operand: either a float Const (load from its constant-pool entity) or a
 * Proj(Load) (reuse the Load's address). */
520 static void build_address(ia32_address_mode_t *am, ir_node *node)
522 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
523 ia32_address_t *addr = &am->addr;
/* float constant: address the constant-pool entity directly */
529 if (is_Const(node)) {
530 ir_entity *entity = create_float_const_entity(node);
531 addr->base = noreg_gp;
532 addr->index = noreg_gp;
533 addr->mem = new_NoMem();
534 addr->symconst_ent = entity;
536 am->ls_mode = get_type_mode(get_entity_type(entity));
537 am->pinned = op_pin_state_floats;
/* otherwise: node is Proj(Load), fold the Load into the address mode */
541 load = get_Proj_pred(node);
542 ptr = get_Load_ptr(load);
543 mem = get_Load_mem(load);
544 new_mem = be_transform_node(mem);
545 am->pinned = get_irn_pinned(load);
546 am->ls_mode = get_Load_mode(load);
547 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
550 /* construct load address */
551 ia32_create_address_mode(addr, ptr, /*force=*/0);
553 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
554 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/* Copies the fields of an ia32_address_t onto an ia32 node's attributes
 * (scale, symconst, offset, sign, frame usage). NOTE(review): copy appears
 * truncated — the conditions guarding the last two calls are missing. */
558 static void set_address(ir_node *node, const ia32_address_t *addr)
560 set_ia32_am_scale(node, addr->scale);
561 set_ia32_am_sc(node, addr->symconst_ent);
562 set_ia32_am_offs_int(node, addr->offset);
563 if(addr->symconst_sign)
564 set_ia32_am_sc_sign(node);
566 set_ia32_use_frame(node);
567 set_ia32_frame_ent(node, addr->frame_entity);
571 * Apply attributes of a given address mode to a node.
/* Copies address, op type, load/store mode, pinned state and commutativity
 * from a matched address mode onto the newly created ia32 node. */
573 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
575 set_address(node, &am->addr);
577 set_ia32_op_type(node, am->op_type);
578 set_ia32_ls_mode(node, am->ls_mode);
579 if (am->pinned == op_pin_state_pinned) {
580 /* beware: some nodes are already pinned and did not allow to change the state */
581 if (get_irn_pinned(node) != op_pin_state_pinned)
582 set_irn_pinned(node, op_pin_state_pinned);
/* NOTE(review): upstream guards this with am->commutative — the guard line
 * is missing from this copy */
585 set_ia32_commutative(node);
589 * Check, if a given node is a Down-Conv, ie. a integer Conv
590 * from a mode with a mode with more bits to a mode with lesser bits.
591 * Moreover, we return only true if the node has not more than 1 user.
593 * @param node the node
594 * @return non-zero if node is a Down-Conv
596 static int is_downconv(const ir_node *node)
604 /* we only want to skip the conv when we're the only user
605 * (not optimal but for now...)
607 if(get_irn_n_edges(node) > 1)
610 src_mode = get_irn_mode(get_Conv_op(node));
611 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the target must be narrower */
612 return ia32_mode_needs_gp_reg(src_mode)
613 && ia32_mode_needs_gp_reg(dest_mode)
614 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
617 /* Skip all Down-Conv's on a given node and return the resulting node. */
618 ir_node *ia32_skip_downconv(ir_node *node) {
619 while (is_downconv(node))
620 node = get_Conv_op(node);
/* Widens a sub-32bit value to 32bit via an I2I Conv; signedness of the
 * source mode selects the target mode (lines choosing tgt_mode are missing
 * from this copy — NOTE(review): verify against upstream). */
625 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
627 ir_mode *mode = get_irn_mode(node);
632 if(mode_is_signed(mode)) {
637 block = get_nodes_block(node);
638 dbgi = get_irn_dbg_info(node);
640 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/* NOTE(review): this copy appears truncated — several branches/braces are
 * missing; verify the exact control flow against upstream libFirm. */
644 * matches operands of a node into ia32 addressing/operand modes. This covers
645 * usage of source address mode, immediates, operations with non 32-bit modes,
647 * The resulting data is filled into the @p am struct. block is the block
648 * of the node whose arguments are matched. op1, op2 are the first and second
649 * input that are matched (op1 may be NULL). other_op is another unrelated
650 * input that is not matched! but which is needed sometimes to check if AM
651 * for op1/op2 is legal.
652 * @p flags describes the supported modes of the operation in detail.
654 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
655 ir_node *op1, ir_node *op2, ir_node *other_op,
658 ia32_address_t *addr = &am->addr;
659 ir_mode *mode = get_irn_mode(op2);
660 int mode_bits = get_mode_size_bits(mode);
661 ir_node *noreg_gp, *new_op1, *new_op2;
663 unsigned commutative;
664 int use_am_and_immediates;
667 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
669 commutative = (flags & match_commutative) != 0;
670 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
671 use_am = (flags & match_am) != 0;
672 use_immediate = (flags & match_immediate) != 0;
673 assert(!use_am_and_immediates || use_immediate);
676 assert(!commutative || op1 != NULL);
677 assert(use_am || !(flags & match_8bit_am));
678 assert(use_am || !(flags & match_16bit_am));
/* sub-32bit modes need explicit 8/16bit AM support from the operation */
680 if (mode_bits == 8) {
681 if (!(flags & match_8bit_am))
683 /* we don't automatically add upconvs yet */
684 assert((flags & match_mode_neutral) || (flags & match_8bit));
685 } else if (mode_bits == 16) {
686 if (!(flags & match_16bit_am))
688 /* we don't automatically add upconvs yet */
689 assert((flags & match_mode_neutral) || (flags & match_16bit));
692 /* we can simply skip downconvs for mode neutral nodes: the upper bits
693 * can be random for these operations */
694 if (flags & match_mode_neutral) {
695 op2 = ia32_skip_downconv(op2);
697 op1 = ia32_skip_downconv(op1);
701 /* match immediates. firm nodes are normalized: constants are always on the
704 if (!(flags & match_try_am) && use_immediate) {
705 new_op2 = try_create_Immediate(op2, 0);
708 noreg_gp = ia32_new_NoReg_gp(env_cg);
/* try source address mode on op2 first */
709 if (new_op2 == NULL &&
710 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
711 build_address(am, op2);
712 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
713 if (mode_is_float(mode)) {
714 new_op2 = ia32_new_NoReg_vfp(env_cg);
718 am->op_type = ia32_AddrModeS;
/* commutative: also try AM on op1 (with operands swapped) */
719 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
721 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
723 build_address(am, op1);
725 if (mode_is_float(mode)) {
726 noreg = ia32_new_NoReg_vfp(env_cg);
731 if (new_op2 != NULL) {
734 new_op1 = be_transform_node(op2);
736 am->ins_permuted = 1;
738 am->op_type = ia32_AddrModeS;
/* no AM matched: plain register operands */
740 am->op_type = ia32_Normal;
742 if (flags & match_try_am) {
748 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
750 new_op2 = be_transform_node(op2);
752 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
/* fill in neutral defaults for unused address components */
754 if (addr->base == NULL)
755 addr->base = noreg_gp;
756 if (addr->index == NULL)
757 addr->index = noreg_gp;
758 if (addr->mem == NULL)
759 addr->mem = new_NoMem();
761 am->new_op1 = new_op1;
762 am->new_op2 = new_op2;
763 am->commutative = commutative;
766 static void set_transformed_and_mark(ir_node *const old_node, ir_node *const new_node)
768 mark_irn_visited(old_node);
769 be_set_transformed_node(old_node, new_node);
/* If the matched address mode consumed a Load, reroute the Load's old
 * memory Proj to the new combined node: the new node becomes mode_T and a
 * result Proj is returned in place of the original value. */
772 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do when no Load was folded into the node */
777 if (am->mem_proj == NULL)
780 /* we have to create a mode_T so the old MemProj can attach to us */
781 mode = get_irn_mode(node);
782 load = get_Proj_pred(am->mem_proj);
784 set_transformed_and_mark(load, node);
786 if (mode != mode_T) {
787 set_irn_mode(node, mode_T);
788 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
795 * Construct a standard binary operation, set AM and immediate if required.
797 * @param node The original node for which the binop is created
798 * @param op1 The first operand
799 * @param op2 The second operand
800 * @param func The node constructor function
801 * @return The constructed ia32 node.
803 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
804 construct_binop_func *func, match_flags_t flags)
807 ir_node *block, *new_block, *new_node;
808 ia32_address_mode_t am;
809 ia32_address_t *addr = &am.addr;
/* match operands into address mode / immediates first */
811 block = get_nodes_block(node);
812 match_arguments(&am, block, op1, op2, NULL, flags);
814 dbgi = get_irn_dbg_info(node);
815 new_block = be_transform_node(block);
816 new_node = func(dbgi, current_ir_graph, new_block,
817 addr->base, addr->index, addr->mem,
818 am.new_op1, am.new_op2);
819 set_am_attributes(new_node, &am);
820 /* we can't use source address mode anymore when using immediates */
821 if (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
822 set_ia32_am_support(new_node, ia32_am_none);
823 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reroute a folded Load's memory Proj, if any */
825 new_node = fix_mem_proj(new_node, &am);
/* Canonical input indices shared by the lowered flag-consuming binops
 * (l_Adc, l_Sbb); the COMPILETIME_ASSERTs below verify the generated
 * per-opcode indices agree with them. (Enum head missing from this copy.) */
832 n_ia32_l_binop_right,
833 n_ia32_l_binop_eflags
835 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
836 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
837 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
838 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
839 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
840 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
843 * Construct a binary operation which also consumes the eflags.
845 * @param node The node to transform
846 * @param func The node constructor function
847 * @param flags The match flags
848 * @return The constructor ia32 node
850 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
853 ir_node *src_block = get_nodes_block(node);
854 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
855 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
856 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
858 ir_node *block, *new_node, *new_eflags;
859 ia32_address_mode_t am;
860 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded Load */
862 match_arguments(&am, src_block, op1, op2, eflags, flags);
864 dbgi = get_irn_dbg_info(node);
865 block = be_transform_node(src_block);
866 new_eflags = be_transform_node(eflags);
867 new_node = func(dbgi, current_ir_graph, block, addr->base, addr->index,
868 addr->mem, am.new_op1, am.new_op2, new_eflags);
869 set_am_attributes(new_node, &am);
870 /* we can't use source address mode anymore when using immediates */
871 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
872 set_ia32_am_support(new_node, ia32_am_none);
873 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
875 new_node = fix_mem_proj(new_node, &am);
880 static ir_node *get_fpcw(void)
883 if (initial_fpcw != NULL)
886 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
887 &ia32_fp_cw_regs[REG_FPCW]);
888 initial_fpcw = be_transform_node(fpcw);
894 * Construct a standard binary operation, set AM and immediate if required.
/* x87 float variant of gen_binop: additionally wires the FPU control word
 * into the constructed node. */
896 * @param op1 The first operand
897 * @param op2 The second operand
898 * @param func The node constructor function
899 * @return The constructed ia32 node.
901 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
902 construct_binop_float_func *func,
905 ir_mode *mode = get_irn_mode(node);
907 ir_node *block, *new_block, *new_node;
908 ia32_address_mode_t am;
909 ia32_address_t *addr = &am.addr;
911 /* cannot use address mode with long double on x87 */
912 if (get_mode_size_bits(mode) > 64)
915 block = get_nodes_block(node);
916 match_arguments(&am, block, op1, op2, NULL, flags);
918 dbgi = get_irn_dbg_info(node);
919 new_block = be_transform_node(block);
920 new_node = func(dbgi, current_ir_graph, new_block,
921 addr->base, addr->index, addr->mem,
922 am.new_op1, am.new_op2, get_fpcw());
923 set_am_attributes(new_node, &am);
925 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
927 new_node = fix_mem_proj(new_node, &am);
933 * Construct a shift/rotate binary operation, sets AM and immediate if required.
935 * @param op1 The first operand
936 * @param op2 The second operand
937 * @param func The node constructor function
938 * @return The constructed ia32 node.
940 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
941 construct_shift_func *func,
945 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
947 assert(! mode_is_float(get_irn_mode(node)));
948 assert(flags & match_immediate);
949 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* the shifted value: skip downconvs when mode-neutral, otherwise widen
 * sub-32bit values explicitly */
951 if (flags & match_mode_neutral) {
952 op1 = ia32_skip_downconv(op1);
953 new_op1 = be_transform_node(op1);
954 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
955 new_op1 = create_upconv(op1, node);
957 new_op1 = be_transform_node(op1);
960 /* the shift amount can be any mode that is bigger than 5 bits, since all
961 * other bits are ignored anyway */
962 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
963 ir_node *const op = get_Conv_op(op2);
964 if (mode_is_float(get_irn_mode(op)))
967 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
/* shift count becomes an immediate if possible */
969 new_op2 = create_immediate_or_transform(op2, 0);
971 dbgi = get_irn_dbg_info(node);
972 block = get_nodes_block(node);
973 new_block = be_transform_node(block);
974 new_node = func(dbgi, current_ir_graph, new_block, new_op1, new_op2);
975 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
977 /* lowered shift instruction may have a dependency operand, handle it here */
978 if (get_irn_arity(node) == 3) {
979 /* we have a dependency */
980 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
981 add_irn_dep(new_node, new_dep);
989 * Construct a standard unary operation, set AM and immediate if required.
991 * @param op The operand
992 * @param func The node constructor function
993 * @return The constructed ia32 node.
995 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
999 ir_node *block, *new_block, *new_op, *new_node;
1001 assert(flags == 0 || flags == match_mode_neutral);
/* upper bits are irrelevant for mode-neutral ops: skip narrowing Convs */
1002 if (flags & match_mode_neutral) {
1003 op = ia32_skip_downconv(op);
1006 new_op = be_transform_node(op);
1007 dbgi = get_irn_dbg_info(node);
1008 block = get_nodes_block(node);
1009 new_block = be_transform_node(block);
1010 new_node = func(dbgi, current_ir_graph, new_block, new_op);
1012 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Builds an ia32 Lea node computing the given address; absent base/index
 * components are replaced by NoReg, present ones are transformed. */
1017 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1018 ia32_address_t *addr)
1020 ir_node *base, *index, *res;
/* base: NoReg when absent (conditional head missing in this copy) */
1024 base = ia32_new_NoReg_gp(env_cg);
1026 base = be_transform_node(base);
1029 index = addr->index;
1030 if (index == NULL) {
1031 index = ia32_new_NoReg_gp(env_cg);
1033 index = be_transform_node(index);
1036 res = new_rd_ia32_Lea(dbgi, current_ir_graph, block, base, index);
1037 set_address(res, addr);
1043 * Returns non-zero if a given address mode has a symbolic or
1044 * numerical offset != 0.
1046 static int am_has_immediates(const ia32_address_t *addr)
1048 return addr->offset != 0 || addr->symconst_ent != NULL
1049 || addr->frame_entity || addr->use_frame;
/* NOTE(review): this copy appears truncated — verify the branch structure
 * against upstream libFirm. */
1053 * Creates an ia32 Add.
1055 * @return the created ia32 Add node
1057 static ir_node *gen_Add(ir_node *node) {
1058 ir_mode *mode = get_irn_mode(node);
1059 ir_node *op1 = get_Add_left(node);
1060 ir_node *op2 = get_Add_right(node);
1062 ir_node *block, *new_block, *new_node, *add_immediate_op;
1063 ia32_address_t addr;
1064 ia32_address_mode_t am;
/* float adds: straightforward binop (SSE or x87) */
1066 if (mode_is_float(mode)) {
1067 if (ia32_cg_config.use_sse2)
1068 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1069 match_commutative | match_am);
1071 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1072 match_commutative | match_am);
1075 ia32_mark_non_am(node);
1077 op2 = ia32_skip_downconv(op2);
1078 op1 = ia32_skip_downconv(op1);
/* integer add strategy: */
1082 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1083 * 1. Add with immediate -> Lea
1084 * 2. Add with possible source address mode -> Add
1085 * 3. Otherwise -> Lea
1087 memset(&addr, 0, sizeof(addr));
1088 ia32_create_address_mode(&addr, node, /*force=*/1);
1089 add_immediate_op = NULL;
1091 dbgi = get_irn_dbg_info(node);
1092 block = get_nodes_block(node);
1093 new_block = be_transform_node(block);
/* case 0: whole Add folds into an immediate constant */
1096 if(addr.base == NULL && addr.index == NULL) {
1097 ir_graph *irg = current_ir_graph;
1098 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1099 addr.symconst_sign, addr.offset);
1100 add_irn_dep(new_node, get_irg_frame(irg));
1101 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1104 /* add with immediate? */
1105 if(addr.index == NULL) {
1106 add_immediate_op = addr.base;
1107 } else if(addr.base == NULL && addr.scale == 0) {
1108 add_immediate_op = addr.index;
1111 if(add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself */
1112 if(!am_has_immediates(&addr)) {
1113 #ifdef DEBUG_libfirm
1114 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1117 return be_transform_node(add_immediate_op);
/* case 1: single operand + immediate -> Lea */
1120 new_node = create_lea_from_address(dbgi, new_block, &addr);
1121 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1125 /* test if we can use source address mode */
1126 match_arguments(&am, block, op1, op2, NULL, match_commutative
1127 | match_mode_neutral | match_am | match_immediate | match_try_am);
1129 /* construct an Add with source address mode */
1130 if (am.op_type == ia32_AddrModeS) {
1131 ir_graph *irg = current_ir_graph;
1132 ia32_address_t *am_addr = &am.addr;
1133 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1134 am_addr->index, am_addr->mem, am.new_op1,
1136 set_am_attributes(new_node, &am);
1137 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1139 new_node = fix_mem_proj(new_node, &am);
1144 /* otherwise construct a lea */
1145 new_node = create_lea_from_address(dbgi, new_block, &addr);
1146 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1151 * Creates an ia32 Mul.
1153 * @return the created ia32 Mul node
1155 static ir_node *gen_Mul(ir_node *node) {
1156 ir_node *op1 = get_Mul_left(node);
1157 ir_node *op2 = get_Mul_right(node);
1158 ir_mode *mode = get_irn_mode(node);
/* floats: xMul (SSE) or vfmul (x87); integers: IMul with full matching */
1160 if (mode_is_float(mode)) {
1161 if (ia32_cg_config.use_sse2)
1162 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1163 match_commutative | match_am);
1165 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1166 match_commutative | match_am);
1168 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1169 match_commutative | match_am | match_mode_neutral |
1170 match_immediate | match_am_and_immediates);
1174 * Creates an ia32 Mulh.
1175 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1176 * this result while Mul returns the lower 32 bit.
1178 * @return the created ia32 Mulh node
/* Transform a firm Mulh: build an ia32 IMul1OP (signed) or Mul (unsigned)
 * with source address mode and return the Proj of the high 32 result bits. */
1180 static ir_node *gen_Mulh(ir_node *node)
1182 ir_node *block = get_nodes_block(node);
1183 ir_node *new_block = be_transform_node(block);
1184 ir_graph *irg = current_ir_graph;
1185 dbg_info *dbgi = get_irn_dbg_info(node);
1186 ir_node *op1 = get_Mulh_left(node);
1187 ir_node *op2 = get_Mulh_right(node);
1188 ir_mode *mode = get_irn_mode(node);
1189 construct_binop_func *func;
1190 ir_node *proj_res_high;
1192 ia32_address_mode_t am;
1193 ia32_address_t *addr = &am.addr;
/* only 32-bit integer Mulh is supported here */
1195 assert(!mode_is_float(mode) && "Mulh with float not supported");
1196 assert(get_mode_size_bits(mode) == 32);
1198 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
/* pick the signed or unsigned widening multiply constructor */
1200 func = mode_is_signed(mode) ? new_rd_ia32_IMul1OP : new_rd_ia32_Mul;
1201 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
1202 am.new_op1, am.new_op2);
1204 set_am_attributes(new_node, &am);
1205 /* we can't use source address mode anymore when using immediates */
1206 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1207 set_ia32_am_support(new_node, ia32_am_none);
1208 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1210 assert(get_irn_mode(new_node) == mode_T);
1212 fix_mem_proj(new_node, &am);
/* both multiply variants must agree on the proj number of the high result */
1214 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
1215 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1216 mode_Iu, pn_ia32_IMul1OP_res_high);
1218 return proj_res_high;
1224 * Creates an ia32 And.
1226 * @return The created ia32 And node
/* Transform a firm And; a constant 0xFF/0xFFFF mask is recognized as a zero
 * extension and turned into an I2I Conv, otherwise an ia32 And is built. */
1228 static ir_node *gen_And(ir_node *node) {
1229 ir_node *op1 = get_And_left(node);
1230 ir_node *op2 = get_And_right(node);
1231 assert(! mode_is_float(get_irn_mode(node)));
1233 /* is it a zero extension? */
1234 if (is_Const(op2)) {
1235 tarval *tv = get_Const_tarval(op2);
1236 long v = get_tarval_long(tv);
1238 if (v == 0xFF || v == 0xFFFF) {
1239 dbg_info *dbgi = get_irn_dbg_info(node);
1240 ir_node *block = get_nodes_block(node);
/* NOTE(review): src_mode is presumably an 8- or 16-bit unsigned mode chosen
 * from v; the selection code is not visible here — confirm in full source */
1247 assert(v == 0xFFFF);
1250 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1255 return gen_binop(node, op1, op2, new_rd_ia32_And,
1256 match_commutative | match_mode_neutral | match_am
1263 * Creates an ia32 Or.
1265 * @return The created ia32 Or node
1267 static ir_node *gen_Or(ir_node *node) {
1268 ir_node *op1 = get_Or_left(node);
1269 ir_node *op2 = get_Or_right(node);
1271 assert (! mode_is_float(get_irn_mode(node)));
1272 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1273 | match_mode_neutral | match_am | match_immediate);
1279 * Creates an ia32 Eor.
1281 * @return The created ia32 Eor node
1283 static ir_node *gen_Eor(ir_node *node) {
1284 ir_node *op1 = get_Eor_left(node);
1285 ir_node *op2 = get_Eor_right(node);
1287 assert(! mode_is_float(get_irn_mode(node)));
1288 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1289 | match_mode_neutral | match_am | match_immediate);
1294 * Creates an ia32 Sub.
1296 * @return The created ia32 Sub node
1298 static ir_node *gen_Sub(ir_node *node) {
1299 ir_node *op1 = get_Sub_left(node);
1300 ir_node *op2 = get_Sub_right(node);
1301 ir_mode *mode = get_irn_mode(node);
1303 if (mode_is_float(mode)) {
1304 if (ia32_cg_config.use_sse2)
1305 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1307 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
1311 if (is_Const(op2)) {
1312 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1316 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1317 | match_am | match_immediate);
/* Build the memory input for a node using address mode: reuse src_mem when
 * there is no AM memory, avoid memory loops through the consumed load, filter
 * a Sync's predecessors, or combine src_mem with am_mem in a fresh Sync. */
1320 static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
1321 ir_node *const src_val,
1322 ir_node *const src_mem,
1323 ir_node *const am_mem)
/* no address-mode memory: just the transformed source memory */
1325 if (is_NoMem(am_mem)) {
1326 return be_transform_node(src_mem);
1327 } else if (is_Proj(src_val) &&
1329 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1330 /* avoid memory loop */
1332 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1333 ir_node *const ptr_pred = get_Proj_pred(src_val);
1334 int const arity = get_Sync_n_preds(src_mem);
/* collect the Sync predecessors, leaving room for the AM memory */
1339 NEW_ARR_A(ir_node*, ins, arity + 1);
1341 for (i = arity - 1; i >= 0; --i) {
1342 ir_node *const pred = get_Sync_pred(src_mem, i);
1344 /* avoid memory loop */
1345 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1348 ins[n++] = be_transform_node(pred);
1353 return new_r_Sync(irg, block, n, ins);
/* default case: Sync of the transformed src_mem and the AM memory */
1357 ins[0] = be_transform_node(src_mem);
1359 return new_r_Sync(irg, block, 2, ins);
1364 * Generates an ia32 DivMod with additional infrastructure for the
1365 * register allocator if needed.
/* Common transformation for Div/Mod/DivMod: build an ia32 IDiv (signed, with
 * Cltd sign extension of the dividend) or Div (unsigned, with a zero upper
 * half) using source address mode for the divisor. */
1367 static ir_node *create_Div(ir_node *node)
1369 ir_graph *irg = current_ir_graph;
1370 dbg_info *dbgi = get_irn_dbg_info(node);
1371 ir_node *block = get_nodes_block(node);
1372 ir_node *new_block = be_transform_node(block);
1379 ir_node *sign_extension;
1380 ia32_address_mode_t am;
1381 ia32_address_t *addr = &am.addr;
1383 /* the upper bits have random contents for smaller modes */
/* extract operands/mem/mode depending on which firm opcode we got */
1384 switch (get_irn_opcode(node)) {
1386 op1 = get_Div_left(node);
1387 op2 = get_Div_right(node);
1388 mem = get_Div_mem(node);
1389 mode = get_Div_resmode(node);
1392 op1 = get_Mod_left(node);
1393 op2 = get_Mod_right(node);
1394 mem = get_Mod_mem(node);
1395 mode = get_Mod_resmode(node);
1398 op1 = get_DivMod_left(node);
1399 op2 = get_DivMod_right(node);
1400 mem = get_DivMod_mem(node);
1401 mode = get_DivMod_resmode(node);
1404 panic("invalid divmod node %+F", node);
1407 match_arguments(&am, block, op1, op2, NULL, match_am);
1409 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1410 is the memory of the consumed address. We can have only the second op as address
1411 in Div nodes, so check only op2. */
1412 new_mem = transform_AM_mem(irg, block, op2, mem, addr->mem);
1414 if (mode_is_signed(mode)) {
/* signed: sign-extend eax into edx via Cltd before IDiv */
1415 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1416 add_irn_dep(produceval, get_irg_frame(irg));
1417 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1420 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1421 addr->index, new_mem, am.new_op2,
1422 am.new_op1, sign_extension);
/* unsigned: the upper half is a constant zero */
1424 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1425 add_irn_dep(sign_extension, get_irg_frame(irg));
1427 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1428 addr->index, new_mem, am.new_op2,
1429 am.new_op1, sign_extension);
1432 set_irn_pinned(new_node, get_irn_pinned(node));
1434 set_am_attributes(new_node, &am);
1435 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1437 new_node = fix_mem_proj(new_node, &am);
1443 static ir_node *gen_Mod(ir_node *node) {
1444 return create_Div(node);
1447 static ir_node *gen_Div(ir_node *node) {
1448 return create_Div(node);
1451 static ir_node *gen_DivMod(ir_node *node) {
1452 return create_Div(node);
1458 * Creates an ia32 floating Div.
1460 * @return The created ia32 xDiv node
1462 static ir_node *gen_Quot(ir_node *node)
1464 ir_node *op1 = get_Quot_left(node);
1465 ir_node *op2 = get_Quot_right(node);
1467 if (ia32_cg_config.use_sse2) {
1468 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1470 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1476 * Creates an ia32 Shl.
1478 * @return The created ia32 Shl node
1480 static ir_node *gen_Shl(ir_node *node) {
1481 ir_node *left = get_Shl_left(node);
1482 ir_node *right = get_Shl_right(node);
1484 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1485 match_mode_neutral | match_immediate);
1489 * Creates an ia32 Shr.
1491 * @return The created ia32 Shr node
1493 static ir_node *gen_Shr(ir_node *node) {
1494 ir_node *left = get_Shr_left(node);
1495 ir_node *right = get_Shr_right(node);
1497 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1503 * Creates an ia32 Sar.
1505 * @return The created ia32 Shrs node
/* Transform a firm Shrs (arithmetic shift right). Two special patterns are
 * recognized before falling back to a plain Sar:
 *  - Shrs(x, 31) on mode_Is: a full sign extension, emitted as Cltd
 *  - Shrs(Shl(x, c), c) with c == 16 or 24: an 8/16-bit sign extension,
 *    emitted as a sign-extending I2I Conv */
1507 static ir_node *gen_Shrs(ir_node *node) {
1508 ir_node *left = get_Shrs_left(node);
1509 ir_node *right = get_Shrs_right(node);
1510 ir_mode *mode = get_irn_mode(node);
1512 if(is_Const(right) && mode == mode_Is) {
1513 tarval *tv = get_Const_tarval(right);
1514 long val = get_tarval_long(tv);
1516 /* this is a sign extension */
1517 ir_graph *irg = current_ir_graph;
1518 dbg_info *dbgi = get_irn_dbg_info(node);
1519 ir_node *block = be_transform_node(get_nodes_block(node));
1521 ir_node *new_op = be_transform_node(op);
1522 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1523 add_irn_dep(pval, get_irg_frame(irg));
1525 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1529 /* 8 or 16 bit sign extension? */
1530 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1531 ir_node *shl_left = get_Shl_left(left);
1532 ir_node *shl_right = get_Shl_right(left);
1533 if(is_Const(shl_right)) {
1534 tarval *tv1 = get_Const_tarval(right);
1535 tarval *tv2 = get_Const_tarval(shl_right);
/* both shift amounts must be the same constant */
1536 if(tv1 == tv2 && tarval_is_long(tv1)) {
1537 long val = get_tarval_long(tv1);
1538 if(val == 16 || val == 24) {
1539 dbg_info *dbgi = get_irn_dbg_info(node);
1540 ir_node *block = get_nodes_block(node);
1550 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* fallback: ordinary arithmetic shift right */
1559 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1565 * Creates an ia32 Rol.
1567 * @param op1 The first operator
1568 * @param op2 The second operator
1569 * @return The created ia32 RotL node
1571 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2) {
1572 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1578 * Creates an ia32 Ror.
1579 * NOTE: There is no RotR with immediate because this would always be a RotL
1580 * "imm-mode_size_bits" which can be pre-calculated.
1582 * @param op1 The first operator
1583 * @param op2 The second operator
1584 * @return The created ia32 RotR node
1586 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2) {
1587 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1593 * Creates an ia32 RotR or RotL (depending on the found pattern).
1595 * @return The created ia32 RotL or RotR node
/* Transform a firm Rotl. If the rotate amount is "bits - e" (written as
 * Add(Minus(e), bits)), emit a RotR by e instead of computing the negated
 * amount and rotating left; otherwise emit a plain RotL. */
1597 static ir_node *gen_Rotl(ir_node *node) {
1598 ir_node *rotate = NULL;
1599 ir_node *op1 = get_Rotl_left(node);
1600 ir_node *op2 = get_Rotl_right(node);
1602 /* Firm has only RotL, so we are looking for a right (op2)
1603 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1604 that means we can create a RotR instead of an Add and a RotL */
1608 ir_node *left = get_Add_left(add);
1609 ir_node *right = get_Add_right(add);
1610 if (is_Const(right)) {
1611 tarval *tv = get_Const_tarval(right);
1612 ir_mode *mode = get_irn_mode(node);
1613 long bits = get_mode_size_bits(mode);
/* matched: Minus(e) + bits --> RotR by e */
1615 if (is_Minus(left) &&
1616 tarval_is_long(tv) &&
1617 get_tarval_long(tv) == bits &&
1620 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1621 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* no RotR pattern found: plain rotate left */
1626 if (rotate == NULL) {
1627 rotate = gen_Rol(node, op1, op2);
1636 * Transforms a Minus node.
1638 * @return The created ia32 Minus node
/* Transform a firm Minus. Floats: SSE2 flips the sign bit with an xXor
 * against a known sign-mask constant; x87 uses vfchs. Integers: ia32 Neg. */
1640 static ir_node *gen_Minus(ir_node *node)
1642 ir_node *op = get_Minus_op(node);
1643 ir_node *block = be_transform_node(get_nodes_block(node));
1644 ir_graph *irg = current_ir_graph;
1645 dbg_info *dbgi = get_irn_dbg_info(node);
1646 ir_mode *mode = get_irn_mode(node);
1651 if (mode_is_float(mode)) {
1652 ir_node *new_op = be_transform_node(op);
1653 if (ia32_cg_config.use_sse2) {
1654 /* TODO: non-optimal... if we have many xXors, then we should
1655 * rather create a load for the const and use that instead of
1656 * several AM nodes... */
1657 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1658 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1659 ir_node *nomem = new_rd_NoMem(irg);
1661 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1662 nomem, new_op, noreg_xmm);
/* load the matching sign-bit mask (single or double precision) via AM */
1664 size = get_mode_size_bits(mode);
1665 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1667 set_ia32_am_sc(new_node, ent);
1668 set_ia32_op_type(new_node, ia32_AddrModeS);
1669 set_ia32_ls_mode(new_node, mode);
/* x87: dedicated change-sign instruction */
1671 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
/* integer negate */
1674 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1677 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1683 * Transforms a Not node.
1685 * @return The created ia32 Not node
1687 static ir_node *gen_Not(ir_node *node) {
1688 ir_node *op = get_Not_op(node);
1690 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1691 assert (! mode_is_float(get_irn_mode(node)));
1693 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1699 * Transforms an Abs node.
1701 * @return The created ia32 Abs node
/* Transform a firm Abs. Floats: SSE2 masks the sign bit with an xAnd against
 * a known abs-mask constant; x87 uses vfabs. Integers: the classic branchless
 * sequence sign = x >> 31 (via Cltd); result = (x ^ sign) - sign. */
1703 static ir_node *gen_Abs(ir_node *node)
1705 ir_node *block = get_nodes_block(node);
1706 ir_node *new_block = be_transform_node(block);
1707 ir_node *op = get_Abs_op(node);
1708 ir_graph *irg = current_ir_graph;
1709 dbg_info *dbgi = get_irn_dbg_info(node);
1710 ir_mode *mode = get_irn_mode(node);
1711 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1712 ir_node *nomem = new_NoMem();
1718 if (mode_is_float(mode)) {
1719 new_op = be_transform_node(op);
1721 if (ia32_cg_config.use_sse2) {
1722 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1723 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1724 nomem, new_op, noreg_fp);
/* mask away the sign bit with the matching absolute-value constant */
1726 size = get_mode_size_bits(mode);
1727 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1729 set_ia32_am_sc(new_node, ent);
1731 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1733 set_ia32_op_type(new_node, ia32_AddrModeS);
1734 set_ia32_ls_mode(new_node, mode);
/* x87: dedicated absolute-value instruction */
1736 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1737 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1740 ir_node *xor, *pval, *sign_extension;
/* smaller integer modes are first widened to 32 bit */
1742 if (get_mode_size_bits(mode) == 32) {
1743 new_op = be_transform_node(op);
1745 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1748 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1749 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1752 add_irn_dep(pval, get_irg_frame(irg));
1753 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1755 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1756 nomem, new_op, sign_extension);
1757 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1759 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1760 nomem, xor, sign_extension);
1761 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1768 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1770 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n) {
1771 dbg_info *dbgi = get_irn_dbg_info(cmp);
1772 ir_node *block = get_nodes_block(cmp);
1773 ir_node *new_block = be_transform_node(block);
1774 ir_node *op1 = be_transform_node(x);
1775 ir_node *op2 = be_transform_node(n);
1777 return new_rd_ia32_Bt(dbgi, current_ir_graph, new_block, op1, op2);
1781 * Transform a node returning a "flag" result.
1783 * @param node the node to transform
1784 * @param pnc_out the compare mode to use
/* Produce a flags-producing node for a mode_b value. A Proj of a Cmp is
 * transformed directly; Cmp(x & (1 << n), 0) patterns become a Bt and the
 * compare relation is rewritten so the caller emits Jc/Jnc. Any other mode_b
 * value is compared against zero with a Test. The resulting compare relation
 * is returned through pnc_out. */
1786 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1795 /* we have a Cmp as input */
1796 if (is_Proj(node)) {
1797 ir_node *pred = get_Proj_pred(node);
1799 pn_Cmp pnc = get_Proj_proj(node);
/* try the bit-test pattern when bt is enabled and we test ==/!= */
1800 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1801 ir_node *l = get_Cmp_left(pred);
1802 ir_node *r = get_Cmp_right(pred);
1804 ir_node *la = get_And_left(l);
1805 ir_node *ra = get_And_right(l);
1807 ir_node *c = get_Shl_left(la);
1808 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1809 /* (1 << n) & ra) */
1810 ir_node *n = get_Shl_right(la);
1811 flags = gen_bt(pred, ra, n);
1812 /* we must generate a Jc/Jnc jump */
1813 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1816 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* mirrored case: shift on the right-hand And operand */
1821 ir_node *c = get_Shl_left(ra);
1822 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1823 /* la & (1 << n)) */
1824 ir_node *n = get_Shl_right(ra);
1825 flags = gen_bt(pred, la, n);
1826 /* we must generate a Jc/Jnc jump */
1827 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1830 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* plain Cmp: its transformation produces the flags */
1836 flags = be_transform_node(pred);
1842 /* a mode_b value, we have to compare it against 0 */
1843 dbgi = get_irn_dbg_info(node);
1844 new_block = be_transform_node(get_nodes_block(node));
1845 new_op = be_transform_node(node);
1846 noreg = ia32_new_NoReg_gp(env_cg);
1847 nomem = new_NoMem();
1848 flags = new_rd_ia32_Test(dbgi, current_ir_graph, new_block, noreg, noreg, nomem,
1849 new_op, new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1850 *pnc_out = pn_Cmp_Lg;
1855 * Transforms a Load.
1857 * @return the created ia32 Load node
/* Transform a firm Load into xLoad (SSE2 float), vfld (x87 float), a
 * sign/zero-extending Conv_I2I (integers < 32 bit) or a plain ia32 Load,
 * folding the pointer into an ia32 address mode. */
1859 static ir_node *gen_Load(ir_node *node) {
1860 ir_node *old_block = get_nodes_block(node);
1861 ir_node *block = be_transform_node(old_block);
1862 ir_node *ptr = get_Load_ptr(node);
1863 ir_node *mem = get_Load_mem(node);
1864 ir_node *new_mem = be_transform_node(mem);
1867 ir_graph *irg = current_ir_graph;
1868 dbg_info *dbgi = get_irn_dbg_info(node);
1869 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1870 ir_mode *mode = get_Load_mode(node);
1873 ia32_address_t addr;
1875 /* construct load address */
1876 memset(&addr, 0, sizeof(addr));
1877 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1884 base = be_transform_node(base);
1890 index = be_transform_node(index);
1893 if (mode_is_float(mode)) {
1894 if (ia32_cg_config.use_sse2) {
1895 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1897 res_mode = mode_xmm;
1899 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1901 res_mode = mode_vfp;
1904 assert(mode != mode_b);
1906 /* create a conv node with address mode for smaller modes */
1907 if(get_mode_size_bits(mode) < 32) {
1908 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1909 new_mem, noreg, mode);
1911 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1916 set_irn_pinned(new_node, get_irn_pinned(node));
1917 set_ia32_op_type(new_node, ia32_AddrModeS);
1918 set_ia32_ls_mode(new_node, mode);
1919 set_address(new_node, &addr);
/* an unpinned (floating) load may be rematerialized instead of spilled */
1921 if(get_irn_pinned(node) == op_pin_state_floats) {
1922 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1925 /* make sure we are scheduled behind the initial IncSP/Barrier
1926 * to avoid spills being placed before it
1928 if (block == get_irg_start_block(irg)) {
1929 add_irn_dep(new_node, get_irg_frame(irg));
1932 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Check whether the Load behind Proj `node` can be folded into a
 * destination-address-mode operation for a Store in `block` to `ptr`.
 * `other` is the second operand of the computation (may be NULL); it must
 * not depend on the load. Returns nonzero when folding is safe. */
1937 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1938 ir_node *ptr, ir_node *other)
1945 /* we only use address mode if we're the only user of the load */
1946 if (get_irn_n_edges(node) > 1)
1949 load = get_Proj_pred(node);
/* the load must live in the same block as the store */
1952 if (get_nodes_block(load) != block)
1955 /* store should have the same pointer as the load */
1956 if (get_Load_ptr(load) != ptr)
1959 /* don't do AM if other node inputs depend on the load (via mem-proj) */
1960 if (other != NULL &&
1961 get_nodes_block(other) == block &&
1962 heights_reachable_in_block(heights, other, load)) {
/* when the store memory is a Sync, every predecessor besides the load's
 * own mem-proj must be independent of the load */
1969 for (i = get_Sync_n_preds(mem) - 1; i >= 0; --i) {
1970 ir_node *const pred = get_Sync_pred(mem, i);
1972 if (is_Proj(pred) && get_Proj_pred(pred) == load)
1975 if (get_nodes_block(pred) == block &&
1976 heights_reachable_in_block(heights, pred, load)) {
1981 /* Store should be attached to the load */
1982 if (!is_Proj(mem) || get_Proj_pred(mem) != load)
/* Build a destination-address-mode binop (e.g. AddMem/OrMem): the operation
 * reads and writes memory directly, consuming the Load/Store pair. Returns
 * NULL when destination address mode cannot be used. */
1989 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
1990 ir_node *mem, ir_node *ptr, ir_mode *mode,
1991 construct_binop_dest_func *func,
1992 construct_binop_dest_func *func8bit,
1993 match_flags_t flags)
1995 ir_node *src_block = get_nodes_block(node);
1997 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1998 ir_graph *irg = current_ir_graph;
2005 ia32_address_mode_t am;
2006 ia32_address_t *addr = &am.addr;
2007 memset(&am, 0, sizeof(am));
2009 assert(flags & match_dest_am);
2010 assert(flags & match_immediate); /* there is no destam node without... */
2011 commutative = (flags & match_commutative) != 0;
/* try folding op1's load; for commutative ops also try op2's load */
2013 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
2014 build_address(&am, op1);
2015 new_op = create_immediate_or_transform(op2, 0);
2016 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2017 build_address(&am, op2);
2018 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address parts with NoReg/NoMem placeholders */
2023 if(addr->base == NULL)
2024 addr->base = noreg_gp;
2025 if(addr->index == NULL)
2026 addr->index = noreg_gp;
2027 if(addr->mem == NULL)
2028 addr->mem = new_NoMem();
2030 dbgi = get_irn_dbg_info(node);
2031 block = be_transform_node(src_block);
2032 new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
/* 8-bit operations need the dedicated 8-bit constructor */
2034 if(get_mode_size_bits(mode) == 8) {
2035 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
2038 new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem,
2041 set_address(new_node, addr);
2042 set_ia32_op_type(new_node, ia32_AddrModeD);
2043 set_ia32_ls_mode(new_node, mode);
2044 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* redirect the consumed load's memory proj to the new node */
2046 set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
2047 mem_proj = be_transform_node(am.mem_proj);
2048 set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a destination-address-mode unop (e.g. IncMem/NotMem): the operation
 * modifies memory in place, consuming the Load/Store pair. Returns NULL when
 * destination address mode cannot be used. */
2053 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2054 ir_node *ptr, ir_mode *mode,
2055 construct_unop_dest_func *func)
2057 ir_graph *irg = current_ir_graph;
2058 ir_node *src_block = get_nodes_block(node);
2064 ia32_address_mode_t am;
2065 ia32_address_t *addr = &am.addr;
2066 memset(&am, 0, sizeof(am));
2068 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2071 build_address(&am, op);
2073 dbgi = get_irn_dbg_info(node);
2074 block = be_transform_node(src_block);
2075 new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
2076 new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem);
2077 set_address(new_node, addr);
2078 set_ia32_op_type(new_node, ia32_AddrModeD);
2079 set_ia32_ls_mode(new_node, mode);
2080 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* redirect the consumed load's memory proj to the new node */
2082 set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
2083 mem_proj = be_transform_node(am.mem_proj);
2084 set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to turn Store(Mux(sel, 0/1, 1/0)) into an ia32 SetMem, writing the
 * condition's result byte straight to memory. Only 8-bit values qualify.
 * Returns NULL when the pattern does not match. */
2089 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2090 ir_mode *mode = get_irn_mode(node);
2091 ir_node *mux_true = get_Mux_true(node);
2092 ir_node *mux_false = get_Mux_false(node);
2103 ia32_address_t addr;
2105 if(get_mode_size_bits(mode) != 8)
/* the Mux must select between the constants 0 and 1 (either order; the
 * reversed order is handled by negating the condition) */
2108 if(is_Const_1(mux_true) && is_Const_0(mux_false)) {
2110 } else if(is_Const_0(mux_true) && is_Const_1(mux_false)) {
2116 build_address_ptr(&addr, ptr, mem);
2118 irg = current_ir_graph;
2119 dbgi = get_irn_dbg_info(node);
2120 block = get_nodes_block(node);
2121 new_block = be_transform_node(block);
2122 cond = get_Mux_sel(node);
2123 flags = get_flags_node(cond, &pnc);
2124 new_mem = be_transform_node(mem);
2125 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2126 addr.index, addr.mem, flags, pnc, negated);
2127 set_address(new_node, &addr);
2128 set_ia32_op_type(new_node, ia32_AddrModeD);
2129 set_ia32_ls_mode(new_node, mode);
2130 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to express a Store(op(Load(ptr), x)) as a single destination-address-
 * mode instruction (AddMem, SubMem, ...). Dispatches on the stored value's
 * opcode; returns the new node or NULL when no pattern applies. */
2135 static ir_node *try_create_dest_am(ir_node *node) {
2136 ir_node *val = get_Store_value(node);
2137 ir_node *mem = get_Store_mem(node);
2138 ir_node *ptr = get_Store_ptr(node);
2139 ir_mode *mode = get_irn_mode(val);
2140 unsigned bits = get_mode_size_bits(mode);
2145 /* handle only GP modes for now... */
2146 if(!ia32_mode_needs_gp_reg(mode))
2150 /* store must be the only user of the val node */
2151 if(get_irn_n_edges(val) > 1)
2153 /* skip pointless convs */
2155 ir_node *conv_op = get_Conv_op(val);
2156 ir_mode *pred_mode = get_irn_mode(conv_op);
2157 if (!ia32_mode_needs_gp_reg(pred_mode))
2159 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2167 /* value must be in the same block */
2168 if(get_nodes_block(node) != get_nodes_block(val))
2171 switch (get_irn_opcode(val)) {
/* Add by +1/-1 becomes IncMem/DecMem, otherwise AddMem */
2173 op1 = get_Add_left(val);
2174 op2 = get_Add_right(val);
2175 if(is_Const_1(op2)) {
2176 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2177 new_rd_ia32_IncMem);
2179 } else if(is_Const_Minus_1(op2)) {
2180 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2181 new_rd_ia32_DecMem);
2184 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2185 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2186 match_dest_am | match_commutative |
2190 op1 = get_Sub_left(val);
2191 op2 = get_Sub_right(val);
2192 if (is_Const(op2)) {
2193 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2195 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2196 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2197 match_dest_am | match_immediate |
2201 op1 = get_And_left(val);
2202 op2 = get_And_right(val);
2203 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2204 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2205 match_dest_am | match_commutative |
2209 op1 = get_Or_left(val);
2210 op2 = get_Or_right(val);
2211 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2212 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2213 match_dest_am | match_commutative |
2217 op1 = get_Eor_left(val);
2218 op2 = get_Eor_right(val);
2219 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2220 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2221 match_dest_am | match_commutative |
2225 op1 = get_Shl_left(val);
2226 op2 = get_Shl_right(val);
2227 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2228 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2229 match_dest_am | match_immediate);
2232 op1 = get_Shr_left(val);
2233 op2 = get_Shr_right(val);
2234 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2235 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2236 match_dest_am | match_immediate);
2239 op1 = get_Shrs_left(val);
2240 op2 = get_Shrs_right(val);
2241 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2242 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2243 match_dest_am | match_immediate);
2246 op1 = get_Rotl_left(val);
2247 op2 = get_Rotl_right(val);
2248 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2249 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2250 match_dest_am | match_immediate);
2252 /* TODO: match ROR patterns... */
/* a stored Mux may become a SetMem */
2254 new_node = try_create_SetMem(val, ptr, mem);
2257 op1 = get_Minus_op(val);
2258 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2261 /* should be lowered already */
2262 assert(mode != mode_b);
2263 op1 = get_Not_op(val);
2264 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* inherit pinned state: a pinned Store must yield a pinned replacement */
2270 if(new_node != NULL) {
2271 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2272 get_irn_pinned(node) == op_pin_state_pinned) {
2273 set_irn_pinned(new_node, op_pin_state_pinned);
2280 static int is_float_to_int32_conv(const ir_node *node)
2282 ir_mode *mode = get_irn_mode(node);
2286 if(get_mode_size_bits(mode) != 32 || !ia32_mode_needs_gp_reg(mode))
2288 /* don't report unsigned as conv to 32bit, because we really need to do
2289 * a vfist with 64bit signed in this case */
2290 if(!mode_is_signed(mode))
2295 conv_op = get_Conv_op(node);
2296 conv_mode = get_irn_mode(conv_op);
2298 if(!mode_is_float(conv_mode))
2305 * Transform a Store(floatConst).
2307 * @return the created ia32 Store node
/* Store a floating point constant by writing its raw bits as one or more
 * 32-bit immediate integer Stores (Synced together when more than one). */
2309 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2311 ir_mode *mode = get_irn_mode(cns);
2312 unsigned size = get_mode_size_bytes(mode);
2313 tarval *tv = get_Const_tarval(cns);
2314 ir_node *block = get_nodes_block(node);
2315 ir_node *new_block = be_transform_node(block);
2316 ir_node *ptr = get_Store_ptr(node);
2317 ir_node *mem = get_Store_mem(node);
2318 ir_graph *irg = current_ir_graph;
2319 dbg_info *dbgi = get_irn_dbg_info(node);
2323 ia32_address_t addr;
/* the constant is written in whole 32-bit words */
2325 assert(size % 4 == 0);
2328 build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32-bit word of the constant's bit pattern */
2332 get_tarval_sub_bits(tv, ofs) |
2333 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2334 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2335 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2336 ir_node *imm = create_Immediate(NULL, 0, val);
2338 ir_node *new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2339 addr.index, addr.mem, imm);
2341 set_irn_pinned(new_node, get_irn_pinned(node));
2342 set_ia32_op_type(new_node, ia32_AddrModeD);
2343 set_ia32_ls_mode(new_node, mode_Iu);
2344 set_address(new_node, &addr);
2345 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2347 ins[i++] = new_node;
2352 } while (size != 0);
/* a single word needs no Sync over the partial stores */
2354 return i == 1 ? ins[0] : new_rd_Sync(dbgi, irg, new_block, i, ins);
2358 * Generate a vfist or vfisttp instruction.
/* Generate a vfist or vfisttp instruction storing an x87 value as integer.
 * The created store node is also returned through *fist; the return value is
 * the memory result. */
2360 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2361 ir_node *mem, ir_node *val, ir_node **fist)
2365 if (ia32_cg_config.use_fisttp) {
2366 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2367 if other users exists */
2368 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2369 ir_node *vfisttp = new_rd_ia32_vfisttp(dbgi, irg, block, base, index, mem, val);
2370 ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
/* keep the popped value alive for the register allocator */
2371 be_new_Keep(reg_class, irg, block, 1, &value);
2373 new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* no fisttp: use vfist with an explicit truncating FPU control word */
2376 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2379 new_node = new_rd_ia32_vfist(dbgi, irg, block, base, index, mem, val, trunc_mode);
2385 * Transforms a normal Store.
2387 * @return the created ia32 Store node
/* Transform an ordinary Store: first try destination address mode, then emit
 * xStore (SSE2 float), vfst (x87 float), a vfist for float->int32 stores, or
 * a plain (8-bit or 32-bit) integer Store. */
2389 static ir_node *gen_normal_Store(ir_node *node)
2391 ir_node *val = get_Store_value(node);
2392 ir_mode *mode = get_irn_mode(val);
2393 ir_node *block = get_nodes_block(node);
2394 ir_node *new_block = be_transform_node(block);
2395 ir_node *ptr = get_Store_ptr(node);
2396 ir_node *mem = get_Store_mem(node);
2397 ir_graph *irg = current_ir_graph;
2398 dbg_info *dbgi = get_irn_dbg_info(node);
2399 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2400 ir_node *new_val, *new_node, *store;
2401 ia32_address_t addr;
2403 /* check for destination address mode */
2404 new_node = try_create_dest_am(node);
2405 if (new_node != NULL)
2408 /* construct store address */
2409 memset(&addr, 0, sizeof(addr));
2410 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2412 if (addr.base == NULL) {
2415 addr.base = be_transform_node(addr.base);
2418 if (addr.index == NULL) {
2421 addr.index = be_transform_node(addr.index);
2423 addr.mem = be_transform_node(mem);
2425 if (mode_is_float(mode)) {
2426 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2428 while (is_Conv(val) && mode == get_irn_mode(val)) {
2429 ir_node *op = get_Conv_op(val);
2430 if (!mode_is_float(get_irn_mode(op)))
2434 new_val = be_transform_node(val);
2435 if (ia32_cg_config.use_sse2) {
2436 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2437 addr.index, addr.mem, new_val);
2439 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2440 addr.index, addr.mem, new_val, mode);
/* float -> int32 Conv feeding a Store: fold into a vfist */
2443 } else if (!ia32_cg_config.use_sse2 && is_float_to_int32_conv(val)) {
2444 val = get_Conv_op(val);
2446 /* TODO: is this optimisation still necessary at all (middleend)? */
2447 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2448 while (is_Conv(val)) {
2449 ir_node *op = get_Conv_op(val);
2450 if (!mode_is_float(get_irn_mode(op)))
2452 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2456 new_val = be_transform_node(val);
2457 new_node = gen_vfist(dbgi, irg, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store; 8-bit values need the dedicated 8-bit Store */
2459 new_val = create_immediate_or_transform(val, 0);
2460 assert(mode != mode_b);
2462 if (get_mode_size_bits(mode) == 8) {
2463 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2464 addr.index, addr.mem, new_val);
2466 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2467 addr.index, addr.mem, new_val);
2472 set_irn_pinned(store, get_irn_pinned(node));
2473 set_ia32_op_type(store, ia32_AddrModeD);
2474 set_ia32_ls_mode(store, mode);
2476 set_address(store, &addr);
2477 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
2483 * Transforms a Store.
2485 * @return the created ia32 Store node
2487 static ir_node *gen_Store(ir_node *node)
2489 ir_node *val = get_Store_value(node);
2490 ir_mode *mode = get_irn_mode(val);
/* Storing a float constant can sometimes be done more cheaply as an
 * integer store of its bit pattern; only take that path when the
 * constant is not "simple" for the active FPU (SSE2 vs. x87). */
2492 if (mode_is_float(mode) && is_Const(val)) {
2495 /* we are storing a floating point constant */
2496 if (ia32_cg_config.use_sse2) {
2497 transform = !is_simple_sse_Const(val);
2499 transform = !is_simple_x87_Const(val);
2502 return gen_float_const_Store(node, val);
/* all other cases go through the generic store transformation */
2504 return gen_normal_Store(node);
2508 * Transforms a Switch.
2510 * @return the created ia32 SwitchJmp node
2512 static ir_node *create_Switch(ir_node *node)
2514 ir_graph *irg = current_ir_graph;
2515 dbg_info *dbgi = get_irn_dbg_info(node);
2516 ir_node *block = be_transform_node(get_nodes_block(node));
2517 ir_node *sel = get_Cond_selector(node);
2518 ir_node *new_sel = be_transform_node(sel);
2519 int switch_min = INT_MAX;
2520 int switch_max = INT_MIN;
2521 long default_pn = get_Cond_defaultProj(node);
2523 const ir_edge_t *edge;
2525 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2527 /* determine the smallest switch case value */
2528 foreach_out_edge(node, edge) {
2529 ir_node *proj = get_edge_src_irn(edge);
2530 long pn = get_Proj_proj(proj);
2531 if(pn == default_pn)
/* refuse to build a jump table for absurdly sparse/huge switches */
2540 if((unsigned) (switch_max - switch_min) > 256000) {
2541 panic("Size of switch %+F bigger than 256000", node);
2544 if (switch_min != 0) {
2545 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2547 /* if smallest switch case is not 0 we need an additional sub */
/* rebase the selector to start at 0 using a Lea with a negative offset */
2548 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2549 add_ia32_am_offs_int(new_sel, -switch_min);
2550 set_ia32_op_type(new_sel, ia32_AddrModeS);
2552 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2555 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2556 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2562 * Transform a Cond node.
2564 static ir_node *gen_Cond(ir_node *node) {
2565 ir_node *block = get_nodes_block(node);
2566 ir_node *new_block = be_transform_node(block);
2567 ir_graph *irg = current_ir_graph;
2568 dbg_info *dbgi = get_irn_dbg_info(node);
2569 ir_node *sel = get_Cond_selector(node);
2570 ir_mode *sel_mode = get_irn_mode(sel);
2571 ir_node *flags = NULL;
/* a non-boolean selector means this Cond is really a switch */
2575 if (sel_mode != mode_b) {
2576 return create_Switch(node);
2579 /* we get flags from a Cmp */
2580 flags = get_flags_node(sel, &pnc);
/* conditional jump on the flags produced above */
2582 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2583 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Transform a be_Copy: duplicate it and normalize gp register modes to mode_Iu.
 */
2588 static ir_node *gen_be_Copy(ir_node *node)
2590 ir_node *new_node = be_duplicate_node(node);
2591 ir_mode *mode = get_irn_mode(new_node);
/* all integer operations in the ia32 backend work on 32bit registers */
2593 if (ia32_mode_needs_gp_reg(mode)) {
2594 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare.  Uses fucomi when available, otherwise
 * falls back to ftst/fucom + fnstsw + sahf to get the result into eflags.
 */
2600 static ir_node *create_Fucom(ir_node *node)
2602 ir_graph *irg = current_ir_graph;
2603 dbg_info *dbgi = get_irn_dbg_info(node);
2604 ir_node *block = get_nodes_block(node);
2605 ir_node *new_block = be_transform_node(block);
2606 ir_node *left = get_Cmp_left(node);
2607 ir_node *new_left = be_transform_node(left);
2608 ir_node *right = get_Cmp_right(node);
/* fucomi writes eflags directly, no fnstsw/sahf dance needed */
2612 if(ia32_cg_config.use_fucomi) {
2613 new_right = be_transform_node(right);
2614 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2616 set_ia32_commutative(new_node);
2617 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* compare against 0 can use the shorter ftst */
2619 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2620 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2623 new_right = be_transform_node(right);
2624 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2628 set_ia32_commutative(new_node);
2630 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* transfer the fpu status word (in ax) into eflags */
2632 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2633 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Create an SSE Ucomi compare (commutative, may fold a memory operand
 * via address mode matching).
 */
2639 static ir_node *create_Ucomi(ir_node *node)
2641 ir_graph *irg = current_ir_graph;
2642 dbg_info *dbgi = get_irn_dbg_info(node);
2643 ir_node *src_block = get_nodes_block(node);
2644 ir_node *new_block = be_transform_node(src_block);
2645 ir_node *left = get_Cmp_left(node);
2646 ir_node *right = get_Cmp_right(node);
2648 ia32_address_mode_t am;
2649 ia32_address_t *addr = &am.addr;
2651 match_arguments(&am, src_block, left, right, NULL,
2652 match_commutative | match_am);
2654 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2655 addr->mem, am.new_op1, am.new_op2,
2657 set_am_attributes(new_node, &am);
2659 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2661 new_node = fix_mem_proj(new_node, &am);
2667 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2668 * to fold an and into a test node
2670 static bool can_fold_test_and(ir_node *node)
2672 const ir_edge_t *edge;
2674 /* we can only have Eq and Lg projs */
2675 foreach_out_edge(node, edge) {
2676 ir_node *proj = get_edge_src_irn(edge);
2677 pn_Cmp pnc = get_Proj_proj(proj);
2678 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2686 * returns true if it is assured, that the upper bits of a node are "clean"
2687 * which means for a 16 or 8 bit value, that the upper bits in the register
2688 * are 0 for unsigned and a copy of the most significant (sign) bit for signed
2691 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2693 assert(ia32_mode_needs_gp_reg(mode));
/* full-width values trivially have no "upper" bits to worry about */
2694 if (get_mode_size_bits(mode) >= 32)
2697 if (is_Proj(transformed_node))
2698 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
/* a conversion to a smaller-or-equal mode of matching signedness
 * guarantees clean upper bits */
2700 if (is_ia32_Conv_I2I(transformed_node)
2701 || is_ia32_Conv_I2I8Bit(transformed_node)) {
2702 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2703 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2705 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* a logical right shift by a large enough constant zeroes the upper bits */
2711 if (is_ia32_Shr(transformed_node) && !mode_is_signed(mode)) {
2712 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2713 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2714 const ia32_immediate_attr_t *attr
2715 = get_ia32_immediate_attr_const(right);
2716 if (attr->symconst == 0
2717 && (unsigned) attr->offset >= (32 - get_mode_size_bits(mode))) {
2721 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
/* an And with a mask that fits the mode zeroes the upper bits */
2724 if (is_ia32_And(transformed_node) && !mode_is_signed(mode)) {
2725 ir_node *right = get_irn_n(transformed_node, n_ia32_And_right);
2726 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2727 const ia32_immediate_attr_t *attr
2728 = get_ia32_immediate_attr_const(right);
2729 if (attr->symconst == 0
2730 && (unsigned) attr->offset
2731 <= (0xffffffff >> (32 - get_mode_size_bits(mode)))) {
2738 /* TODO recurse on Or, Xor, ... if appropriate? */
/* immediates: check the constant value itself */
2740 if (is_ia32_Immediate(transformed_node)
2741 || is_ia32_Const(transformed_node)) {
2742 const ia32_immediate_attr_t *attr
2743 = get_ia32_immediate_attr_const(transformed_node);
2744 if (mode_is_signed(mode)) {
/* signed: upper bits are clean iff they are all copies of the sign bit */
2745 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2746 if (shifted == 0 || shifted == -1)
2749 unsigned long shifted = (unsigned long) attr->offset;
2750 shifted >>= get_mode_size_bits(mode);
2760 * Generate code for a Cmp.
2762 static ir_node *gen_Cmp(ir_node *node)
2764 ir_graph *irg = current_ir_graph;
2765 dbg_info *dbgi = get_irn_dbg_info(node);
2766 ir_node *block = get_nodes_block(node);
2767 ir_node *new_block = be_transform_node(block);
2768 ir_node *left = get_Cmp_left(node);
2769 ir_node *right = get_Cmp_right(node);
2770 ir_mode *cmp_mode = get_irn_mode(left);
2772 ia32_address_mode_t am;
2773 ia32_address_t *addr = &am.addr;
/* float compares are handled by the SSE/x87 specific helpers */
2776 if(mode_is_float(cmp_mode)) {
2777 if (ia32_cg_config.use_sse2) {
2778 return create_Ucomi(node);
2780 return create_Fucom(node);
2784 assert(ia32_mode_needs_gp_reg(cmp_mode));
2786 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2787 cmp_unsigned = !mode_is_signed(cmp_mode);
2788 if (is_Const_0(right) &&
2790 get_irn_n_edges(left) == 1 &&
2791 can_fold_test_and(node)) {
2792 /* Test(and_left, and_right) */
2793 ir_node *and_left = get_And_left(left);
2794 ir_node *and_right = get_And_right(left);
2796 /* matze: code here used mode instead of cmd_mode, I think it is always
2797 * the same as cmp_mode, but I leave this here to see if this is really
2800 assert(get_irn_mode(and_left) == cmp_mode);
2802 match_arguments(&am, block, and_left, and_right, NULL,
2804 match_am | match_8bit_am | match_16bit_am |
2805 match_am_and_immediates | match_immediate |
2806 match_8bit | match_16bit);
2808 /* use 32bit compare mode if possible since the opcode is smaller */
2809 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2810 upper_bits_clean(am.new_op2, cmp_mode)) {
2811 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2814 if (get_mode_size_bits(cmp_mode) == 8) {
2815 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2816 addr->index, addr->mem, am.new_op1,
2817 am.new_op2, am.ins_permuted,
2820 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2821 addr->index, addr->mem, am.new_op1,
2822 am.new_op2, am.ins_permuted,
2826 /* Cmp(left, right) */
2827 match_arguments(&am, block, left, right, NULL,
2828 match_commutative | match_am | match_8bit_am |
2829 match_16bit_am | match_am_and_immediates |
2830 match_immediate | match_8bit | match_16bit);
2831 /* use 32bit compare mode if possible since the opcode is smaller */
2832 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2833 upper_bits_clean(am.new_op2, cmp_mode)) {
2834 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2837 if (get_mode_size_bits(cmp_mode) == 8) {
2838 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2839 addr->index, addr->mem, am.new_op1,
2840 am.new_op2, am.ins_permuted,
2843 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2844 addr->index, addr->mem, am.new_op1,
2845 am.new_op2, am.ins_permuted, cmp_unsigned);
2848 set_am_attributes(new_node, &am);
2849 set_ia32_ls_mode(new_node, cmp_mode);
2851 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2853 new_node = fix_mem_proj(new_node, &am);
/**
 * Create a conditional move (CMov) for a Mux on the given flags.
 * Requires cmov support (asserted) and a gp-register mode.
 */
2858 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2861 ir_graph *irg = current_ir_graph;
2862 dbg_info *dbgi = get_irn_dbg_info(node);
2863 ir_node *block = get_nodes_block(node);
2864 ir_node *new_block = be_transform_node(block);
2865 ir_node *val_true = get_Mux_true(node);
2866 ir_node *val_false = get_Mux_false(node);
2868 match_flags_t match_flags;
2869 ia32_address_mode_t am;
2870 ia32_address_t *addr;
2872 assert(ia32_cg_config.use_cmov);
2873 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2877 match_flags = match_commutative | match_am | match_16bit_am |
2880 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2882 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2883 addr->mem, am.new_op1, am.new_op2, new_flags,
2884 am.ins_permuted, pnc);
2885 set_am_attributes(new_node, &am);
2887 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2889 new_node = fix_mem_proj(new_node, &am);
2895 * Creates a ia32 Setcc instruction.
2897 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2898 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2901 ir_graph *irg = current_ir_graph;
2902 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2903 ir_node *nomem = new_NoMem();
2904 ir_mode *mode = get_irn_mode(orig_node);
2907 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2908 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2910 /* we might need to conv the result up */
/* Set only writes an 8bit register, so zero-extend for wider modes */
2911 if (get_mode_size_bits(mode) > 8) {
2912 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2913 nomem, new_node, mode_Bu);
2914 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2921 * Create instruction for an unsigned Difference or Zero.
2923 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b) {
2924 ir_graph *irg = current_ir_graph;
2925 ir_mode *mode = get_irn_mode(psi);
2926 ir_node *new_node, *sub, *sbb, *eflags, *block, *noreg, *tmpreg, *nomem;
/* compute a - b; its carry flag tells us whether a < b (unsigned) */
2929 new_node = gen_binop(psi, a, b, new_rd_ia32_Sub,
2930 match_mode_neutral | match_am | match_immediate | match_two_users);
2932 block = get_nodes_block(new_node);
2934 if (is_Proj(new_node)) {
2935 sub = get_Proj_pred(new_node);
2936 assert(is_ia32_Sub(sub));
/* make the Sub multi-out so we can also take its flags result */
2939 set_irn_mode(sub, mode_T);
2940 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
2942 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
2944 dbgi = get_irn_dbg_info(psi);
2945 noreg = ia32_new_NoReg_gp(env_cg);
2946 tmpreg = new_rd_ia32_ProduceVal(dbgi, irg, block);
2947 nomem = new_NoMem();
/* sbb tmpreg, tmpreg yields 0 or all-ones depending on the carry;
 * anding it with the difference gives (a-b) or 0 */
2948 sbb = new_rd_ia32_Sbb(dbgi, irg, block, noreg, noreg, nomem, tmpreg, tmpreg, eflags);
2950 new_node = new_rd_ia32_And(dbgi, irg, block, noreg, noreg, nomem, new_node, sbb);
2951 set_ia32_commutative(new_node);
2956 * Transforms a Mux node into CMov.
2958 * @return The transformed node.
2960 static ir_node *gen_Mux(ir_node *node)
2962 dbg_info *dbgi = get_irn_dbg_info(node);
2963 ir_node *block = get_nodes_block(node);
2964 ir_node *new_block = be_transform_node(block);
2965 ir_node *mux_true = get_Mux_true(node);
2966 ir_node *mux_false = get_Mux_false(node);
2967 ir_node *cond = get_Mux_sel(node);
2968 ir_mode *mode = get_irn_mode(node);
2971 assert(get_irn_mode(cond) == mode_b);
2973 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
2974 if (mode_is_float(mode)) {
2975 ir_node *cmp = get_Proj_pred(cond);
2976 ir_node *cmp_left = get_Cmp_left(cmp);
2977 ir_node *cmp_right = get_Cmp_right(cmp);
2978 pn_Cmp pnc = get_Proj_proj(cond);
/* float Mux patterns that match SSE min/max instructions */
2980 if (ia32_cg_config.use_sse2) {
2981 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
2982 if (cmp_left == mux_true && cmp_right == mux_false) {
2983 /* Mux(a <= b, a, b) => MIN */
2984 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
2985 match_commutative | match_am | match_two_users);
2986 } else if (cmp_left == mux_false && cmp_right == mux_true) {
2987 /* Mux(a <= b, b, a) => MAX */
2988 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
2989 match_commutative | match_am | match_two_users);
2991 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
2992 if (cmp_left == mux_true && cmp_right == mux_false) {
2993 /* Mux(a >= b, a, b) => MAX */
2994 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
2995 match_commutative | match_am | match_two_users);
2996 } else if (cmp_left == mux_false && cmp_right == mux_true) {
2997 /* Mux(a >= b, b, a) => MIN */
2998 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
2999 match_commutative | match_am | match_two_users);
3003 panic("cannot transform floating point Mux");
3009 assert(ia32_mode_needs_gp_reg(mode));
3011 if (is_Proj(cond)) {
3012 ir_node *cmp = get_Proj_pred(cond);
3014 ir_node *cmp_left = get_Cmp_left(cmp);
3015 ir_node *cmp_right = get_Cmp_right(cmp);
3016 pn_Cmp pnc = get_Proj_proj(cond);
3018 /* check for unsigned Doz first */
3019 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3020 is_Const_0(mux_false) && is_Sub(mux_true) &&
3021 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3022 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3023 return create_Doz(node, cmp_left, cmp_right);
3024 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3025 is_Const_0(mux_true) && is_Sub(mux_false) &&
3026 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3027 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3028 return create_Doz(node, cmp_left, cmp_right);
3033 flags = get_flags_node(cond, &pnc);
3035 if (is_Const(mux_true) && is_Const(mux_false)) {
3036 /* both are const, good */
/* Mux(c, 1, 0) / Mux(c, 0, 1) can use a plain Setcc */
3037 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3038 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*ins_permuted=*/0);
3039 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3040 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*ins_permuted=*/1);
3042 /* Not that simple. */
3047 new_node = create_CMov(node, cond, flags, pnc);
3055 * Create a conversion from x87 state register to general purpose.
3057 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
3058 ir_node *block = be_transform_node(get_nodes_block(node));
3059 ir_node *op = get_Conv_op(node);
3060 ir_node *new_op = be_transform_node(op);
3061 ia32_code_gen_t *cg = env_cg;
3062 ir_graph *irg = current_ir_graph;
3063 dbg_info *dbgi = get_irn_dbg_info(node);
3064 ir_node *noreg = ia32_new_NoReg_gp(cg);
3065 ir_mode *mode = get_irn_mode(node);
3066 ir_node *fist, *load, *mem;
/* fist stores the x87 value as an integer onto the frame ... */
3068 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg, new_NoMem(), new_op, &fist);
3069 set_irn_pinned(fist, op_pin_state_floats);
3070 set_ia32_use_frame(fist);
3071 set_ia32_op_type(fist, ia32_AddrModeD);
3073 assert(get_mode_size_bits(mode) <= 32);
3074 /* exception we can only store signed 32 bit integers, so for unsigned
3075 we store a 64bit (signed) integer and load the lower bits */
3076 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3077 set_ia32_ls_mode(fist, mode_Ls);
3079 set_ia32_ls_mode(fist, mode_Is);
3081 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* ... and a gp Load reads the (lower 32) bits back */
3084 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, mem);
3086 set_irn_pinned(load, op_pin_state_floats);
3087 set_ia32_use_frame(load);
3088 set_ia32_op_type(load, ia32_AddrModeS);
3089 set_ia32_ls_mode(load, mode_Is);
/* request a frame entity of the proper size for the spill slot */
3090 if(get_ia32_ls_mode(fist) == mode_Ls) {
3091 ia32_attr_t *attr = get_ia32_attr(load);
3092 attr->data.need_64bit_stackent = 1;
3094 ia32_attr_t *attr = get_ia32_attr(load);
3095 attr->data.need_32bit_stackent = 1;
3097 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
3099 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3103 * Creates a x87 strict Conv by placing a Store and a Load
3105 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3107 ir_node *block = get_nodes_block(node);
3108 ir_graph *irg = current_ir_graph;
3109 dbg_info *dbgi = get_irn_dbg_info(node);
3110 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3111 ir_node *nomem = new_NoMem();
3112 ir_node *frame = get_irg_frame(irg);
3113 ir_node *store, *load;
/* the round trip through memory forces rounding to tgt_mode precision */
3116 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
3118 set_ia32_use_frame(store);
3119 set_ia32_op_type(store, ia32_AddrModeD);
3120 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
3122 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
3124 set_ia32_use_frame(load);
3125 set_ia32_op_type(load, ia32_AddrModeS);
3126 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3128 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
3133 * Create a conversion from general purpose to x87 register
3135 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
3136 ir_node *src_block = get_nodes_block(node);
3137 ir_node *block = be_transform_node(src_block);
3138 ir_graph *irg = current_ir_graph;
3139 dbg_info *dbgi = get_irn_dbg_info(node);
3140 ir_node *op = get_Conv_op(node);
3141 ir_node *new_op = NULL;
3145 ir_mode *store_mode;
3151 /* fild can use source AM if the operand is a signed 32bit integer */
3152 if (src_mode == mode_Is) {
3153 ia32_address_mode_t am;
3155 match_arguments(&am, src_block, NULL, op, NULL,
3156 match_am | match_try_am);
3157 if (am.op_type == ia32_AddrModeS) {
3158 ia32_address_t *addr = &am.addr;
3160 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
3161 addr->index, addr->mem);
3162 new_node = new_r_Proj(irg, block, fild, mode_vfp,
3165 set_am_attributes(fild, &am);
3166 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3168 fix_mem_proj(fild, &am);
/* no source address mode possible: go through an explicit spill slot */
3173 if(new_op == NULL) {
3174 new_op = be_transform_node(op);
3177 noreg = ia32_new_NoReg_gp(env_cg);
3178 nomem = new_NoMem();
3179 mode = get_irn_mode(op);
3181 /* first convert to 32 bit signed if necessary */
3182 src_bits = get_mode_size_bits(src_mode);
3183 if (src_bits == 8) {
3184 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
3186 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3188 } else if (src_bits < 32) {
3189 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
3191 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3195 assert(get_mode_size_bits(mode) == 32);
/* spill the integer value onto the frame so fild can load it */
3198 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
3201 set_ia32_use_frame(store);
3202 set_ia32_op_type(store, ia32_AddrModeD);
3203 set_ia32_ls_mode(store, mode_Iu);
3205 /* exception for 32bit unsigned, do a 64bit spill+load */
3206 if(!mode_is_signed(mode)) {
3209 ir_node *zero_const = create_Immediate(NULL, 0, 0);
/* store a zero high word so the 64bit value is the zero-extension */
3211 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
3212 get_irg_frame(irg), noreg, nomem,
3215 set_ia32_use_frame(zero_store);
3216 set_ia32_op_type(zero_store, ia32_AddrModeD);
3217 add_ia32_am_offs_int(zero_store, 4);
3218 set_ia32_ls_mode(zero_store, mode_Iu);
3223 store = new_rd_Sync(dbgi, irg, block, 2, in);
3224 store_mode = mode_Ls;
3226 store_mode = mode_Is;
/* finally load the spilled integer into the x87 stack */
3230 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
3232 set_ia32_use_frame(fild);
3233 set_ia32_op_type(fild, ia32_AddrModeS);
3234 set_ia32_ls_mode(fild, store_mode);
3236 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3242 * Create a conversion from one integer mode into another one
3244 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3245 dbg_info *dbgi, ir_node *block, ir_node *op,
3248 ir_graph *irg = current_ir_graph;
3249 int src_bits = get_mode_size_bits(src_mode);
3250 int tgt_bits = get_mode_size_bits(tgt_mode);
3251 ir_node *new_block = be_transform_node(block);
3253 ir_mode *smaller_mode;
3255 ia32_address_mode_t am;
3256 ia32_address_t *addr = &am.addr;
/* only the smaller of the two modes determines which bits matter */
3259 if (src_bits < tgt_bits) {
3260 smaller_mode = src_mode;
3261 smaller_bits = src_bits;
3263 smaller_mode = tgt_mode;
3264 smaller_bits = tgt_bits;
3267 #ifdef DEBUG_libfirm
3269 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3274 match_arguments(&am, block, NULL, op, NULL,
3275 match_8bit | match_16bit |
3276 match_am | match_8bit_am | match_16bit_am);
/* if the operand's upper bits are already clean the conv is a no-op */
3278 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3279 /* unnecessary conv. in theory it shouldn't have been AM */
3280 assert(is_ia32_NoReg_GP(addr->base));
3281 assert(is_ia32_NoReg_GP(addr->index));
3282 assert(is_NoMem(addr->mem));
3283 assert(am.addr.offset == 0);
3284 assert(am.addr.symconst_ent == NULL);
3288 if (smaller_bits == 8) {
3289 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3290 addr->index, addr->mem, am.new_op2,
3293 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3294 addr->index, addr->mem, am.new_op2,
3297 set_am_attributes(new_node, &am);
3298 /* match_arguments assume that out-mode = in-mode, this isn't true here
3300 set_ia32_ls_mode(new_node, smaller_mode);
3301 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3302 new_node = fix_mem_proj(new_node, &am);
3307 * Transforms a Conv node.
3309 * @return The created ia32 Conv node
3311 static ir_node *gen_Conv(ir_node *node) {
3312 ir_node *block = get_nodes_block(node);
3313 ir_node *new_block = be_transform_node(block);
3314 ir_node *op = get_Conv_op(node);
3315 ir_node *new_op = NULL;
3316 ir_graph *irg = current_ir_graph;
3317 dbg_info *dbgi = get_irn_dbg_info(node);
3318 ir_mode *src_mode = get_irn_mode(op);
3319 ir_mode *tgt_mode = get_irn_mode(node);
3320 int src_bits = get_mode_size_bits(src_mode);
3321 int tgt_bits = get_mode_size_bits(tgt_mode);
3322 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3323 ir_node *nomem = new_rd_NoMem(irg);
3324 ir_node *res = NULL;
3326 if (src_mode == mode_b) {
3327 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3328 /* nothing to do, we already model bools as 0/1 ints */
3329 return be_transform_node(op);
3332 if (src_mode == tgt_mode) {
3333 if (get_Conv_strict(node)) {
3334 if (ia32_cg_config.use_sse2) {
3335 /* when we are in SSE mode, we can kill all strict no-op conversion */
3336 return be_transform_node(op);
3339 /* this should be optimized already, but who knows... */
3340 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3341 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3342 return be_transform_node(op);
3346 if (mode_is_float(src_mode)) {
3347 new_op = be_transform_node(op);
3348 /* we convert from float ... */
3349 if (mode_is_float(tgt_mode)) {
/* non-strict E -> D narrowing has no observable effect here */
3350 if(src_mode == mode_E && tgt_mode == mode_D
3351 && !get_Conv_strict(node)) {
3352 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3357 if (ia32_cg_config.use_sse2) {
3358 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3359 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3361 set_ia32_ls_mode(res, tgt_mode);
3363 if(get_Conv_strict(node)) {
/* x87 needs a store/load round trip to enforce the precision */
3364 res = gen_x87_strict_conv(tgt_mode, new_op);
3365 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3368 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3373 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3374 if (ia32_cg_config.use_sse2) {
3375 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3377 set_ia32_ls_mode(res, src_mode);
3379 return gen_x87_fp_to_gp(node);
3383 /* we convert from int ... */
3384 if (mode_is_float(tgt_mode)) {
3386 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3387 if (ia32_cg_config.use_sse2) {
3388 new_op = be_transform_node(op);
3389 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3391 set_ia32_ls_mode(res, tgt_mode);
3393 res = gen_x87_gp_to_fp(node, src_mode);
3394 if(get_Conv_strict(node)) {
3395 /* The strict-Conv is only necessary, if the int mode has more bits
3396 * than the float mantissa */
3397 size_t int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3398 size_t float_mantissa;
3399 /* FIXME There is no way to get the mantissa size of a mode */
3400 switch (get_mode_size_bits(tgt_mode)) {
3401 case 32: float_mantissa = 23 + 1; break; // + 1 for implicit 1
3402 case 64: float_mantissa = 52 + 1; break;
3404 case 96: float_mantissa = 64; break;
3405 default: float_mantissa = 0; break;
3407 if (float_mantissa < int_mantissa) {
3408 res = gen_x87_strict_conv(tgt_mode, res);
3409 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3414 } else if(tgt_mode == mode_b) {
3415 /* mode_b lowering already took care that we only have 0/1 values */
3416 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3417 src_mode, tgt_mode));
3418 return be_transform_node(op);
3421 if (src_bits == tgt_bits) {
3422 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3423 src_mode, tgt_mode));
3424 return be_transform_node(op);
/* remaining case: int -> int of a different width */
3427 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Try to encode the node as an ia32 Immediate (respecting the given
 * constraint type); fall back to the normal transformation otherwise.
 */
3435 static ir_node *create_immediate_or_transform(ir_node *node,
3436 char immediate_constraint_type)
3438 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3439 if (new_node == NULL) {
3440 new_node = be_transform_node(node);
3446 * Transforms a FrameAddr into an ia32 Add.
3448 static ir_node *gen_be_FrameAddr(ir_node *node) {
3449 ir_node *block = be_transform_node(get_nodes_block(node));
3450 ir_node *op = be_get_FrameAddr_frame(node);
3451 ir_node *new_op = be_transform_node(op);
3452 ir_graph *irg = current_ir_graph;
3453 dbg_info *dbgi = get_irn_dbg_info(node);
3454 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
/* a Lea with the frame entity computes the entity's address */
3457 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3458 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3459 set_ia32_use_frame(new_node);
3461 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3467 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3469 static ir_node *gen_be_Return(ir_node *node) {
3470 ir_graph *irg = current_ir_graph;
3471 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3472 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3473 ir_entity *ent = get_irg_entity(irg);
3474 ir_type *tp = get_entity_type(ent);
3479 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3480 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3483 int pn_ret_val, pn_ret_mem, arity, i;
3485 assert(ret_val != NULL);
/* the special handling is only needed for SSE float return values */
3486 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3487 return be_duplicate_node(node);
3490 res_type = get_method_res_type(tp, 0);
3492 if (! is_Primitive_type(res_type)) {
3493 return be_duplicate_node(node);
3496 mode = get_type_mode(res_type);
3497 if (! mode_is_float(mode)) {
3498 return be_duplicate_node(node);
3501 assert(get_method_n_ress(tp) == 1);
3503 pn_ret_val = get_Proj_proj(ret_val);
3504 pn_ret_mem = get_Proj_proj(ret_mem);
3506 /* get the Barrier */
3507 barrier = get_Proj_pred(ret_val);
3509 /* get result input of the Barrier */
3510 ret_val = get_irn_n(barrier, pn_ret_val);
3511 new_ret_val = be_transform_node(ret_val);
3513 /* get memory input of the Barrier */
3514 ret_mem = get_irn_n(barrier, pn_ret_mem);
3515 new_ret_mem = be_transform_node(ret_mem);
3517 frame = get_irg_frame(irg);
3519 dbgi = get_irn_dbg_info(barrier);
3520 block = be_transform_node(get_nodes_block(barrier));
3522 noreg = ia32_new_NoReg_gp(env_cg);
3524 /* store xmm0 onto stack */
3525 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3526 new_ret_mem, new_ret_val);
3527 set_ia32_ls_mode(sse_store, mode);
3528 set_ia32_op_type(sse_store, ia32_AddrModeD);
3529 set_ia32_use_frame(sse_store);
3531 /* load into x87 register */
3532 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3533 set_ia32_op_type(fld, ia32_AddrModeS);
3534 set_ia32_use_frame(fld);
3536 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3537 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3539 /* create a new barrier */
/* rebuild the Barrier, substituting the x87 value and new memory */
3540 arity = get_irn_arity(barrier);
3541 in = alloca(arity * sizeof(in[0]));
3542 for (i = 0; i < arity; ++i) {
3545 if (i == pn_ret_val) {
3547 } else if (i == pn_ret_mem) {
3550 ir_node *in = get_irn_n(barrier, i);
3551 new_in = be_transform_node(in);
3556 new_barrier = new_ir_node(dbgi, irg, block,
3557 get_irn_op(barrier), get_irn_mode(barrier),
3559 copy_node_attr(barrier, new_barrier);
3560 be_duplicate_deps(barrier, new_barrier);
3561 set_transformed_and_mark(barrier, new_barrier);
3563 /* transform normally */
3564 return be_duplicate_node(node);
3568 * Transform a be_AddSP into an ia32_SubSP.
/* note: the stack grows downwards, so growing it means subtracting */
3570 static ir_node *gen_be_AddSP(ir_node *node)
3572 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3573 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3575 return gen_binop(node, sp, sz, new_rd_ia32_SubSP,
3576 match_am | match_immediate);
3580 * Transform a be_SubSP into an ia32_AddSP
/* note: the stack grows downwards, so shrinking it means adding */
3582 static ir_node *gen_be_SubSP(ir_node *node)
3584 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3585 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3587 return gen_binop(node, sp, sz, new_rd_ia32_AddSP,
3588 match_am | match_immediate);
3592 * Change some phi modes
3594 static ir_node *gen_Phi(ir_node *node) {
3595 ir_node *block = be_transform_node(get_nodes_block(node));
3596 ir_graph *irg = current_ir_graph;
3597 dbg_info *dbgi = get_irn_dbg_info(node);
3598 ir_mode *mode = get_irn_mode(node);
3601 if(ia32_mode_needs_gp_reg(mode)) {
3602 /* we shouldn't have any 64bit stuff around anymore */
3603 assert(get_mode_size_bits(mode) <= 32);
3604 /* all integer operations are on 32bit registers now */
3606 } else if(mode_is_float(mode)) {
3607 if (ia32_cg_config.use_sse2) {
3614 /* phi nodes allow loops, so we use the old arguments for now
3615 * and fix this later */
3616 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3617 get_irn_in(node) + 1);
3618 copy_node_attr(node, phi);
3619 be_duplicate_deps(node, phi);
3621 be_set_transformed_node(node, phi);
/* predecessors are transformed later when the queue is processed */
3622 be_enqueue_preds(node);
/**
 * Transform an IJmp (computed/indirect jump) into an ia32_IJmp.
 * The jump target may be folded into an address mode or an immediate.
 */
static ir_node *gen_IJmp(ir_node *node)
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *op = get_IJmp_target(node);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	/* an indirect jump target must be an address */
	assert(get_irn_mode(op) == mode_P);
	match_arguments(&am, block, NULL, op, NULL,
			match_am | match_8bit_am | match_16bit_am |
			match_immediate | match_8bit | match_16bit);
	new_node = new_rd_ia32_IJmp(dbgi, current_ir_graph, new_block,
			addr->base, addr->index, addr->mem,
	set_am_attributes(new_node, &am);
	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
	/* NOTE(review): fix_mem_proj presumably reroutes the memory Proj when a
	 * load was folded into the address mode — confirm against the helper */
	new_node = fix_mem_proj(new_node, &am);
/**
 * Transform a Bound node (array bounds check).
 * Only the common case lower == 0 is supported: since lower is 0, a single
 * unsigned "index < upper" test also catches negative indices.
 */
static ir_node *gen_Bound(ir_node *node)
	ir_node *lower = get_Bound_lower(node);
	dbg_info *dbgi = get_irn_dbg_info(node);
	if (is_Const_0(lower)) {
		/* typical case for Java */
		ir_node *sub, *res, *flags, *block;
		ir_graph *irg = current_ir_graph;
		/* compute index - upper to set the flags */
		res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
			new_rd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
		block = get_nodes_block(res);
		if (! is_Proj(res)) {
			set_irn_mode(sub, mode_T);
			res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
			sub = get_Proj_pred(res);
		/* branch on the Sub's flags: unsigned "lower than" */
		flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
		new_node = new_rd_ia32_Jcc(dbgi, irg, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
		SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
		panic("generic Bound not supported in ia32 Backend");
3692 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3694 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3695 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3697 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
3698 match_immediate | match_mode_neutral);
/* Transform an ia32_l_ShrDep into a real Shr (count may become an
 * immediate).  NOTE(review): the match-flags continuation of this call
 * appears to be missing from this extract — confirm against gen_ia32_l_ShlDep
 * above before editing. */
static ir_node *gen_ia32_l_ShrDep(ir_node *node)
	ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
	return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/* Transform an ia32_l_SarDep into a real Sar (count may become an
 * immediate).  NOTE(review): the match-flags continuation of this call
 * appears to be missing from this extract — verify before editing. */
static ir_node *gen_ia32_l_SarDep(ir_node *node)
	ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
	return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
3717 static ir_node *gen_ia32_l_Add(ir_node *node) {
3718 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3719 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3720 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
3721 match_commutative | match_am | match_immediate |
3722 match_mode_neutral);
3724 if(is_Proj(lowered)) {
3725 lowered = get_Proj_pred(lowered);
3727 assert(is_ia32_Add(lowered));
3728 set_irn_mode(lowered, mode_T);
3734 static ir_node *gen_ia32_l_Adc(ir_node *node)
3736 return gen_binop_flags(node, new_rd_ia32_Adc,
3737 match_commutative | match_am | match_immediate |
3738 match_mode_neutral);
3742 * Transforms a l_MulS into a "real" MulS node.
3744 * @return the created ia32 Mul node
3746 static ir_node *gen_ia32_l_Mul(ir_node *node) {
3747 ir_node *left = get_binop_left(node);
3748 ir_node *right = get_binop_right(node);
3750 return gen_binop(node, left, right, new_rd_ia32_Mul,
3751 match_commutative | match_am | match_mode_neutral);
3755 * Transforms a l_IMulS into a "real" IMul1OPS node.
3757 * @return the created ia32 IMul1OP node
3759 static ir_node *gen_ia32_l_IMul(ir_node *node) {
3760 ir_node *left = get_binop_left(node);
3761 ir_node *right = get_binop_right(node);
3763 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
3764 match_commutative | match_am | match_mode_neutral);
3767 static ir_node *gen_ia32_l_Sub(ir_node *node) {
3768 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3769 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3770 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
3771 match_am | match_immediate | match_mode_neutral);
3773 if(is_Proj(lowered)) {
3774 lowered = get_Proj_pred(lowered);
3776 assert(is_ia32_Sub(lowered));
3777 set_irn_mode(lowered, mode_T);
3783 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
3784 return gen_binop_flags(node, new_rd_ia32_Sbb,
3785 match_am | match_immediate | match_mode_neutral);
/**
 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
 * op1 - target to be shifted
 * op2 - contains bits to be shifted into target
 * Only op3 (the count) can be an immediate.
 */
static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
                                         ir_node *low, ir_node *count)
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *new_high = be_transform_node(high);
	ir_node *new_low = be_transform_node(low);
	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(count) &&
	       get_irn_n_edges(count) == 1 &&
	       mode_is_int(get_irn_mode(count))) {
		assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
		count = get_Conv_op(count);
	/* the count may be encoded as an immediate operand */
	new_count = create_immediate_or_transform(count, 0);
	/* emit the matching double-precision shift */
	if (is_ia32_l_ShlD(node)) {
		new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
		new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3829 static ir_node *gen_ia32_l_ShlD(ir_node *node)
3831 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
3832 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
3833 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
3834 return gen_lowered_64bit_shifts(node, high, low, count);
3837 static ir_node *gen_ia32_l_ShrD(ir_node *node)
3839 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
3840 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
3841 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
3842 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform an l_LLtoFloat: convert a lowered 64bit integer (low/high word
 * pair) to a float by spilling both words to the frame and fild-ing the
 * 64bit value from there.
 */
static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
	ir_node *src_block = get_nodes_block(node);
	ir_node *block = be_transform_node(src_block);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *frame = get_irg_frame(irg);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
	ir_node *nomem = new_NoMem();
	ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
	ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
	ir_node *new_val_low = be_transform_node(val_low);
	ir_node *new_val_high = be_transform_node(val_high);
	ir_node *store_high;
	if(!mode_is_signed(get_irn_mode(val_high))) {
		panic("unsigned long long -> float not supported yet (%+F)", node);
	/* write the two halves to the frame slot (low word at offset 0,
	 * high word at offset 4) */
	store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
	store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
	SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
	SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
	set_ia32_use_frame(store_low);
	set_ia32_use_frame(store_high);
	set_ia32_op_type(store_low, ia32_AddrModeD);
	set_ia32_op_type(store_high, ia32_AddrModeD);
	set_ia32_ls_mode(store_low, mode_Iu);
	set_ia32_ls_mode(store_high, mode_Is);
	add_ia32_am_offs_int(store_high, 4);
	/* both stores must have completed before the fild may read */
	sync = new_rd_Sync(dbgi, irg, block, 2, in);
	fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
	set_ia32_use_frame(fild);
	set_ia32_op_type(fild, ia32_AddrModeS);
	set_ia32_ls_mode(fild, mode_Ls);
	SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
	return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Transform an l_FloattoLL: convert a float to a lowered 64bit integer by
 * fist-ing the value into a 64bit frame slot; the two result words are read
 * back from that slot by the Projs (see gen_Proj_l_FloattoLL).
 */
static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
	ir_node *src_block = get_nodes_block(node);
	ir_node *block = be_transform_node(src_block);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *frame = get_irg_frame(irg);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
	ir_node *nomem = new_NoMem();
	ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
	ir_node *new_val = be_transform_node(val);
	ir_node *fist, *mem;
	mem = gen_vfist(dbgi, irg, block, frame, noreg, nomem, new_val, &fist);
	SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
	set_ia32_use_frame(fist);
	set_ia32_op_type(fist, ia32_AddrModeD);
	/* store the full 64bit value */
	set_ia32_ls_mode(fist, mode_Ls);
3921 * the BAD transformer.
3923 static ir_node *bad_transform(ir_node *node) {
3924 panic("No transform function for %+F available.", node);
/**
 * Transform a Proj of an l_FloattoLL: read one 32bit half of the fist
 * result back from the 64bit frame slot.
 */
static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
	ir_graph *irg = current_ir_graph;
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_node *frame = get_irg_frame(irg);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long pn = get_Proj_proj(node);
	load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
	SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
	set_ia32_use_frame(load);
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);
	/* we need a 64bit stackslot (fist stores 64bit) even though we only load
	 * 32 bit from it with this particular load */
	attr = get_ia32_attr(load);
	attr->data.need_64bit_stackent = 1;
	if (pn == pn_ia32_l_FloattoLL_res_high) {
		/* the high word lives 4 bytes above the low word */
		add_ia32_am_offs_int(load, 4);
		assert(pn == pn_ia32_l_FloattoLL_res_low);
	proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform the Projs of an AddSP.
 * The be_AddSP was lowered to an ia32_SubSP (see gen_be_AddSP), so the
 * Proj numbers must be renumbered accordingly.
 */
static ir_node *gen_Proj_be_AddSP(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	if (proj == pn_be_AddSP_sp) {
		ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
		                           pn_ia32_SubSP_stack);
		/* the stack-pointer result is pinned to ESP */
		arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
	} else if(proj == pn_be_AddSP_res) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
		                   pn_ia32_SubSP_addr);
	} else if (proj == pn_be_AddSP_M) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
	panic("No idea how to transform proj->AddSP");
/**
 * Transform the Projs of a SubSP.
 * The be_SubSP was lowered to an ia32_AddSP (see gen_be_SubSP), so the
 * Proj numbers must be renumbered accordingly.
 */
static ir_node *gen_Proj_be_SubSP(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	if (proj == pn_be_SubSP_sp) {
		ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
		                           pn_ia32_AddSP_stack);
		/* the stack-pointer result is pinned to ESP */
		arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
	} else if (proj == pn_be_SubSP_M) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
	panic("No idea how to transform proj->SubSP");
/**
 * Transform and renumber the Projs from a Load.
 * The Load may have been lowered to an ia32_Load, folded into a
 * Conv/xLoad/vfld, or consumed entirely by source-address-mode matching.
 */
static ir_node *gen_Proj_Load(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	/* loads might be part of source address mode matches, so we don't
	 * transform the ProjMs yet (with the exception of loads whose result is
	 */
	if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
		/* this is needed, because sometimes we have loops that are only
		   reachable through the ProjM */
		be_enqueue_preds(node);
		/* do it in 2 steps, to silence firm verifier */
		res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
		set_Proj_proj(res, pn_ia32_mem);
	/* renumber the proj */
	new_pred = be_transform_node(pred);
	if (is_ia32_Load(new_pred)) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
	case pn_Load_X_regular:
		return new_rd_Jmp(dbgi, irg, block);
	case pn_Load_X_except:
		/* This Load might raise an exception. Mark it. */
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
	} else if (is_ia32_Conv_I2I(new_pred) ||
	           is_ia32_Conv_I2I8Bit(new_pred)) {
		/* the load was folded into a conversion node */
		set_irn_mode(new_pred, mode_T);
		if (proj == pn_Load_res) {
			return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
		} else if (proj == pn_Load_M) {
			return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
	} else if (is_ia32_xLoad(new_pred)) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
	case pn_Load_X_regular:
		return new_rd_Jmp(dbgi, irg, block);
	case pn_Load_X_except:
		/* This Load might raise an exception. Mark it. */
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
	} else if (is_ia32_vfld(new_pred)) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
	case pn_Load_X_regular:
		return new_rd_Jmp(dbgi, irg, block);
	case pn_Load_X_except:
		/* This Load might raise an exception. Mark it. */
		set_ia32_exc_label(new_pred, 1);
		/* NOTE(review): this vfld branch uses pn_ia32_xLoad_X_exc — looks
		 * like a copy-paste from the xLoad branch; verify it should not be
		 * pn_ia32_vfld_X_exc */
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
	/* can happen for ProJMs when source address mode happened for the
	   node */
	/* however it should not be the result proj, as that would mean the
	   load had multiple users and should not have been used for
	 */
	if (proj != pn_Load_M) {
		panic("internal error: transformed node not a Load");
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
	panic("No idea how to transform proj");
/**
 * Transform and renumber the Projs from a DivMod like instruction.
 * Div, Mod and DivMod all lower to the same ia32 Div/IDiv node; only the
 * Proj numbers differ per source opcode, hence the outer opcode switch.
 */
static ir_node *gen_Proj_DivMod(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
	switch (get_irn_opcode(pred)) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
	case pn_Div_X_regular:
		return new_rd_Jmp(dbgi, irg, block);
	case pn_Div_X_except:
		/* division may trap: mark the node */
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
	case pn_Mod_X_except:
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
	case pn_DivMod_res_div:
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
	case pn_DivMod_res_mod:
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
	case pn_DivMod_X_regular:
		return new_rd_Jmp(dbgi, irg, block);
	case pn_DivMod_X_except:
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
	panic("No idea how to transform proj->DivMod");
/**
 * Transform and renumber the Projs from a CopyB.
 */
static ir_node *gen_Proj_CopyB(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	case pn_CopyB_M_regular:
		/* the CopyB was lowered to one of two variants (CopyB_i presumably
		 * the constant-size form — confirm against the node specs) */
		if (is_ia32_CopyB_i(new_pred)) {
			return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
		} else if (is_ia32_CopyB(new_pred)) {
			return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
	panic("No idea how to transform proj->CopyB");
/**
 * Transform and renumber the Projs from a Quot.
 * The Quot was lowered to either an SSE xDiv or an x87 vfdiv.
 */
static ir_node *gen_Proj_Quot(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	if (is_ia32_xDiv(new_pred)) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
	} else if (is_ia32_vfdiv(new_pred)) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
	if (is_ia32_xDiv(new_pred)) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
	} else if (is_ia32_vfdiv(new_pred)) {
		return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
	case pn_Quot_X_regular:
	case pn_Quot_X_except:
	panic("No idea how to transform proj->Quot");
4236 static ir_node *gen_be_Call(ir_node *node) {
4237 ir_node *res = be_duplicate_node(node);
4240 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4242 /* Run the x87 simulator if the call returns a float value */
4243 call_tp = be_Call_get_type(node);
4244 if (get_method_n_ress(call_tp) > 0) {
4245 ir_type *const res_type = get_method_res_type(call_tp, 0);
4246 ir_mode *const res_mode = get_type_mode(res_type);
4248 if (res_mode != NULL && mode_is_float(res_mode)) {
4249 env_cg->do_x87_sim = 1;
4256 static ir_node *gen_be_IncSP(ir_node *node) {
4257 ir_node *res = be_duplicate_node(node);
4258 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
/**
 * Transform the Projs from a be_Call.
 */
static ir_node *gen_Proj_be_Call(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *call = get_Proj_pred(node);
	ir_node *new_call = be_transform_node(call);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_type *method_type = be_Call_get_type(call);
	int n_res = get_method_n_ress(method_type);
	long proj = get_Proj_proj(node);
	ir_mode *mode = get_irn_mode(node);
	const arch_register_class_t *cls;
	/* The following is kinda tricky: If we're using SSE, then we have to
	 * move the result value of the call in floating point registers to an
	 * xmm register, we therefore construct a GetST0 -> xLoad sequence
	 * after the call, we have to make sure to correctly make the
	 * MemProj and the result Proj use these 2 nodes
	 */
	if (proj == pn_be_Call_M_regular) {
		// get new node for result, are we doing the sse load/store hack?
		ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
		ir_node *call_res_new;
		ir_node *call_res_pred = NULL;
		if (call_res != NULL) {
			call_res_new = be_transform_node(call_res);
			call_res_pred = get_Proj_pred(call_res_new);
		/* no hack in effect: the memory Proj still belongs to the call */
		if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
			return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
			                   pn_be_Call_M_regular);
		/* hack in effect: route the memory Proj through the inserted xLoad */
		assert(is_ia32_xLoad(call_res_pred));
		return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
	if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
	    && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
		ir_node *frame = get_irg_frame(irg);
		ir_node *noreg = ia32_new_NoReg_gp(env_cg);
		ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
		/* in case there is no memory output: create one to serialize the copy
		 */
		call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
		                       pn_be_Call_M_regular);
		call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
		                       pn_be_Call_first_res);
		/* store st(0) onto stack */
		fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
		set_ia32_op_type(fstp, ia32_AddrModeD);
		set_ia32_use_frame(fstp);
		/* load into SSE register */
		sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
		set_ia32_op_type(sse_load, ia32_AddrModeS);
		set_ia32_use_frame(sse_load);
		sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
	/* transform call modes */
	if (mode_is_data(mode)) {
		cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
	return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4349 * Transform the Projs from a Cmp.
4351 static ir_node *gen_Proj_Cmp(ir_node *node)
4353 /* this probably means not all mode_b nodes were lowered... */
4354 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/**
 * Transform the Projs from a Bound.
 * The Bound was lowered to a Jcc (see gen_Bound); its control-flow Projs
 * map onto the Jcc's true/false Projs.
 */
static ir_node *gen_Proj_Bound(ir_node *node)
	ir_node *new_node, *block;
	ir_node *pred = get_Proj_pred(node);
	switch (get_Proj_proj(node)) {
		return be_transform_node(get_Bound_mem(pred));
	case pn_Bound_X_regular:
		new_node = be_transform_node(pred);
		block = get_nodes_block(new_node);
		return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
	case pn_Bound_X_except:
		new_node = be_transform_node(pred);
		block = get_nodes_block(new_node);
		return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
		return be_transform_node(get_Bound_index(pred));
	panic("unsupported Proj from Bound");
4384 static ir_node *gen_Proj_ASM(ir_node *node)
4390 if (get_irn_mode(node) != mode_M)
4391 return be_duplicate_node(node);
4393 pred = get_Proj_pred(node);
4394 new_pred = be_transform_node(pred);
4395 block = get_nodes_block(new_pred);
4396 return new_r_Proj(current_ir_graph, block, new_pred, mode_M,
4397 get_ia32_n_res(new_pred) + 1);
/**
 * Transform and potentially renumber Proj nodes.
 * Dispatches on the opcode of the Proj's predecessor to the specialised
 * gen_Proj_* transformers above.
 */
static ir_node *gen_Proj(ir_node *node) {
	ir_node *pred = get_Proj_pred(node);
	switch (get_irn_opcode(pred)) {
		proj = get_Proj_proj(node);
		if (proj == pn_Store_M) {
			return be_transform_node(pred);
		panic("No idea how to transform proj->Store");
		return gen_Proj_Load(node);
		return gen_Proj_ASM(node);
		return gen_Proj_DivMod(node);
		return gen_Proj_CopyB(node);
		return gen_Proj_Quot(node);
		return gen_Proj_be_SubSP(node);
		return gen_Proj_be_AddSP(node);
		return gen_Proj_be_Call(node);
		return gen_Proj_Cmp(node);
		return gen_Proj_Bound(node);
		proj = get_Proj_proj(node);
		if (proj == pn_Start_X_initial_exec) {
			ir_node *block = get_nodes_block(pred);
			dbg_info *dbgi = get_irn_dbg_info(node);
			/* we exchange the ProjX with a jump */
			block = be_transform_node(block);
			jump = new_rd_Jmp(dbgi, current_ir_graph, block);
		if (node == be_get_old_anchor(anchor_tls)) {
			return gen_Proj_tls(node);
	if (is_ia32_l_FloattoLL(pred)) {
		return gen_Proj_l_FloattoLL(node);
	} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
		ir_mode *mode = get_irn_mode(node);
		if (ia32_mode_needs_gp_reg(mode)) {
			ir_node *new_pred = be_transform_node(pred);
			ir_node *block = be_transform_node(get_nodes_block(node));
			ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
			                               mode_Iu, get_Proj_proj(node));
#ifdef DEBUG_libfirm
			/* keep the original node number for debug dumps */
			new_proj->node_nr = node->node_nr;
	/* default: plain duplication */
	return be_duplicate_node(node);
/**
 * Enters all transform functions into the generic pointer of each opcode.
 * GEN(x) installs gen_x as the transformer for op x; BAD(x) installs the
 * aborting bad_transform for ops that must never appear at this stage.
 */
static void register_transformers(void)
	/* first clear the generic function pointer for all ops */
	clear_irp_opcodes_generic_func();
#define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
#define BAD(a) op_##a->ops.generic = (op_func)bad_transform
	/* transform ops from intrinsic lowering */
	GEN(ia32_l_LLtoFloat);
	GEN(ia32_l_FloattoLL);
	/* we should never see these nodes */
	/* handle generic backend nodes */
	op_Mulh = get_op_Mulh();
4579 * Pre-transform all unknown and noreg nodes.
4581 static void ia32_pretransform_node(void *arch_cg) {
4582 ia32_code_gen_t *cg = arch_cg;
4584 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
4585 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
4586 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
4587 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
4588 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
4589 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
/**
 * Walker, checks if all ia32 nodes producing more than one result have their
 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
 */
static void add_missing_keep_walker(ir_node *node, void *data)
	unsigned found_projs = 0;
	const ir_edge_t *edge;
	ir_mode *mode = get_irn_mode(node);
	if(!is_ia32_irn(node))
	n_outs = get_ia32_n_res(node);
	if(is_ia32_SwitchJmp(node))
	/* found_projs is used as a bitset, so the output count must fit */
	assert(n_outs < (int) sizeof(unsigned) * 8);
	/* record which output Projs already exist */
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		int pn = get_Proj_proj(proj);
		if (get_irn_mode(proj) == mode_M)
		assert(pn < n_outs);
		found_projs |= 1 << pn;
	/* are keeps missing? */
	for(i = 0; i < n_outs; ++i) {
		const arch_register_req_t *req;
		const arch_register_class_t *cls;
		if(found_projs & (1 << i)) {
		req = get_ia32_out_req(node, i);
		/* NOTE(review): flags-class outputs appear to be exempt here */
		if(cls == &ia32_reg_classes[CLASS_ia32_flags]) {
		block = get_nodes_block(node);
		in[0] = new_r_Proj(current_ir_graph, block, node,
		                   arch_register_class_mode(cls), i);
		/* reuse a single Keep per node, appending further Projs to it */
		if(last_keep != NULL) {
			be_Keep_add_node(last_keep, cls, in[0]);
		last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
		if(sched_is_scheduled(node)) {
			sched_add_after(node, last_keep);
4665 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
4668 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
4670 ir_graph *irg = be_get_birg_irg(cg->birg);
4671 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
/**
 * Do the transformation: drive the firm -> ia32 rewrite of the whole graph.
 * Sets up the heights analysis and the non-address-mode node information,
 * runs the generic backend transform driver, then tears both down again.
 */
void ia32_transform_graph(ia32_code_gen_t *cg) {
	ir_graph *irg = cg->irg;
	register_transformers();
	initial_fpcw = NULL;
	BE_TIMER_PUSH(t_heights);
	heights = heights_new(irg);
	BE_TIMER_POP(t_heights);
	/* precompute which nodes must not be matched into address modes */
	ia32_calculate_non_address_mode_nodes(cg->birg);
	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();
	be_transform_graph(cg->birg, ia32_pretransform_node, cg);
	/* restore the caller's CSE setting */
	set_opt_cse(cse_last);
	ia32_free_non_address_mode_nodes();
	heights_free(heights);
4702 void ia32_init_transform(void)
4704 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");