2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
36 #include "irgraph_t.h"
41 #include "iredges_t.h"
54 #include "../benode_t.h"
55 #include "../besched.h"
57 #include "../beutil.h"
58 #include "../beirg_t.h"
59 #include "../betranshlp.h"
62 #include "bearch_ia32_t.h"
63 #include "ia32_common_transform.h"
64 #include "ia32_nodes_attr.h"
65 #include "ia32_transform.h"
66 #include "ia32_new_nodes.h"
67 #include "ia32_map_regs.h"
68 #include "ia32_dbg_stat.h"
69 #include "ia32_optimize.h"
70 #include "ia32_util.h"
71 #include "ia32_address_mode.h"
72 #include "ia32_architecture.h"
74 #include "gen_ia32_regalloc_if.h"
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
82 #define TP_SFP_SIGN "ia32_sfp_sign"
83 #define TP_DFP_SIGN "ia32_dfp_sign"
84 #define TP_SFP_ABS "ia32_sfp_abs"
85 #define TP_DFP_ABS "ia32_dfp_abs"
86 #define TP_INT_MAX "ia32_int_max"
88 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
89 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
90 #define ENT_SFP_ABS "IA32_SFP_ABS"
91 #define ENT_DFP_ABS "IA32_DFP_ABS"
92 #define ENT_INT_MAX "IA32_INT_MAX"
94 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
95 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
97 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
99 static ir_node *initial_fpcw = NULL;
101 extern ir_op *get_op_Mulh(void);
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
128 static ir_node *create_immediate_or_transform(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
132 dbg_info *dbgi, ir_node *block,
133 ir_node *op, ir_node *orig_node);
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node) {
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node) {
142 return is_Const(node) && is_Const_one(node);
145 /** Return non-zero is a node represents the -1 constant. */
146 static bool is_Const_Minus_1(ir_node *node) {
147 return is_Const(node) && is_Const_all_one(node);
151 * returns true if constant can be created with a simple float command
153 static bool is_simple_x87_Const(ir_node *node)
155 tarval *tv = get_Const_tarval(node);
156 if (tarval_is_null(tv) || tarval_is_one(tv))
159 /* TODO: match all the other float constants */
164 * returns true if constant can be created with a simple float command
166 static bool is_simple_sse_Const(ir_node *node)
168 tarval *tv = get_Const_tarval(node);
169 ir_mode *mode = get_tarval_mode(tv);
174 if (tarval_is_null(tv) || tarval_is_one(tv))
177 if (mode == mode_D) {
178 unsigned val = get_tarval_sub_bits(tv, 0) |
179 (get_tarval_sub_bits(tv, 1) << 8) |
180 (get_tarval_sub_bits(tv, 2) << 16) |
181 (get_tarval_sub_bits(tv, 3) << 24);
183 /* lower 32bit are zero, really a 32bit constant */
187 /* TODO: match all the other float constants */
192 * Transforms a Const.
194 static ir_node *gen_Const(ir_node *node) {
195 ir_graph *irg = current_ir_graph;
196 ir_node *old_block = get_nodes_block(node);
197 ir_node *block = be_transform_node(old_block);
198 dbg_info *dbgi = get_irn_dbg_info(node);
199 ir_mode *mode = get_irn_mode(node);
201 assert(is_Const(node));
203 if (mode_is_float(mode)) {
205 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
206 ir_node *nomem = new_NoMem();
210 if (ia32_cg_config.use_sse2) {
211 tarval *tv = get_Const_tarval(node);
212 if (tarval_is_null(tv)) {
213 load = new_rd_ia32_xZero(dbgi, irg, block);
214 set_ia32_ls_mode(load, mode);
216 } else if (tarval_is_one(tv)) {
217 int cnst = mode == mode_F ? 26 : 55;
218 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
219 ir_node *imm2 = create_Immediate(NULL, 0, 2);
220 ir_node *pslld, *psrld;
222 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
223 set_ia32_ls_mode(load, mode);
224 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
225 set_ia32_ls_mode(pslld, mode);
226 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
227 set_ia32_ls_mode(psrld, mode);
229 } else if (mode == mode_F) {
230 /* we can place any 32bit constant by using a movd gp, sse */
231 unsigned val = get_tarval_sub_bits(tv, 0) |
232 (get_tarval_sub_bits(tv, 1) << 8) |
233 (get_tarval_sub_bits(tv, 2) << 16) |
234 (get_tarval_sub_bits(tv, 3) << 24);
235 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
236 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
237 set_ia32_ls_mode(load, mode);
240 if (mode == mode_D) {
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
246 ir_node *imm32 = create_Immediate(NULL, 0, 32);
247 ir_node *cnst, *psllq;
249 /* fine, lower 32bit are zero, produce 32bit value */
250 val = get_tarval_sub_bits(tv, 4) |
251 (get_tarval_sub_bits(tv, 5) << 8) |
252 (get_tarval_sub_bits(tv, 6) << 16) |
253 (get_tarval_sub_bits(tv, 7) << 24);
254 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
255 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
256 set_ia32_ls_mode(load, mode);
257 psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
258 set_ia32_ls_mode(psllq, mode);
263 floatent = create_float_const_entity(node);
265 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
267 set_ia32_op_type(load, ia32_AddrModeS);
268 set_ia32_am_sc(load, floatent);
269 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
270 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
273 if (is_Const_null(node)) {
274 load = new_rd_ia32_vfldz(dbgi, irg, block);
276 set_ia32_ls_mode(load, mode);
277 } else if (is_Const_one(node)) {
278 load = new_rd_ia32_vfld1(dbgi, irg, block);
280 set_ia32_ls_mode(load, mode);
282 floatent = create_float_const_entity(node);
284 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
285 set_ia32_op_type(load, ia32_AddrModeS);
286 set_ia32_am_sc(load, floatent);
287 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
288 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
289 /* take the mode from the entity */
290 set_ia32_ls_mode(load, get_type_mode(get_entity_type(floatent)));
294 /* Const Nodes before the initial IncSP are a bad idea, because
295 * they could be spilled and we have no SP ready at that point yet.
296 * So add a dependency to the initial frame pointer calculation to
297 * avoid that situation.
299 if (get_irg_start_block(irg) == block) {
300 add_irn_dep(load, get_irg_frame(irg));
303 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
305 } else { /* non-float mode */
307 tarval *tv = get_Const_tarval(node);
310 tv = tarval_convert_to(tv, mode_Iu);
312 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
314 panic("couldn't convert constant tarval (%+F)", node);
316 val = get_tarval_long(tv);
318 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
319 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
322 if (get_irg_start_block(irg) == block) {
323 add_irn_dep(cnst, get_irg_frame(irg));
331 * Transforms a SymConst.
333 static ir_node *gen_SymConst(ir_node *node) {
334 ir_graph *irg = current_ir_graph;
335 ir_node *old_block = get_nodes_block(node);
336 ir_node *block = be_transform_node(old_block);
337 dbg_info *dbgi = get_irn_dbg_info(node);
338 ir_mode *mode = get_irn_mode(node);
341 if (mode_is_float(mode)) {
342 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
343 ir_node *nomem = new_NoMem();
345 if (ia32_cg_config.use_sse2)
346 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
348 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
354 if(get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
361 /* Const Nodes before the initial IncSP are a bad idea, because
362 * they could be spilled and we have no SP ready at that point yet
364 if (get_irg_start_block(irg) == block) {
365 add_irn_dep(cnst, get_irg_frame(irg));
368 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
373 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
374 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
375 static const struct {
377 const char *ent_name;
378 const char *cnst_str;
381 } names [ia32_known_const_max] = {
382 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
383 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
384 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
385 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
386 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
388 static ir_entity *ent_cache[ia32_known_const_max];
390 const char *tp_name, *ent_name, *cnst_str;
398 ent_name = names[kct].ent_name;
399 if (! ent_cache[kct]) {
400 tp_name = names[kct].tp_name;
401 cnst_str = names[kct].cnst_str;
403 switch (names[kct].mode) {
404 case 0: mode = mode_Iu; break;
405 case 1: mode = mode_Lu; break;
406 default: mode = mode_F; break;
408 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
409 tp = new_type_primitive(new_id_from_str(tp_name), mode);
410 /* set the specified alignment */
411 set_type_alignment_bytes(tp, names[kct].align);
413 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
415 set_entity_ld_ident(ent, get_entity_ident(ent));
416 set_entity_visibility(ent, visibility_local);
417 set_entity_variability(ent, variability_constant);
418 set_entity_allocation(ent, allocation_static);
420 /* we create a new entity here: It's initialization must resist on the
422 rem = current_ir_graph;
423 current_ir_graph = get_const_code_irg();
424 cnst = new_Const(mode, tv);
425 current_ir_graph = rem;
427 set_atomic_ent_value(ent, cnst);
429 /* cache the entry */
430 ent_cache[kct] = ent;
433 return ent_cache[kct];
437 * return true if the node is a Proj(Load) and could be used in source address
438 * mode for another node. Will return only true if the @p other node is not
439 * dependent on the memory of the Load (for binary operations use the other
440 * input here, for unary operations use NULL).
442 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
443 ir_node *other, ir_node *other2, match_flags_t flags)
448 /* float constants are always available */
449 if (is_Const(node)) {
450 ir_mode *mode = get_irn_mode(node);
451 if (mode_is_float(mode)) {
452 if (ia32_cg_config.use_sse2) {
453 if (is_simple_sse_Const(node))
456 if (is_simple_x87_Const(node))
459 if (get_irn_n_edges(node) > 1)
467 load = get_Proj_pred(node);
468 pn = get_Proj_proj(node);
469 if (!is_Load(load) || pn != pn_Load_res)
471 if (get_nodes_block(load) != block)
473 /* we only use address mode if we're the only user of the load */
474 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
476 /* in some edge cases with address mode we might reach the load normally
477 * and through some AM sequence, if it is already materialized then we
478 * can't create an AM node from it */
479 if (be_is_transformed(node))
482 /* don't do AM if other node inputs depend on the load (via mem-proj) */
483 if (other != NULL && prevents_AM(block, load, other))
486 if (other2 != NULL && prevents_AM(block, load, other2))
492 typedef struct ia32_address_mode_t ia32_address_mode_t;
493 struct ia32_address_mode_t {
498 ia32_op_type_t op_type;
502 unsigned commutative : 1;
503 unsigned ins_permuted : 1;
506 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
510 /* construct load address */
511 memset(addr, 0, sizeof(addr[0]));
512 ia32_create_address_mode(addr, ptr, /*force=*/0);
514 noreg_gp = ia32_new_NoReg_gp(env_cg);
515 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
516 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
517 addr->mem = be_transform_node(mem);
520 static void build_address(ia32_address_mode_t *am, ir_node *node)
522 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
523 ia32_address_t *addr = &am->addr;
529 if (is_Const(node)) {
530 ir_entity *entity = create_float_const_entity(node);
531 addr->base = noreg_gp;
532 addr->index = noreg_gp;
533 addr->mem = new_NoMem();
534 addr->symconst_ent = entity;
536 am->ls_mode = get_type_mode(get_entity_type(entity));
537 am->pinned = op_pin_state_floats;
541 load = get_Proj_pred(node);
542 ptr = get_Load_ptr(load);
543 mem = get_Load_mem(load);
544 new_mem = be_transform_node(mem);
545 am->pinned = get_irn_pinned(load);
546 am->ls_mode = get_Load_mode(load);
547 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
550 /* construct load address */
551 ia32_create_address_mode(addr, ptr, /*force=*/0);
553 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
554 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
558 static void set_address(ir_node *node, const ia32_address_t *addr)
560 set_ia32_am_scale(node, addr->scale);
561 set_ia32_am_sc(node, addr->symconst_ent);
562 set_ia32_am_offs_int(node, addr->offset);
563 if(addr->symconst_sign)
564 set_ia32_am_sc_sign(node);
566 set_ia32_use_frame(node);
567 set_ia32_frame_ent(node, addr->frame_entity);
571 * Apply attributes of a given address mode to a node.
573 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
575 set_address(node, &am->addr);
577 set_ia32_op_type(node, am->op_type);
578 set_ia32_ls_mode(node, am->ls_mode);
579 if (am->pinned == op_pin_state_pinned) {
580 /* beware: some nodes are already pinned and did not allow to change the state */
581 if (get_irn_pinned(node) != op_pin_state_pinned)
582 set_irn_pinned(node, op_pin_state_pinned);
585 set_ia32_commutative(node);
589 * Check, if a given node is a Down-Conv, ie. a integer Conv
590 * from a mode with a mode with more bits to a mode with lesser bits.
591 * Moreover, we return only true if the node has not more than 1 user.
593 * @param node the node
594 * @return non-zero if node is a Down-Conv
596 static int is_downconv(const ir_node *node)
604 /* we only want to skip the conv when we're the only user
605 * (not optimal but for now...)
607 if(get_irn_n_edges(node) > 1)
610 src_mode = get_irn_mode(get_Conv_op(node));
611 dest_mode = get_irn_mode(node);
612 return ia32_mode_needs_gp_reg(src_mode)
613 && ia32_mode_needs_gp_reg(dest_mode)
614 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
617 /* Skip all Down-Conv's on a given node and return the resulting node. */
618 ir_node *ia32_skip_downconv(ir_node *node) {
619 while (is_downconv(node))
620 node = get_Conv_op(node);
625 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
627 ir_mode *mode = get_irn_mode(node);
632 if(mode_is_signed(mode)) {
637 block = get_nodes_block(node);
638 dbgi = get_irn_dbg_info(node);
640 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
644 * matches operands of a node into ia32 addressing/operand modes. This covers
645 * usage of source address mode, immediates, operations with non 32-bit modes,
647 * The resulting data is filled into the @p am struct. block is the block
648 * of the node whose arguments are matched. op1, op2 are the first and second
649 * input that are matched (op1 may be NULL). other_op is another unrelated
650 * input that is not matched! but which is needed sometimes to check if AM
651 * for op1/op2 is legal.
652 * @p flags describes the supported modes of the operation in detail.
654 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
655 ir_node *op1, ir_node *op2, ir_node *other_op,
658 ia32_address_t *addr = &am->addr;
659 ir_mode *mode = get_irn_mode(op2);
660 int mode_bits = get_mode_size_bits(mode);
661 ir_node *noreg_gp, *new_op1, *new_op2;
663 unsigned commutative;
664 int use_am_and_immediates;
667 memset(am, 0, sizeof(am[0]));
669 commutative = (flags & match_commutative) != 0;
670 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
671 use_am = (flags & match_am) != 0;
672 use_immediate = (flags & match_immediate) != 0;
673 assert(!use_am_and_immediates || use_immediate);
676 assert(!commutative || op1 != NULL);
677 assert(use_am || !(flags & match_8bit_am));
678 assert(use_am || !(flags & match_16bit_am));
680 if (mode_bits == 8) {
681 if (!(flags & match_8bit_am))
683 /* we don't automatically add upconvs yet */
684 assert((flags & match_mode_neutral) || (flags & match_8bit));
685 } else if (mode_bits == 16) {
686 if (!(flags & match_16bit_am))
688 /* we don't automatically add upconvs yet */
689 assert((flags & match_mode_neutral) || (flags & match_16bit));
692 /* we can simply skip downconvs for mode neutral nodes: the upper bits
693 * can be random for these operations */
694 if (flags & match_mode_neutral) {
695 op2 = ia32_skip_downconv(op2);
697 op1 = ia32_skip_downconv(op1);
701 /* match immediates. firm nodes are normalized: constants are always on the
704 if (!(flags & match_try_am) && use_immediate) {
705 new_op2 = try_create_Immediate(op2, 0);
708 noreg_gp = ia32_new_NoReg_gp(env_cg);
709 if (new_op2 == NULL &&
710 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
711 build_address(am, op2);
712 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
713 if (mode_is_float(mode)) {
714 new_op2 = ia32_new_NoReg_vfp(env_cg);
718 am->op_type = ia32_AddrModeS;
719 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
721 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
723 build_address(am, op1);
725 if (mode_is_float(mode)) {
726 noreg = ia32_new_NoReg_vfp(env_cg);
731 if (new_op2 != NULL) {
734 new_op1 = be_transform_node(op2);
736 am->ins_permuted = 1;
738 am->op_type = ia32_AddrModeS;
740 am->op_type = ia32_Normal;
742 if (flags & match_try_am) {
748 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
750 new_op2 = be_transform_node(op2);
752 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
754 if (addr->base == NULL)
755 addr->base = noreg_gp;
756 if (addr->index == NULL)
757 addr->index = noreg_gp;
758 if (addr->mem == NULL)
759 addr->mem = new_NoMem();
761 am->new_op1 = new_op1;
762 am->new_op2 = new_op2;
763 am->commutative = commutative;
766 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
771 if (am->mem_proj == NULL)
774 /* we have to create a mode_T so the old MemProj can attach to us */
775 mode = get_irn_mode(node);
776 load = get_Proj_pred(am->mem_proj);
778 be_set_transformed_node(load, node);
780 if (mode != mode_T) {
781 set_irn_mode(node, mode_T);
782 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
789 * Construct a standard binary operation, set AM and immediate if required.
791 * @param node The original node for which the binop is created
792 * @param op1 The first operand
793 * @param op2 The second operand
794 * @param func The node constructor function
795 * @return The constructed ia32 node.
797 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
798 construct_binop_func *func, match_flags_t flags)
801 ir_node *block, *new_block, *new_node;
802 ia32_address_mode_t am;
803 ia32_address_t *addr = &am.addr;
805 block = get_nodes_block(node);
806 match_arguments(&am, block, op1, op2, NULL, flags);
808 dbgi = get_irn_dbg_info(node);
809 new_block = be_transform_node(block);
810 new_node = func(dbgi, current_ir_graph, new_block,
811 addr->base, addr->index, addr->mem,
812 am.new_op1, am.new_op2);
813 set_am_attributes(new_node, &am);
814 /* we can't use source address mode anymore when using immediates */
815 if (!(flags & match_am_and_immediates) &&
816 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
817 set_ia32_am_support(new_node, ia32_am_none);
818 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
820 new_node = fix_mem_proj(new_node, &am);
827 n_ia32_l_binop_right,
828 n_ia32_l_binop_eflags
830 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
831 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
832 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
833 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
834 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
835 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
838 * Construct a binary operation which also consumes the eflags.
840 * @param node The node to transform
841 * @param func The node constructor function
842 * @param flags The match flags
843 * @return The constructor ia32 node
845 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
848 ir_node *src_block = get_nodes_block(node);
849 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
850 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
851 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
853 ir_node *block, *new_node, *new_eflags;
854 ia32_address_mode_t am;
855 ia32_address_t *addr = &am.addr;
857 match_arguments(&am, src_block, op1, op2, eflags, flags);
859 dbgi = get_irn_dbg_info(node);
860 block = be_transform_node(src_block);
861 new_eflags = be_transform_node(eflags);
862 new_node = func(dbgi, current_ir_graph, block, addr->base, addr->index,
863 addr->mem, am.new_op1, am.new_op2, new_eflags);
864 set_am_attributes(new_node, &am);
865 /* we can't use source address mode anymore when using immediates */
866 if (!(flags & match_am_and_immediates) &&
867 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
868 set_ia32_am_support(new_node, ia32_am_none);
869 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
871 new_node = fix_mem_proj(new_node, &am);
876 static ir_node *get_fpcw(void)
879 if (initial_fpcw != NULL)
882 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
883 &ia32_fp_cw_regs[REG_FPCW]);
884 initial_fpcw = be_transform_node(fpcw);
890 * Construct a standard binary operation, set AM and immediate if required.
892 * @param op1 The first operand
893 * @param op2 The second operand
894 * @param func The node constructor function
895 * @return The constructed ia32 node.
897 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
898 construct_binop_float_func *func,
901 ir_mode *mode = get_irn_mode(node);
903 ir_node *block, *new_block, *new_node;
904 ia32_address_mode_t am;
905 ia32_address_t *addr = &am.addr;
907 /* cannot use address mode with long double on x87 */
908 if (get_mode_size_bits(mode) > 64)
911 block = get_nodes_block(node);
912 match_arguments(&am, block, op1, op2, NULL, flags);
914 dbgi = get_irn_dbg_info(node);
915 new_block = be_transform_node(block);
916 new_node = func(dbgi, current_ir_graph, new_block,
917 addr->base, addr->index, addr->mem,
918 am.new_op1, am.new_op2, get_fpcw());
919 set_am_attributes(new_node, &am);
921 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
923 new_node = fix_mem_proj(new_node, &am);
929 * Construct a shift/rotate binary operation, sets AM and immediate if required.
931 * @param op1 The first operand
932 * @param op2 The second operand
933 * @param func The node constructor function
934 * @return The constructed ia32 node.
936 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
937 construct_shift_func *func,
941 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
943 assert(! mode_is_float(get_irn_mode(node)));
944 assert(flags & match_immediate);
945 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
947 if (flags & match_mode_neutral) {
948 op1 = ia32_skip_downconv(op1);
949 new_op1 = be_transform_node(op1);
950 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
951 new_op1 = create_upconv(op1, node);
953 new_op1 = be_transform_node(op1);
956 /* the shift amount can be any mode that is bigger than 5 bits, since all
957 * other bits are ignored anyway */
958 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
959 ir_node *const op = get_Conv_op(op2);
960 if (mode_is_float(get_irn_mode(op)))
963 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
965 new_op2 = create_immediate_or_transform(op2, 0);
967 dbgi = get_irn_dbg_info(node);
968 block = get_nodes_block(node);
969 new_block = be_transform_node(block);
970 new_node = func(dbgi, current_ir_graph, new_block, new_op1, new_op2);
971 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
973 /* lowered shift instruction may have a dependency operand, handle it here */
974 if (get_irn_arity(node) == 3) {
975 /* we have a dependency */
976 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
977 add_irn_dep(new_node, new_dep);
985 * Construct a standard unary operation, set AM and immediate if required.
987 * @param op The operand
988 * @param func The node constructor function
989 * @return The constructed ia32 node.
991 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
995 ir_node *block, *new_block, *new_op, *new_node;
997 assert(flags == 0 || flags == match_mode_neutral);
998 if (flags & match_mode_neutral) {
999 op = ia32_skip_downconv(op);
1002 new_op = be_transform_node(op);
1003 dbgi = get_irn_dbg_info(node);
1004 block = get_nodes_block(node);
1005 new_block = be_transform_node(block);
1006 new_node = func(dbgi, current_ir_graph, new_block, new_op);
1008 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1013 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1014 ia32_address_t *addr)
1016 ir_node *base, *index, *res;
1020 base = ia32_new_NoReg_gp(env_cg);
1022 base = be_transform_node(base);
1025 index = addr->index;
1026 if (index == NULL) {
1027 index = ia32_new_NoReg_gp(env_cg);
1029 index = be_transform_node(index);
1032 res = new_rd_ia32_Lea(dbgi, current_ir_graph, block, base, index);
1033 set_address(res, addr);
1039 * Returns non-zero if a given address mode has a symbolic or
1040 * numerical offset != 0.
1042 static int am_has_immediates(const ia32_address_t *addr)
1044 return addr->offset != 0 || addr->symconst_ent != NULL
1045 || addr->frame_entity || addr->use_frame;
1049 * Creates an ia32 Add.
1051 * @return the created ia32 Add node
1053 static ir_node *gen_Add(ir_node *node) {
1054 ir_mode *mode = get_irn_mode(node);
1055 ir_node *op1 = get_Add_left(node);
1056 ir_node *op2 = get_Add_right(node);
1058 ir_node *block, *new_block, *new_node, *add_immediate_op;
1059 ia32_address_t addr;
1060 ia32_address_mode_t am;
1062 if (mode_is_float(mode)) {
1063 if (ia32_cg_config.use_sse2)
1064 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1065 match_commutative | match_am);
1067 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1068 match_commutative | match_am);
1071 ia32_mark_non_am(node);
1073 op2 = ia32_skip_downconv(op2);
1074 op1 = ia32_skip_downconv(op1);
1078 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1079 * 1. Add with immediate -> Lea
1080 * 2. Add with possible source address mode -> Add
1081 * 3. Otherwise -> Lea
1083 memset(&addr, 0, sizeof(addr));
1084 ia32_create_address_mode(&addr, node, /*force=*/1);
1085 add_immediate_op = NULL;
1087 dbgi = get_irn_dbg_info(node);
1088 block = get_nodes_block(node);
1089 new_block = be_transform_node(block);
1092 if(addr.base == NULL && addr.index == NULL) {
1093 ir_graph *irg = current_ir_graph;
1094 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1095 addr.symconst_sign, addr.offset);
1096 add_irn_dep(new_node, get_irg_frame(irg));
1097 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1100 /* add with immediate? */
1101 if(addr.index == NULL) {
1102 add_immediate_op = addr.base;
1103 } else if(addr.base == NULL && addr.scale == 0) {
1104 add_immediate_op = addr.index;
1107 if(add_immediate_op != NULL) {
1108 if(!am_has_immediates(&addr)) {
1109 #ifdef DEBUG_libfirm
1110 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1113 return be_transform_node(add_immediate_op);
1116 new_node = create_lea_from_address(dbgi, new_block, &addr);
1117 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1121 /* test if we can use source address mode */
1122 match_arguments(&am, block, op1, op2, NULL, match_commutative
1123 | match_mode_neutral | match_am | match_immediate | match_try_am);
1125 /* construct an Add with source address mode */
1126 if (am.op_type == ia32_AddrModeS) {
1127 ir_graph *irg = current_ir_graph;
1128 ia32_address_t *am_addr = &am.addr;
1129 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1130 am_addr->index, am_addr->mem, am.new_op1,
1132 set_am_attributes(new_node, &am);
1133 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1135 new_node = fix_mem_proj(new_node, &am);
1140 /* otherwise construct a lea */
1141 new_node = create_lea_from_address(dbgi, new_block, &addr);
1142 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1147 * Creates an ia32 Mul.
1149 * @return the created ia32 Mul node
1151 static ir_node *gen_Mul(ir_node *node) {
1152 ir_node *op1 = get_Mul_left(node);
1153 ir_node *op2 = get_Mul_right(node);
1154 ir_mode *mode = get_irn_mode(node);
1156 if (mode_is_float(mode)) {
1157 if (ia32_cg_config.use_sse2)
1158 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1159 match_commutative | match_am);
1161 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1162 match_commutative | match_am);
1164 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1165 match_commutative | match_am | match_mode_neutral |
1166 match_immediate | match_am_and_immediates);
1170 * Creates an ia32 Mulh.
1171 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1172 * this result while Mul returns the lower 32 bit.
1174 * @return the created ia32 Mulh node
/* Signed Mulh uses the one-operand IMul1OP, unsigned uses Mul; in both
 * cases the result is the Proj on the high 32 bits of the 64-bit product.
 * NOTE(review): the declaration of new_node and the else-line are elided
 * in this excerpt. */
1176 static ir_node *gen_Mulh(ir_node *node) {
1177 ir_node *block = get_nodes_block(node);
1178 ir_node *new_block = be_transform_node(block);
1179 dbg_info *dbgi = get_irn_dbg_info(node);
1180 ir_node *op1 = get_Mulh_left(node);
1181 ir_node *op2 = get_Mulh_right(node);
1182 ir_mode *mode = get_irn_mode(node);
1184 ir_node *proj_res_high;
1186 if (mode_is_signed(mode)) {
1187 new_node = gen_binop(node, op1, op2, new_rd_ia32_IMul1OP, match_commutative | match_am);
1188 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1189 mode_Iu, pn_ia32_IMul1OP_res_high);
1191 new_node = gen_binop(node, op1, op2, new_rd_ia32_Mul, match_commutative | match_am);
1192 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1193 mode_Iu, pn_ia32_Mul_res_high);
1195 return proj_res_high;
1199 * Creates an ia32 And.
1201 * @return The created ia32 And node
/* And with constant 0xFF/0xFFFF is recognized as a zero extension and
 * turned into an I2I Conv; everything else becomes a plain ia32 And. */
1203 static ir_node *gen_And(ir_node *node) {
1204 ir_node *op1 = get_And_left(node);
1205 ir_node *op2 = get_And_right(node);
1206 assert(! mode_is_float(get_irn_mode(node)));
1208 /* is it a zero extension? */
1209 if (is_Const(op2)) {
1210 tarval *tv = get_Const_tarval(op2);
1211 long v = get_tarval_long(tv);
1213 if (v == 0xFF || v == 0xFFFF) {
1214 dbg_info *dbgi = get_irn_dbg_info(node);
1215 ir_node *block = get_nodes_block(node);
/* NOTE(review): the src_mode selection for the 0xFF case is elided in
 * this excerpt; presumably src_mode is mode_Bu/mode_Hu — TODO confirm. */
1222 assert(v == 0xFFFF);
1225 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1230 return gen_binop(node, op1, op2, new_rd_ia32_And,
1231 match_commutative | match_mode_neutral | match_am
1238 * Creates an ia32 Or.
1240 * @return The created ia32 Or node
/* Integer-only: float Or must not reach the backend. */
1242 static ir_node *gen_Or(ir_node *node) {
1243 ir_node *op1 = get_Or_left(node);
1244 ir_node *op2 = get_Or_right(node);
1246 assert (! mode_is_float(get_irn_mode(node)));
1247 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1248 | match_mode_neutral | match_am | match_immediate);
1254 * Creates an ia32 Eor.
1256 * @return The created ia32 Eor node
/* Firm Eor (xor) maps directly to ia32 Xor; integer-only. */
1258 static ir_node *gen_Eor(ir_node *node) {
1259 ir_node *op1 = get_Eor_left(node);
1260 ir_node *op2 = get_Eor_right(node);
1262 assert(! mode_is_float(get_irn_mode(node)));
1263 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1264 | match_mode_neutral | match_am | match_immediate);
1269 * Creates an ia32 Sub.
1271 * @return The created ia32 Sub node
/* Float subtract: SSE2 xSub or x87 vfsub.  Integer subtract: ia32 Sub.
 * A Sub with constant right operand should have been normalized to an
 * Add by the middleend; it is only warned about, not rejected. */
1273 static ir_node *gen_Sub(ir_node *node) {
1274 ir_node *op1 = get_Sub_left(node);
1275 ir_node *op2 = get_Sub_right(node);
1276 ir_mode *mode = get_irn_mode(node);
1278 if (mode_is_float(mode)) {
1279 if (ia32_cg_config.use_sse2)
1280 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1282 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
1286 if (is_Const(op2)) {
1287 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1291 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1292 | match_am | match_immediate);
/* Builds the memory input for a node that consumed a Load via address mode.
 * Combines the node's original memory (src_mem) with the AM memory (am_mem)
 * while avoiding memory loops: a Proj of the consumed Load itself must not
 * be fed back in.  For a Sync predecessor the offending Proj is filtered
 * out and a new Sync is built; otherwise a 2-input Sync is created.
 * NOTE(review): several lines (declarations of ins/i/n, fallback branch)
 * are elided in this excerpt. */
1295 static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
1296 ir_node *const src_val,
1297 ir_node *const src_mem,
1298 ir_node *const am_mem)
1300 if (is_NoMem(am_mem)) {
1301 return be_transform_node(src_mem);
1302 } else if (is_Proj(src_val) &&
1304 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1305 /* avoid memory loop */
1307 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1308 ir_node *const ptr_pred = get_Proj_pred(src_val);
1309 int const arity = get_Sync_n_preds(src_mem);
/* collect all Sync inputs except Projs of the consumed load */
1314 NEW_ARR_A(ir_node*, ins, arity + 1);
1316 for (i = arity - 1; i >= 0; --i) {
1317 ir_node *const pred = get_Sync_pred(src_mem, i);
1319 /* avoid memory loop */
1320 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1323 ins[n++] = be_transform_node(pred);
1328 return new_r_Sync(irg, block, n, ins);
/* default: combine src_mem and am_mem in a fresh 2-input Sync */
1332 ins[0] = be_transform_node(src_mem);
1334 return new_r_Sync(irg, block, 2, ins);
1339 * Generates an ia32 DivMod with additional infrastructure for the
1340 * register allocator if needed.
/* Common transformation for Div, Mod and DivMod: extracts the operands of
 * whichever opcode is present, matches address mode for the divisor, and
 * emits IDiv (signed, with Cltd sign extension of edx) or Div (unsigned,
 * with a zero constant as high word). */
1342 static ir_node *create_Div(ir_node *node)
1344 ir_graph *irg = current_ir_graph;
1345 dbg_info *dbgi = get_irn_dbg_info(node);
1346 ir_node *block = get_nodes_block(node);
1347 ir_node *new_block = be_transform_node(block);
1354 ir_node *sign_extension;
1355 ia32_address_mode_t am;
1356 ia32_address_t *addr = &am.addr;
1358 /* the upper bits have random contents for smaller modes */
1359 switch (get_irn_opcode(node)) {
1361 op1 = get_Div_left(node);
1362 op2 = get_Div_right(node);
1363 mem = get_Div_mem(node);
1364 mode = get_Div_resmode(node);
1367 op1 = get_Mod_left(node);
1368 op2 = get_Mod_right(node);
1369 mem = get_Mod_mem(node);
1370 mode = get_Mod_resmode(node);
1373 op1 = get_DivMod_left(node);
1374 op2 = get_DivMod_right(node);
1375 mem = get_DivMod_mem(node);
1376 mode = get_DivMod_resmode(node);
1379 panic("invalid divmod node %+F", node);
1382 match_arguments(&am, block, op1, op2, NULL, match_am);
1384 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1385 is the memory of the consumed address. We can have only the second op as address
1386 in Div nodes, so check only op2. */
1387 new_mem = transform_AM_mem(irg, block, op2, mem, addr->mem);
1389 if (mode_is_signed(mode)) {
/* signed: sign-extend the dividend into edx via Cltd (cdq) */
1390 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1391 add_irn_dep(produceval, get_irg_frame(irg));
1392 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1395 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1396 addr->index, new_mem, am.new_op2,
1397 am.new_op1, sign_extension);
/* unsigned: high word is simply zero */
1399 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1400 add_irn_dep(sign_extension, get_irg_frame(irg));
1402 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1403 addr->index, new_mem, am.new_op2,
1404 am.new_op1, sign_extension);
/* keep the pinned state so a trapping division is not moved */
1407 set_irn_pinned(new_node, get_irn_pinned(node));
1409 set_am_attributes(new_node, &am);
1410 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1412 new_node = fix_mem_proj(new_node, &am);
/* Mod is handled by the common Div/Mod/DivMod construction. */
1418 static ir_node *gen_Mod(ir_node *node) {
1419 return create_Div(node);
/* Div is handled by the common Div/Mod/DivMod construction. */
1422 static ir_node *gen_Div(ir_node *node) {
1423 return create_Div(node);
/* DivMod is handled by the common Div/Mod/DivMod construction. */
1426 static ir_node *gen_DivMod(ir_node *node) {
1427 return create_Div(node);
1433 * Creates an ia32 floating Div.
1435 * @return The created ia32 xDiv node
/* Quot (float division) maps to SSE2 xDiv or x87 vfdiv. */
1437 static ir_node *gen_Quot(ir_node *node)
1439 ir_node *op1 = get_Quot_left(node);
1440 ir_node *op2 = get_Quot_right(node);
1442 if (ia32_cg_config.use_sse2) {
1443 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1445 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1451 * Creates an ia32 Shl.
1453 * @return The created ia32 Shl node
/* Shift left is mode-neutral because low bits are unaffected by high bits. */
1455 static ir_node *gen_Shl(ir_node *node) {
1456 ir_node *left = get_Shl_left(node);
1457 ir_node *right = get_Shl_right(node);
1459 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1460 match_mode_neutral | match_immediate);
1464 * Creates an ia32 Shr.
1466 * @return The created ia32 Shr node
/* Logical shift right; not mode-neutral since high bits shift in. */
1468 static ir_node *gen_Shr(ir_node *node) {
1469 ir_node *left = get_Shr_left(node);
1470 ir_node *right = get_Shr_right(node);
1472 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1478 * Creates an ia32 Sar.
1480 * @return The created ia32 Shrs node
/* Arithmetic shift right.  Two special patterns are recognized first:
 *  - x >>s 31 (mode_Is): pure sign extraction, emitted as Cltd (cdq);
 *  - (x << c) >>s c with c == 16 or 24: an 8/16-bit sign extension,
 *    emitted as an I2I Conv.
 * Everything else becomes a plain Sar.
 * NOTE(review): the guard checking val == 31 for the Cltd case is elided
 * in this excerpt — TODO confirm against the full file. */
1482 static ir_node *gen_Shrs(ir_node *node) {
1483 ir_node *left = get_Shrs_left(node);
1484 ir_node *right = get_Shrs_right(node);
1485 ir_mode *mode = get_irn_mode(node);
1487 if(is_Const(right) && mode == mode_Is) {
1488 tarval *tv = get_Const_tarval(right);
1489 long val = get_tarval_long(tv);
1491 /* this is a sign extension */
1492 ir_graph *irg = current_ir_graph;
1493 dbg_info *dbgi = get_irn_dbg_info(node);
1494 ir_node *block = be_transform_node(get_nodes_block(node));
1496 ir_node *new_op = be_transform_node(op);
1497 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1498 add_irn_dep(pval, get_irg_frame(irg));
1500 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1504 /* 8 or 16 bit sign extension? */
1505 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1506 ir_node *shl_left = get_Shl_left(left);
1507 ir_node *shl_right = get_Shl_right(left);
1508 if(is_Const(shl_right)) {
1509 tarval *tv1 = get_Const_tarval(right);
1510 tarval *tv2 = get_Const_tarval(shl_right);
1511 if(tv1 == tv2 && tarval_is_long(tv1)) {
1512 long val = get_tarval_long(tv1);
1513 if(val == 16 || val == 24) {
1514 dbg_info *dbgi = get_irn_dbg_info(node);
1515 ir_node *block = get_nodes_block(node);
/* NOTE(review): src_mode selection (Bs for 24, Hs for 16) is elided here */
1525 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1534 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1540 * Creates an ia32 Rol.
1542 * @param op1 The first operator
1543 * @param op2 The second operator
1544 * @return The created ia32 RotL node
/* Rotate left; callers pass the operands explicitly (see gen_Rotl). */
1546 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2) {
1547 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1553 * Creates an ia32 Ror.
1554 * NOTE: There is no RotR with immediate because this would always be a RotL
1555 * "imm-mode_size_bits" which can be pre-calculated.
1557 * @param op1 The first operator
1558 * @param op2 The second operator
1559 * @return The created ia32 RotR node
/* Rotate right; only reached from gen_Rotl's pattern match. */
1561 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2) {
1562 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1568 * Creates an ia32 RotR or RotL (depending on the found pattern).
1570 * @return The created ia32 RotL or RotR node
/* Firm only knows RotL.  If the rotate amount has the shape
 * "bits - e" (visible as an Add of Minus(e) and the constant bits),
 * a RotR by e is emitted instead; otherwise a plain RotL. */
1572 static ir_node *gen_Rotl(ir_node *node) {
1573 ir_node *rotate = NULL;
1574 ir_node *op1 = get_Rotl_left(node);
1575 ir_node *op2 = get_Rotl_right(node);
1577 /* Firm has only RotL, so we are looking for a right (op2)
1578 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1579 that means we can create a RotR instead of an Add and a RotL */
/* NOTE(review): the is_Add(op2) guard and 'add' assignment are elided in
 * this excerpt — presumably add == op2; TODO confirm. */
1583 ir_node *left = get_Add_left(add);
1584 ir_node *right = get_Add_right(add);
1585 if (is_Const(right)) {
1586 tarval *tv = get_Const_tarval(right);
1587 ir_mode *mode = get_irn_mode(node);
1588 long bits = get_mode_size_bits(mode);
1590 if (is_Minus(left) &&
1591 tarval_is_long(tv) &&
1592 get_tarval_long(tv) == bits &&
1595 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1596 rotate = gen_Ror(node, op1, get_Minus_op(left));
1601 if (rotate == NULL) {
1602 rotate = gen_Rol(node, op1, op2);
1611 * Transforms a Minus node.
1613 * @return The created ia32 Minus node
/* Float negation: SSE2 flips the sign bit by xor-ing with a sign-mask
 * constant loaded via address mode; x87 uses fchs.  Integer negation
 * uses the Neg instruction. */
1615 static ir_node *gen_Minus(ir_node *node)
1617 ir_node *op = get_Minus_op(node);
1618 ir_node *block = be_transform_node(get_nodes_block(node));
1619 ir_graph *irg = current_ir_graph;
1620 dbg_info *dbgi = get_irn_dbg_info(node);
1621 ir_mode *mode = get_irn_mode(node);
1626 if (mode_is_float(mode)) {
1627 ir_node *new_op = be_transform_node(op);
1628 if (ia32_cg_config.use_sse2) {
1629 /* TODO: non-optimal... if we have many xXors, then we should
1630 * rather create a load for the const and use that instead of
1631 * several AM nodes... */
1632 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1633 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1634 ir_node *nomem = new_rd_NoMem(irg);
1636 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1637 nomem, new_op, noreg_xmm);
/* pick the 32- or 64-bit sign-mask constant entity */
1639 size = get_mode_size_bits(mode);
1640 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1642 set_ia32_am_sc(new_node, ent);
1643 set_ia32_op_type(new_node, ia32_AddrModeS);
1644 set_ia32_ls_mode(new_node, mode);
/* x87: fchs negates the top of stack */
1646 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
1649 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1652 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1658 * Transforms a Not node.
1660 * @return The created ia32 Not node
/* Bitwise complement; mode_b Nots must have been lowered earlier. */
1662 static ir_node *gen_Not(ir_node *node) {
1663 ir_node *op = get_Not_op(node);
1665 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1666 assert (! mode_is_float(get_irn_mode(node)));
1668 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1674 * Transforms an Abs node.
1676 * @return The created ia32 Abs node
/* Float abs: SSE2 masks the sign bit away (xAnd with an abs-mask
 * constant); x87 uses fabs.  Integer abs uses the classic branch-free
 * sequence: s = x >> 31 (via Cltd), result = (x ^ s) - s. */
1678 static ir_node *gen_Abs(ir_node *node)
1680 ir_node *block = get_nodes_block(node);
1681 ir_node *new_block = be_transform_node(block);
1682 ir_node *op = get_Abs_op(node);
1683 ir_graph *irg = current_ir_graph;
1684 dbg_info *dbgi = get_irn_dbg_info(node);
1685 ir_mode *mode = get_irn_mode(node);
1686 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1687 ir_node *nomem = new_NoMem();
1693 if (mode_is_float(mode)) {
1694 new_op = be_transform_node(op);
1696 if (ia32_cg_config.use_sse2) {
1697 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1698 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1699 nomem, new_op, noreg_fp);
/* pick the 32- or 64-bit abs-mask constant entity */
1701 size = get_mode_size_bits(mode);
1702 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1704 set_ia32_am_sc(new_node, ent);
1706 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1708 set_ia32_op_type(new_node, ia32_AddrModeS);
1709 set_ia32_ls_mode(new_node, mode);
1711 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1712 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1715 ir_node *xor, *pval, *sign_extension;
/* widen sub-32-bit operands first so Cltd sees a full register */
1717 if (get_mode_size_bits(mode) == 32) {
1718 new_op = be_transform_node(op);
1720 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1723 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1724 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1727 add_irn_dep(pval, get_irg_frame(irg));
1728 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1730 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1731 nomem, new_op, sign_extension);
1732 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1734 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1735 nomem, xor, sign_extension);
1736 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1743 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* The bit-test sets the carry flag to bit n of x; used by get_flags_node. */
1745 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n) {
1746 dbg_info *dbgi = get_irn_dbg_info(cmp);
1747 ir_node *block = get_nodes_block(cmp);
1748 ir_node *new_block = be_transform_node(block);
1749 ir_node *op1 = be_transform_node(x);
1750 ir_node *op2 = be_transform_node(n);
1752 return new_rd_ia32_Bt(dbgi, current_ir_graph, new_block, op1, op2);
1756 * Transform a node returning a "flag" result.
1758 * @param node the node to transform
1759 * @param pnc_out the compare mode to use
/* Returns the flags-producing node for a boolean value.  A Proj of a Cmp
 * that tests a single bit ((1 << n) & x ==/!= 0) is strength-reduced to a
 * Bt instruction; the pnc is rewritten so the jump tests the carry flag.
 * Any other mode_b value is compared against 0 with a Test.
 * NOTE(review): the guards checking is_Cmp(pred)/is_And(l) and the returns
 * after each branch are elided in this excerpt. */
1761 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1770 /* we have a Cmp as input */
1771 if (is_Proj(node)) {
1772 ir_node *pred = get_Proj_pred(node);
1774 pn_Cmp pnc = get_Proj_proj(node);
1775 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1776 ir_node *l = get_Cmp_left(pred);
1777 ir_node *r = get_Cmp_right(pred);
1779 ir_node *la = get_And_left(l);
1780 ir_node *ra = get_And_right(l);
1782 ir_node *c = get_Shl_left(la);
1783 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1784 /* (1 << n) & ra) */
1785 ir_node *n = get_Shl_right(la);
1786 flags = gen_bt(pred, ra, n);
1787 /* we must generate a Jc/Jnc jump */
1788 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1791 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* symmetric case: the Shl is on the right side of the And */
1796 ir_node *c = get_Shl_left(ra);
1797 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1798 /* la & (1 << n)) */
1799 ir_node *n = get_Shl_right(ra);
1800 flags = gen_bt(pred, la, n);
1801 /* we must generate a Jc/Jnc jump */
1802 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1805 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1811 flags = be_transform_node(pred);
1817 /* a mode_b value, we have to compare it against 0 */
1818 dbgi = get_irn_dbg_info(node);
1819 new_block = be_transform_node(get_nodes_block(node));
1820 new_op = be_transform_node(node);
1821 noreg = ia32_new_NoReg_gp(env_cg);
1822 nomem = new_NoMem();
1823 flags = new_rd_ia32_Test(dbgi, current_ir_graph, new_block, noreg, noreg, nomem,
1824 new_op, new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1825 *pnc_out = pn_Cmp_Lg;
1830 * Transforms a Load.
1832 * @return the created ia32 Load node
/* Builds an ia32 Load (xLoad/vfld for float, Conv_I2I for sub-32-bit
 * integers so the value is extended on load, plain Load otherwise) with a
 * fully constructed address mode. */
1834 static ir_node *gen_Load(ir_node *node) {
1835 ir_node *old_block = get_nodes_block(node);
1836 ir_node *block = be_transform_node(old_block);
1837 ir_node *ptr = get_Load_ptr(node);
1838 ir_node *mem = get_Load_mem(node);
1839 ir_node *new_mem = be_transform_node(mem);
1842 ir_graph *irg = current_ir_graph;
1843 dbg_info *dbgi = get_irn_dbg_info(node);
1844 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1845 ir_mode *mode = get_Load_mode(node);
1848 ia32_address_t addr;
1850 /* construct load address */
1851 memset(&addr, 0, sizeof(addr));
1852 ia32_create_address_mode(&addr, ptr, /*force=*/0);
/* NOTE(review): the NULL->noreg fallbacks for base/index are elided here */
1859 base = be_transform_node(base);
1865 index = be_transform_node(index);
1868 if (mode_is_float(mode)) {
1869 if (ia32_cg_config.use_sse2) {
1870 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1872 res_mode = mode_xmm;
1874 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1876 res_mode = mode_vfp;
1879 assert(mode != mode_b);
1881 /* create a conv node with address mode for smaller modes */
1882 if(get_mode_size_bits(mode) < 32) {
1883 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1884 new_mem, noreg, mode);
1886 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1891 set_irn_pinned(new_node, get_irn_pinned(node));
1892 set_ia32_op_type(new_node, ia32_AddrModeS);
1893 set_ia32_ls_mode(new_node, mode);
1894 set_address(new_node, &addr);
1896 if(get_irn_pinned(node) == op_pin_state_floats) {
/* an unpinned load may be rematerialized instead of spilled */
1897 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1900 /* make sure we are scheduled behind the initial IncSP/Barrier
1901 * to avoid spills being placed before it
1903 if (block == get_irg_start_block(irg)) {
1904 add_irn_dep(new_node, get_irg_frame(irg));
1907 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Decides whether a value (a Proj of a Load) may be folded into a
 * destination-address-mode node for a Store in 'block' at 'ptr'.
 * Rejects the fold if the load has other users, lives in another block,
 * loads from a different pointer, or if 'other'/the store memory depends
 * on the load in a way that would create a cycle. */
1912 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1913 ir_node *ptr, ir_node *other)
1920 /* we only use address mode if we're the only user of the load */
1921 if (get_irn_n_edges(node) > 1)
1924 load = get_Proj_pred(node);
1927 if (get_nodes_block(load) != block)
1930 /* store should have the same pointer as the load */
1931 if (get_Load_ptr(load) != ptr)
1934 /* don't do AM if other node inputs depend on the load (via mem-proj) */
1935 if (other != NULL &&
1936 get_nodes_block(other) == block &&
1937 heights_reachable_in_block(heights, other, load)) {
/* NOTE(review): the Sync guard before this loop is elided in this excerpt */
1944 for (i = get_Sync_n_preds(mem) - 1; i >= 0; --i) {
1945 ir_node *const pred = get_Sync_pred(mem, i);
1947 if (is_Proj(pred) && get_Proj_pred(pred) == load)
1950 if (get_nodes_block(pred) == block &&
1951 heights_reachable_in_block(heights, pred, load)) {
1956 /* Store should be attached to the load */
1957 if (!is_Proj(mem) || get_Proj_pred(mem) != load)
/* Builds a destination-address-mode binop (e.g. AddMem): the Store's value
 * operand is a binop whose one input is a Load from the same address, so
 * the whole read-modify-write collapses into a single memory instruction.
 * Tries op1 as the memory side first, then op2 if the op is commutative.
 * Returns NULL if no destination AM is possible.
 * NOTE(review): the early "return NULL" fallback and parts of the func
 * calls' argument lists are elided in this excerpt. */
1964 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
1965 ir_node *mem, ir_node *ptr, ir_mode *mode,
1966 construct_binop_dest_func *func,
1967 construct_binop_dest_func *func8bit,
1968 match_flags_t flags)
1970 ir_node *src_block = get_nodes_block(node);
1972 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1973 ir_graph *irg = current_ir_graph;
1980 ia32_address_mode_t am;
1981 ia32_address_t *addr = &am.addr;
1982 memset(&am, 0, sizeof(am));
1984 assert(flags & match_dest_am);
1985 assert(flags & match_immediate); /* there is no destam node without... */
1986 commutative = (flags & match_commutative) != 0;
1988 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
1989 build_address(&am, op1);
1990 new_op = create_immediate_or_transform(op2, 0);
1991 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
1992 build_address(&am, op2);
1993 new_op = create_immediate_or_transform(op1, 0);
/* fill in NoReg/NoMem defaults for unused address components */
1998 if(addr->base == NULL)
1999 addr->base = noreg_gp;
2000 if(addr->index == NULL)
2001 addr->index = noreg_gp;
2002 if(addr->mem == NULL)
2003 addr->mem = new_NoMem();
2005 dbgi = get_irn_dbg_info(node);
2006 block = be_transform_node(src_block);
2007 new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
2009 if(get_mode_size_bits(mode) == 8) {
2010 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
2013 new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem,
2016 set_address(new_node, addr);
2017 set_ia32_op_type(new_node, ia32_AddrModeD);
2018 set_ia32_ls_mode(new_node, mode);
2019 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* redirect the consumed load's mem proj to the new combined node */
2021 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2022 mem_proj = be_transform_node(am.mem_proj);
2023 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Builds a destination-address-mode unop (e.g. NotMem/NegMem): like
 * dest_am_binop but for single-operand read-modify-write patterns.
 * Returns NULL if destination AM is not applicable. */
2028 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2029 ir_node *ptr, ir_mode *mode,
2030 construct_unop_dest_func *func)
2032 ir_graph *irg = current_ir_graph;
2033 ir_node *src_block = get_nodes_block(node);
2039 ia32_address_mode_t am;
2040 ia32_address_t *addr = &am.addr;
2041 memset(&am, 0, sizeof(am));
2043 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2046 build_address(&am, op);
2048 dbgi = get_irn_dbg_info(node);
2049 block = be_transform_node(src_block);
2050 new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
2051 new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem);
2052 set_address(new_node, addr);
2053 set_ia32_op_type(new_node, ia32_AddrModeD);
2054 set_ia32_ls_mode(new_node, mode);
2055 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* redirect the consumed load's mem proj to the new combined node */
2057 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2058 mem_proj = be_transform_node(am.mem_proj);
2059 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Tries to turn Store(Mux(sel, 1, 0)) of an 8-bit value into a SetMem
 * (setcc directly to memory).  'negated' records whether the Mux arms are
 * swapped.  Returns NULL if the pattern does not match.
 * NOTE(review): the negated assignments and the final return are elided
 * in this excerpt. */
2064 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2065 ir_mode *mode = get_irn_mode(node);
2066 ir_node *mux_true = get_Mux_true(node);
2067 ir_node *mux_false = get_Mux_false(node);
2078 ia32_address_t addr;
/* setcc only writes a single byte */
2080 if(get_mode_size_bits(mode) != 8)
2083 if(is_Const_1(mux_true) && is_Const_0(mux_false)) {
2085 } else if(is_Const_0(mux_true) && is_Const_1(mux_false)) {
2091 build_address_ptr(&addr, ptr, mem);
2093 irg = current_ir_graph;
2094 dbgi = get_irn_dbg_info(node);
2095 block = get_nodes_block(node);
2096 new_block = be_transform_node(block);
2097 cond = get_Mux_sel(node);
2098 flags = get_flags_node(cond, &pnc);
2099 new_mem = be_transform_node(mem);
2100 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2101 addr.index, addr.mem, flags, pnc, negated);
2102 set_address(new_node, &addr);
2103 set_ia32_op_type(new_node, ia32_AddrModeD);
2104 set_ia32_ls_mode(new_node, mode);
2105 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Tries to transform a Store into a destination-address-mode instruction:
 * if the stored value is a binop/unop whose memory operand loads from the
 * stored-to address, a single read-modify-write memory instruction is
 * emitted instead of Load+Op+Store.  Returns NULL when no pattern fits.
 * NOTE(review): case labels, breaks and several returns are elided in
 * this excerpt. */
2110 static ir_node *try_create_dest_am(ir_node *node) {
2111 ir_node *val = get_Store_value(node);
2112 ir_node *mem = get_Store_mem(node);
2113 ir_node *ptr = get_Store_ptr(node);
2114 ir_mode *mode = get_irn_mode(val);
2115 unsigned bits = get_mode_size_bits(mode);
2120 /* handle only GP modes for now... */
2121 if(!ia32_mode_needs_gp_reg(mode))
2125 /* store must be the only user of the val node */
2126 if(get_irn_n_edges(val) > 1)
2128 /* skip pointless convs */
2130 ir_node *conv_op = get_Conv_op(val);
2131 ir_mode *pred_mode = get_irn_mode(conv_op);
2132 if (!ia32_mode_needs_gp_reg(pred_mode))
2134 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2142 /* value must be in the same block */
2143 if(get_nodes_block(node) != get_nodes_block(val))
2146 switch (get_irn_opcode(val)) {
/* Add x,1 / Add x,-1 become IncMem / DecMem */
2148 op1 = get_Add_left(val);
2149 op2 = get_Add_right(val);
2150 if(is_Const_1(op2)) {
2151 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2152 new_rd_ia32_IncMem);
2154 } else if(is_Const_Minus_1(op2)) {
2155 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2156 new_rd_ia32_DecMem);
2159 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2160 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2161 match_dest_am | match_commutative |
2165 op1 = get_Sub_left(val);
2166 op2 = get_Sub_right(val);
2167 if (is_Const(op2)) {
2168 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2170 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2171 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2172 match_dest_am | match_immediate |
2176 op1 = get_And_left(val);
2177 op2 = get_And_right(val);
2178 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2179 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2180 match_dest_am | match_commutative |
2184 op1 = get_Or_left(val);
2185 op2 = get_Or_right(val);
2186 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2187 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2188 match_dest_am | match_commutative |
2192 op1 = get_Eor_left(val);
2193 op2 = get_Eor_right(val);
2194 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2195 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2196 match_dest_am | match_commutative |
2200 op1 = get_Shl_left(val);
2201 op2 = get_Shl_right(val);
2202 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2203 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2204 match_dest_am | match_immediate);
2207 op1 = get_Shr_left(val);
2208 op2 = get_Shr_right(val);
2209 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2210 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2211 match_dest_am | match_immediate);
2214 op1 = get_Shrs_left(val);
2215 op2 = get_Shrs_right(val);
2216 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2217 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2218 match_dest_am | match_immediate);
2221 op1 = get_Rotl_left(val);
2222 op2 = get_Rotl_right(val);
2223 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2224 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2225 match_dest_am | match_immediate);
2227 /* TODO: match ROR patterns... */
2229 new_node = try_create_SetMem(val, ptr, mem);
2232 op1 = get_Minus_op(val);
2233 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2236 /* should be lowered already */
2237 assert(mode != mode_b);
2238 op1 = get_Not_op(val);
2239 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* propagate the Store's pinned state to the fused node */
2245 if(new_node != NULL) {
2246 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2247 get_irn_pinned(node) == op_pin_state_pinned) {
2248 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns whether node is a Conv from a float mode to mode_Is/mode_Hs —
 * the pattern gen_normal_Store turns into a vfist. */
2255 static int is_float_to_int_conv(const ir_node *node)
2257 ir_mode *mode = get_irn_mode(node);
2261 if (mode != mode_Is && mode != mode_Hs)
2266 conv_op = get_Conv_op(node);
2267 conv_mode = get_irn_mode(conv_op);
2269 if(!mode_is_float(conv_mode))
2276 * Transform a Store(floatConst).
2278 * @return the created ia32 Store node
/* Stores a float constant by emitting one 32-bit immediate integer Store
 * per 4 bytes of the constant (1 for float, 2 for double) and Sync-ing
 * the resulting memory values.
 * NOTE(review): the do-loop header, ofs/size updates and the ins array
 * declaration are elided in this excerpt. */
2280 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2282 ir_mode *mode = get_irn_mode(cns);
2283 unsigned size = get_mode_size_bytes(mode);
2284 tarval *tv = get_Const_tarval(cns);
2285 ir_node *block = get_nodes_block(node);
2286 ir_node *new_block = be_transform_node(block);
2287 ir_node *ptr = get_Store_ptr(node);
2288 ir_node *mem = get_Store_mem(node);
2289 ir_graph *irg = current_ir_graph;
2290 dbg_info *dbgi = get_irn_dbg_info(node);
2294 ia32_address_t addr;
2296 assert(size % 4 == 0);
2299 build_address_ptr(&addr, ptr, mem);
/* assemble the next 32-bit little-endian chunk of the constant */
2303 get_tarval_sub_bits(tv, ofs) |
2304 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2305 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2306 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2307 ir_node *imm = create_Immediate(NULL, 0, val);
2309 ir_node *new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2310 addr.index, addr.mem, imm);
2312 set_irn_pinned(new_node, get_irn_pinned(node));
2313 set_ia32_op_type(new_node, ia32_AddrModeD);
2314 set_ia32_ls_mode(new_node, mode_Iu);
2315 set_address(new_node, &addr);
2316 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2318 ins[i++] = new_node;
2323 } while (size != 0);
2325 return i == 1 ? ins[0] : new_rd_Sync(dbgi, irg, new_block, i, ins);
2329 * Generate a vfist or vfisttp instruction.
/* Emits float-to-int store: vfisttp (SSE3) if available — it always pops
 * the x87 stack, so a Keep preserves the value for other users — else a
 * plain vfist with an explicit truncating FPU control-word mode.
 * The created store node is reported through *fist (presumably; the
 * assignments to *fist are elided in this excerpt — TODO confirm). */
2331 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2332 ir_node *mem, ir_node *val, ir_node **fist)
2336 if (ia32_cg_config.use_fisttp) {
2337 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2338 if other users exists */
2339 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2340 ir_node *vfisttp = new_rd_ia32_vfisttp(dbgi, irg, block, base, index, mem, val);
2341 ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2342 be_new_Keep(reg_class, irg, block, 1, &value);
2344 new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
2347 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2350 new_node = new_rd_ia32_vfist(dbgi, irg, block, base, index, mem, val, trunc_mode);
2356 * Transforms a normal Store.
2358 * @return the created ia32 Store node
/* General Store lowering: first tries destination address mode; otherwise
 * emits xStore/vfst for floats, a vfist for float->int Conv values on x87,
 * or Store/Store8Bit for plain integers.  Redundant Convs in front of the
 * stored value are skipped. */
2360 static ir_node *gen_normal_Store(ir_node *node)
2362 ir_node *val = get_Store_value(node);
2363 ir_mode *mode = get_irn_mode(val);
2364 ir_node *block = get_nodes_block(node);
2365 ir_node *new_block = be_transform_node(block);
2366 ir_node *ptr = get_Store_ptr(node);
2367 ir_node *mem = get_Store_mem(node);
2368 ir_graph *irg = current_ir_graph;
2369 dbg_info *dbgi = get_irn_dbg_info(node);
2370 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2371 ir_node *new_val, *new_node, *store;
2372 ia32_address_t addr;
2374 /* check for destination address mode */
2375 new_node = try_create_dest_am(node);
2376 if (new_node != NULL)
2379 /* construct store address */
2380 memset(&addr, 0, sizeof(addr));
2381 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2383 if (addr.base == NULL) {
2386 addr.base = be_transform_node(addr.base);
2389 if (addr.index == NULL) {
2392 addr.index = be_transform_node(addr.index);
2394 addr.mem = be_transform_node(mem);
2396 if (mode_is_float(mode)) {
2397 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2399 while (is_Conv(val) && mode == get_irn_mode(val)) {
2400 ir_node *op = get_Conv_op(val);
2401 if (!mode_is_float(get_irn_mode(op)))
2405 new_val = be_transform_node(val);
2406 if (ia32_cg_config.use_sse2) {
2407 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2408 addr.index, addr.mem, new_val);
2410 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2411 addr.index, addr.mem, new_val, mode);
2414 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
/* store the float directly with a truncating vfist, skipping the Conv */
2415 val = get_Conv_op(val);
2417 /* TODO: is this optimisation still necessary at all (middleend)? */
2418 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2419 while (is_Conv(val)) {
2420 ir_node *op = get_Conv_op(val);
2421 if (!mode_is_float(get_irn_mode(op)))
2423 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2427 new_val = be_transform_node(val);
2428 new_node = gen_vfist(dbgi, irg, new_block, addr.base, addr.index, addr.mem, new_val, &store);
2430 new_val = create_immediate_or_transform(val, 0);
2431 assert(mode != mode_b);
2433 if (get_mode_size_bits(mode) == 8) {
2434 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2435 addr.index, addr.mem, new_val);
2437 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2438 addr.index, addr.mem, new_val);
/* attributes go on the actual store node (which may differ for vfist) */
2443 set_irn_pinned(store, get_irn_pinned(node));
2444 set_ia32_op_type(store, ia32_AddrModeD);
2445 set_ia32_ls_mode(store, mode);
2447 set_address(store, &addr);
2448 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
2454 * Transforms a Store.
2456 * @return the created ia32 Store node
/* Dispatcher: float-constant stores that are not representable as simple
 * SSE/x87 constants go through gen_float_const_Store (stored as integer
 * immediates); everything else through gen_normal_Store. */
2458 static ir_node *gen_Store(ir_node *node)
2460 ir_node *val = get_Store_value(node);
2461 ir_mode *mode = get_irn_mode(val);
2463 if (mode_is_float(mode) && is_Const(val)) {
2466 /* we are storing a floating point constant */
2467 if (ia32_cg_config.use_sse2) {
2468 transform = !is_simple_sse_Const(val);
2470 transform = !is_simple_x87_Const(val);
2473 return gen_float_const_Store(node, val);
2475 return gen_normal_Store(node);
2479 * Transforms a Switch.
2481 * @return the created ia32 SwitchJmp node
/* NOTE(review): the listing below has gaps (missing physical lines); untouched
 * code is kept verbatim. */
2483 static ir_node *create_Switch(ir_node *node)
2485 ir_graph *irg = current_ir_graph;
2486 dbg_info *dbgi = get_irn_dbg_info(node);
2487 ir_node *block = be_transform_node(get_nodes_block(node));
2488 ir_node *sel = get_Cond_selector(node);
2489 ir_node *new_sel = be_transform_node(sel);
2490 int switch_min = INT_MAX;
2491 int switch_max = INT_MIN;
2492 long default_pn = get_Cond_defaultProj(node);
2494 const ir_edge_t *edge;
/* the selector is expected to be exactly 32 bit wide */
2496 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2498 /* determine the smallest switch case value */
2499 foreach_out_edge(node, edge) {
2500 ir_node *proj = get_edge_src_irn(edge);
2501 long pn = get_Proj_proj(proj);
/* the default Proj is excluded from the min/max computation */
2502 if(pn == default_pn)
/* refuse to build an absurdly large jump table */
2511 if((unsigned) (switch_max - switch_min) > 256000) {
2512 panic("Size of switch %+F bigger than 256000", node);
2515 if (switch_min != 0) {
2516 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2518 /* if smallest switch case is not 0 we need an additional sub */
2519 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2520 add_ia32_am_offs_int(new_sel, -switch_min);
2521 set_ia32_op_type(new_sel, ia32_AddrModeS);
2523 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
/* emit the actual jump-table jump on the (possibly rebased) selector */
2526 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2527 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2533 * Transform a Cond node.
/* Boolean conditions become a Jcc fed by the flags of the compare; every
 * other selector mode is treated as a switch. */
2535 static ir_node *gen_Cond(ir_node *node) {
2536 ir_node *block = get_nodes_block(node);
2537 ir_node *new_block = be_transform_node(block);
2538 ir_graph *irg = current_ir_graph;
2539 dbg_info *dbgi = get_irn_dbg_info(node);
2540 ir_node *sel = get_Cond_selector(node);
2541 ir_mode *sel_mode = get_irn_mode(sel);
2542 ir_node *flags = NULL;
/* non-boolean selector: this is a switch, not a conditional jump */
2546 if (sel_mode != mode_b) {
2547 return create_Switch(node);
2550 /* we get flags from a Cmp */
2551 flags = get_flags_node(sel, &pnc);
2553 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2554 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2559 static ir_node *gen_be_Copy(ir_node *node)
2561 ir_node *new_node = be_duplicate_node(node);
2562 ir_mode *mode = get_irn_mode(new_node);
2564 if (ia32_mode_needs_gp_reg(mode)) {
2565 set_irn_mode(new_node, mode_Iu);
/* Creates an x87 compare. Uses fucomi when available; otherwise falls back
 * to ftst/fucom followed by fnstsw + sahf to move the FPU status word into
 * the CPU flags. NOTE(review): listing has gaps; code kept verbatim. */
2571 static ir_node *create_Fucom(ir_node *node)
2573 ir_graph *irg = current_ir_graph;
2574 dbg_info *dbgi = get_irn_dbg_info(node);
2575 ir_node *block = get_nodes_block(node);
2576 ir_node *new_block = be_transform_node(block);
2577 ir_node *left = get_Cmp_left(node);
2578 ir_node *new_left = be_transform_node(left);
2579 ir_node *right = get_Cmp_right(node);
/* preferred: fucomi writes eflags directly, no fnstsw/sahf needed */
2583 if(ia32_cg_config.use_fucomi) {
2584 new_right = be_transform_node(right);
2585 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2587 set_ia32_commutative(new_node);
2588 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* compare against 0.0 can use the shorter ftst */
2590 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2591 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2594 new_right = be_transform_node(right);
2595 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2599 set_ia32_commutative(new_node);
2601 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* sahf transfers the fnstsw result (in AH) into the CPU flags */
2603 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2604 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Creates an SSE ucomis[sd] compare; operands may be matched as an address
 * mode (commutative), hence the fix_mem_proj at the end. */
2610 static ir_node *create_Ucomi(ir_node *node)
2612 ir_graph *irg = current_ir_graph;
2613 dbg_info *dbgi = get_irn_dbg_info(node);
2614 ir_node *src_block = get_nodes_block(node);
2615 ir_node *new_block = be_transform_node(src_block);
2616 ir_node *left = get_Cmp_left(node);
2617 ir_node *right = get_Cmp_right(node);
2619 ia32_address_mode_t am;
2620 ia32_address_t *addr = &am.addr;
/* allow one operand to be folded as a memory operand */
2622 match_arguments(&am, src_block, left, right, NULL,
2623 match_commutative | match_am);
2625 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2626 addr->mem, am.new_op1, am.new_op2,
2628 set_am_attributes(new_node, &am);
2630 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2632 new_node = fix_mem_proj(new_node, &am);
2638 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2639 * to fold an and into a test node
/* TEST only sets flags valid for ==/!= judgements, so folding And+Cmp into
 * Test is only legal when every user of the Cmp asks for Eq or Lg. */
2641 static bool can_fold_test_and(ir_node *node)
2643 const ir_edge_t *edge;
2645 /** we can only have eq and lg projs */
2646 foreach_out_edge(node, edge) {
2647 ir_node *proj = get_edge_src_irn(edge);
2648 pn_Cmp pnc = get_Proj_proj(proj);
/* any relational Proj (Lt/Le/Gt/Ge/...) forbids the folding */
2649 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2657 * returns true if it is assured, that the upper bits of a node are "clean"
2658 * which means for a 16 or 8 bit value, that the upper bits in the register
2659 * are 0 for unsigned and a copy of the last significant bit for signed
/* Recursive analysis over already-transformed ia32 nodes; used to widen
 * 8/16 bit compares to 32 bit when safe.
 * NOTE(review): listing has gaps; code kept verbatim. */
2662 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2664 assert(ia32_mode_needs_gp_reg(mode));
/* a full 32bit value trivially has no "upper" garbage bits */
2665 if (get_mode_size_bits(mode) >= 32)
2668 if (is_Proj(transformed_node))
2669 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
/* a conversion produces clean upper bits iff it is at most as wide as mode
 * and has the same signedness */
2671 if (is_ia32_Conv_I2I(transformed_node)
2672 || is_ia32_Conv_I2I8Bit(transformed_node)) {
2673 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2674 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2676 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* an unsigned shift-right by a large enough constant zeroes the top bits */
2682 if (is_ia32_Shr(transformed_node) && !mode_is_signed(mode)) {
2683 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2684 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2685 const ia32_immediate_attr_t *attr
2686 = get_ia32_immediate_attr_const(right);
2687 if (attr->symconst == 0
2688 && (unsigned) attr->offset >= (32 - get_mode_size_bits(mode))) {
2692 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
/* masking with a constant that fits in mode also clears the upper bits */
2695 if (is_ia32_And(transformed_node) && !mode_is_signed(mode)) {
2696 ir_node *right = get_irn_n(transformed_node, n_ia32_And_right);
2697 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2698 const ia32_immediate_attr_t *attr
2699 = get_ia32_immediate_attr_const(right);
2700 if (attr->symconst == 0
2701 && (unsigned) attr->offset
2702 <= (0xffffffff >> (32 - get_mode_size_bits(mode)))) {
2709 /* TODO recurse on Or, Xor, ... if appropriate? */
/* constants: check that the bits above mode are all 0 (unsigned) or a
 * sign-extension of the top bit (signed) */
2711 if (is_ia32_Immediate(transformed_node)
2712 || is_ia32_Const(transformed_node)) {
2713 const ia32_immediate_attr_t *attr
2714 = get_ia32_immediate_attr_const(transformed_node);
2715 if (mode_is_signed(mode)) {
2716 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2717 if (shifted == 0 || shifted == -1)
2720 unsigned long shifted = (unsigned long) attr->offset;
2721 shifted >>= get_mode_size_bits(mode);
2731 * Generate code for a Cmp.
/* Floating point compares are delegated to SSE/x87 helpers; integer compares
 * become Test (for x & y ==/!= 0) or Cmp, widened to 32 bit when the upper
 * bits of both operands are known clean.
 * NOTE(review): listing has gaps; code kept verbatim. */
2733 static ir_node *gen_Cmp(ir_node *node)
2735 ir_graph *irg = current_ir_graph;
2736 dbg_info *dbgi = get_irn_dbg_info(node);
2737 ir_node *block = get_nodes_block(node);
2738 ir_node *new_block = be_transform_node(block);
2739 ir_node *left = get_Cmp_left(node);
2740 ir_node *right = get_Cmp_right(node);
2741 ir_mode *cmp_mode = get_irn_mode(left);
2743 ia32_address_mode_t am;
2744 ia32_address_t *addr = &am.addr;
/* float compares take a completely different path */
2747 if(mode_is_float(cmp_mode)) {
2748 if (ia32_cg_config.use_sse2) {
2749 return create_Ucomi(node);
2751 return create_Fucom(node);
2755 assert(ia32_mode_needs_gp_reg(cmp_mode));
2757 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2758 cmp_unsigned = !mode_is_signed(cmp_mode);
2759 if (is_Const_0(right) &&
2761 get_irn_n_edges(left) == 1 &&
2762 can_fold_test_and(node)) {
2763 /* Test(and_left, and_right) */
2764 ir_node *and_left = get_And_left(left);
2765 ir_node *and_right = get_And_right(left);
2767 /* matze: code here used mode instead of cmd_mode, I think it is always
2768 * the same as cmp_mode, but I leave this here to see if this is really
2771 assert(get_irn_mode(and_left) == cmp_mode);
2773 match_arguments(&am, block, and_left, and_right, NULL,
2775 match_am | match_8bit_am | match_16bit_am |
2776 match_am_and_immediates | match_immediate |
2777 match_8bit | match_16bit);
2779 /* use 32bit compare mode if possible since the opcode is smaller */
2780 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2781 upper_bits_clean(am.new_op2, cmp_mode)) {
2782 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2785 if (get_mode_size_bits(cmp_mode) == 8) {
2786 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2787 addr->index, addr->mem, am.new_op1,
2788 am.new_op2, am.ins_permuted,
2791 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2792 addr->index, addr->mem, am.new_op1,
2793 am.new_op2, am.ins_permuted,
2797 /* Cmp(left, right) */
2798 match_arguments(&am, block, left, right, NULL,
2799 match_commutative | match_am | match_8bit_am |
2800 match_16bit_am | match_am_and_immediates |
2801 match_immediate | match_8bit | match_16bit);
2802 /* use 32bit compare mode if possible since the opcode is smaller */
2803 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2804 upper_bits_clean(am.new_op2, cmp_mode)) {
2805 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2808 if (get_mode_size_bits(cmp_mode) == 8) {
2809 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2810 addr->index, addr->mem, am.new_op1,
2811 am.new_op2, am.ins_permuted,
2814 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2815 addr->index, addr->mem, am.new_op1,
2816 am.new_op2, am.ins_permuted, cmp_unsigned);
/* transfer the matched address-mode info and the (possibly widened) mode */
2819 set_am_attributes(new_node, &am);
2820 set_ia32_ls_mode(new_node, cmp_mode);
2822 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2824 new_node = fix_mem_proj(new_node, &am);
/* Creates an ia32 CMov for a Mux whose values live in GP registers.
 * Note the operand order: val_false is op1, val_true is op2 — the
 * ins_permuted flag of the matcher accounts for swaps. */
2829 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2832 ir_graph *irg = current_ir_graph;
2833 dbg_info *dbgi = get_irn_dbg_info(node);
2834 ir_node *block = get_nodes_block(node);
2835 ir_node *new_block = be_transform_node(block);
2836 ir_node *val_true = get_Mux_true(node);
2837 ir_node *val_false = get_Mux_false(node);
2839 match_flags_t match_flags;
2840 ia32_address_mode_t am;
2841 ia32_address_t *addr;
/* caller must have checked the target supports cmov and operands are GP */
2843 assert(ia32_cg_config.use_cmov);
2844 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2848 match_flags = match_commutative | match_am | match_16bit_am |
/* flags is passed so the matcher will not schedule flag-clobbering code
 * between the compare and the cmov */
2851 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2853 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2854 addr->mem, am.new_op1, am.new_op2, new_flags,
2855 am.ins_permuted, pnc);
2856 set_am_attributes(new_node, &am);
2858 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2860 new_node = fix_mem_proj(new_node, &am);
2866 * Creates a ia32 Setcc instruction.
/* Set produces an 8bit value; a Conv_I2I8Bit zero-extension is appended
 * when the original node's mode is wider than 8 bit. */
2868 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2869 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2872 ir_graph *irg = current_ir_graph;
2873 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2874 ir_node *nomem = new_NoMem();
2875 ir_mode *mode = get_irn_mode(orig_node);
2878 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2879 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2881 /* we might need to conv the result up */
2882 if (get_mode_size_bits(mode) > 8) {
2883 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
/* mode_Bu: zero-extend the 8bit setcc result */
2884 nomem, new_node, mode_Bu);
2885 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2892 * Create instruction for an unsigned Difference or Zero.
/* Branch-free DOZ: res = (a - b) & ~borrow-mask. Sub sets the carry flag on
 * unsigned underflow; Sbb of a register with itself turns that carry into an
 * all-ones/all-zeros mask which then masks the subtraction result. */
2894 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b) {
2895 ir_graph *irg = current_ir_graph;
2896 ir_mode *mode = get_irn_mode(psi);
2897 ir_node *new_node, *sub, *sbb, *eflags, *block, *noreg, *tmpreg, *nomem;
2900 new_node = gen_binop(psi, a, b, new_rd_ia32_Sub,
2901 match_mode_neutral | match_am | match_immediate | match_two_users);
2903 block = get_nodes_block(new_node);
/* recover the Sub node itself so we can also project its flags output */
2905 if (is_Proj(new_node)) {
2906 sub = get_Proj_pred(new_node);
2907 assert(is_ia32_Sub(sub));
2910 set_irn_mode(sub, mode_T);
2911 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
2913 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
2915 dbgi = get_irn_dbg_info(psi);
2916 noreg = ia32_new_NoReg_gp(env_cg);
/* ProduceVal gives an uninitialised register; sbb reg,reg only depends on CF */
2917 tmpreg = new_rd_ia32_ProduceVal(dbgi, irg, block);
2918 nomem = new_NoMem();
2919 sbb = new_rd_ia32_Sbb(dbgi, irg, block, noreg, noreg, nomem, tmpreg, tmpreg, eflags);
2921 new_node = new_rd_ia32_And(dbgi, irg, block, noreg, noreg, nomem, new_node, sbb);
2922 set_ia32_commutative(new_node);
2927 * Transforms a Mux node into CMov.
2929 * @return The transformed node.
/* Special patterns recognised before falling back to Set/CMov:
 * SSE min/max for float Mux over a compare of the same operands, and the
 * unsigned "difference or zero" (DOZ) idiom.
 * NOTE(review): listing has gaps; code kept verbatim. */
2931 static ir_node *gen_Mux(ir_node *node)
2933 dbg_info *dbgi = get_irn_dbg_info(node);
2934 ir_node *block = get_nodes_block(node);
2935 ir_node *new_block = be_transform_node(block);
2936 ir_node *mux_true = get_Mux_true(node);
2937 ir_node *mux_false = get_Mux_false(node);
2938 ir_node *cond = get_Mux_sel(node);
2939 ir_mode *mode = get_irn_mode(node);
2942 assert(get_irn_mode(cond) == mode_b);
2944 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
2945 if (mode_is_float(mode)) {
2946 ir_node *cmp = get_Proj_pred(cond);
2947 ir_node *cmp_left = get_Cmp_left(cmp);
2948 ir_node *cmp_right = get_Cmp_right(cmp);
2949 pn_Cmp pnc = get_Proj_proj(cond);
2951 if (ia32_cg_config.use_sse2) {
2952 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
2953 if (cmp_left == mux_true && cmp_right == mux_false) {
2954 /* Mux(a <= b, a, b) => MIN */
2955 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
2956 match_commutative | match_am | match_two_users);
2957 } else if (cmp_left == mux_false && cmp_right == mux_true) {
2958 /* Mux(a <= b, b, a) => MAX */
2959 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
2960 match_commutative | match_am | match_two_users);
2962 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
2963 if (cmp_left == mux_true && cmp_right == mux_false) {
2964 /* Mux(a >= b, a, b) => MAX */
2965 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
2966 match_commutative | match_am | match_two_users);
2967 } else if (cmp_left == mux_false && cmp_right == mux_true) {
2968 /* Mux(a >= b, b, a) => MIN */
2969 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
2970 match_commutative | match_am | match_two_users);
/* only the min/max patterns above are supported for float modes */
2974 panic("cannot transform floating point Mux");
2980 assert(ia32_mode_needs_gp_reg(mode));
2982 if (is_Proj(cond)) {
2983 ir_node *cmp = get_Proj_pred(cond);
2985 ir_node *cmp_left = get_Cmp_left(cmp);
2986 ir_node *cmp_right = get_Cmp_right(cmp);
2987 pn_Cmp pnc = get_Proj_proj(cond);
2989 /* check for unsigned Doz first */
2990 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
2991 is_Const_0(mux_false) && is_Sub(mux_true) &&
2992 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
2993 /* Mux(a >=u b, a - b, 0) unsigned Doz */
2994 return create_Doz(node, cmp_left, cmp_right);
2995 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
2996 is_Const_0(mux_true) && is_Sub(mux_false) &&
2997 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
2998 /* Mux(a <=u b, 0, a - b) unsigned Doz */
2999 return create_Doz(node, cmp_left, cmp_right);
3004 flags = get_flags_node(cond, &pnc);
3006 if (is_Const(mux_true) && is_Const(mux_false)) {
3007 /* both are const, good */
3008 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3009 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3010 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3011 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
3013 /* Not that simple. */
/* general case: conditional move on the computed flags */
3018 new_node = create_CMov(node, cond, flags, pnc);
3026 * Create a conversion from x87 state register to general purpose.
/* Implemented as fist (store to the frame) followed by an integer Load.
 * NOTE(review): listing has gaps; code kept verbatim. */
3028 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
3029 ir_node *block = be_transform_node(get_nodes_block(node));
3030 ir_node *op = get_Conv_op(node);
3031 ir_node *new_op = be_transform_node(op);
3032 ia32_code_gen_t *cg = env_cg;
3033 ir_graph *irg = current_ir_graph;
3034 dbg_info *dbgi = get_irn_dbg_info(node);
3035 ir_node *noreg = ia32_new_NoReg_gp(cg);
3036 ir_mode *mode = get_irn_mode(node);
3037 ir_node *fist, *load, *mem;
3039 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg, new_NoMem(), new_op, &fist);
3040 set_irn_pinned(fist, op_pin_state_floats);
3041 set_ia32_use_frame(fist);
3042 set_ia32_op_type(fist, ia32_AddrModeD);
3044 assert(get_mode_size_bits(mode) <= 32);
3045 /* exception we can only store signed 32 bit integers, so for unsigned
3046 we store a 64bit (signed) integer and load the lower bits */
3047 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3048 set_ia32_ls_mode(fist, mode_Ls);
3050 set_ia32_ls_mode(fist, mode_Is);
3052 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* reload the (low 32 bits of the) stored integer from the frame */
3055 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, mem);
3057 set_irn_pinned(load, op_pin_state_floats);
3058 set_ia32_use_frame(load);
3059 set_ia32_op_type(load, ia32_AddrModeS);
3060 set_ia32_ls_mode(load, mode_Is);
/* the 64bit variant needs a wider stack entity for the spill slot */
3061 if(get_ia32_ls_mode(fist) == mode_Ls) {
3062 ia32_attr_t *attr = get_ia32_attr(load);
3063 attr->data.need_64bit_stackent = 1;
3065 ia32_attr_t *attr = get_ia32_attr(load);
3066 attr->data.need_32bit_stackent = 1;
3068 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
3070 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3074 * Creates a x87 strict Conv by placing a Store and a Load
3076 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3078 ir_node *block = get_nodes_block(node);
3079 ir_graph *irg = current_ir_graph;
3080 dbg_info *dbgi = get_irn_dbg_info(node);
3081 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3082 ir_node *nomem = new_NoMem();
3083 ir_node *frame = get_irg_frame(irg);
3084 ir_node *store, *load;
3087 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
3089 set_ia32_use_frame(store);
3090 set_ia32_op_type(store, ia32_AddrModeD);
3091 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
3093 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
3095 set_ia32_use_frame(load);
3096 set_ia32_op_type(load, ia32_AddrModeS);
3097 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3099 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
3104 * Create a conversion from general purpose to x87 register
/* Stores the integer to the frame and reloads it with fild. Signed 16/32bit
 * sources may use fild's source address mode directly; 32bit unsigned needs
 * a 64bit spill (high word zeroed) because fild only reads signed integers.
 * NOTE(review): listing has gaps; code kept verbatim. */
3106 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
3107 ir_node *src_block = get_nodes_block(node);
3108 ir_node *block = be_transform_node(src_block);
3109 ir_graph *irg = current_ir_graph;
3110 dbg_info *dbgi = get_irn_dbg_info(node);
3111 ir_node *op = get_Conv_op(node);
3112 ir_node *new_op = NULL;
3116 ir_mode *store_mode;
3122 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3123 if (src_mode == mode_Is || src_mode == mode_Hs) {
3124 ia32_address_mode_t am;
3126 match_arguments(&am, src_block, NULL, op, NULL,
3127 match_am | match_try_am | match_16bit | match_16bit_am);
3128 if (am.op_type == ia32_AddrModeS) {
3129 ia32_address_t *addr = &am.addr;
3131 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
3132 addr->index, addr->mem);
3133 new_node = new_r_Proj(irg, block, fild, mode_vfp,
3136 set_am_attributes(fild, &am);
3137 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3139 fix_mem_proj(fild, &am);
/* address-mode matching did not apply: transform the operand normally */
3144 if(new_op == NULL) {
3145 new_op = be_transform_node(op);
3148 noreg = ia32_new_NoReg_gp(env_cg);
3149 nomem = new_NoMem();
3150 mode = get_irn_mode(op);
3152 /* first convert to 32 bit signed if necessary */
3153 src_bits = get_mode_size_bits(src_mode);
3154 if (src_bits == 8) {
3155 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
3157 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3159 } else if (src_bits < 32) {
3160 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
3162 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3166 assert(get_mode_size_bits(mode) == 32);
/* spill the (now 32bit) integer value to the frame */
3169 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
3172 set_ia32_use_frame(store);
3173 set_ia32_op_type(store, ia32_AddrModeD);
3174 set_ia32_ls_mode(store, mode_Iu);
3176 /* exception for 32bit unsigned, do a 64bit spill+load */
3177 if(!mode_is_signed(mode)) {
3180 ir_node *zero_const = create_Immediate(NULL, 0, 0);
/* write a zero high word at frame offset +4 so the 64bit value is the
 * zero-extension of the unsigned 32bit value */
3182 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
3183 get_irg_frame(irg), noreg, nomem,
3186 set_ia32_use_frame(zero_store);
3187 set_ia32_op_type(zero_store, ia32_AddrModeD);
3188 add_ia32_am_offs_int(zero_store, 4);
3189 set_ia32_ls_mode(zero_store, mode_Iu);
/* the fild must see both stores; Sync merges their memory outputs */
3194 store = new_rd_Sync(dbgi, irg, block, 2, in);
3195 store_mode = mode_Ls;
3197 store_mode = mode_Is;
/* reload the spilled integer into the x87 stack */
3201 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
3203 set_ia32_use_frame(fild);
3204 set_ia32_op_type(fild, ia32_AddrModeS);
3205 set_ia32_ls_mode(fild, store_mode);
3207 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3213 * Create a conversion from one integer mode into another one
/* Only the smaller of the two modes matters for the generated Conv_I2I; if
 * the operand's upper bits are already clean the Conv is skipped entirely.
 * NOTE(review): listing has gaps; code kept verbatim. */
3215 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3216 dbg_info *dbgi, ir_node *block, ir_node *op,
3219 ir_graph *irg = current_ir_graph;
3220 int src_bits = get_mode_size_bits(src_mode);
3221 int tgt_bits = get_mode_size_bits(tgt_mode);
3222 ir_node *new_block = be_transform_node(block);
3224 ir_mode *smaller_mode;
3226 ia32_address_mode_t am;
3227 ia32_address_t *addr = &am.addr;
/* pick the narrower mode: that is the precision the Conv must establish */
3230 if (src_bits < tgt_bits) {
3231 smaller_mode = src_mode;
3232 smaller_bits = src_bits;
3234 smaller_mode = tgt_mode;
3235 smaller_bits = tgt_bits;
3238 #ifdef DEBUG_libfirm
3240 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3245 match_arguments(&am, block, NULL, op, NULL,
3246 match_8bit | match_16bit |
3247 match_am | match_8bit_am | match_16bit_am);
3249 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3250 /* unnecessary conv. in theory it shouldn't have been AM */
3251 assert(is_ia32_NoReg_GP(addr->base));
3252 assert(is_ia32_NoReg_GP(addr->index));
3253 assert(is_NoMem(addr->mem));
3254 assert(am.addr.offset == 0);
3255 assert(am.addr.symconst_ent == NULL);
3259 if (smaller_bits == 8) {
3260 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3261 addr->index, addr->mem, am.new_op2,
3264 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3265 addr->index, addr->mem, am.new_op2,
3268 set_am_attributes(new_node, &am);
3269 /* match_arguments assume that out-mode = in-mode, this isn't true here
3271 set_ia32_ls_mode(new_node, smaller_mode);
3272 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3273 new_node = fix_mem_proj(new_node, &am);
3278 * Transforms a Conv node.
3280 * @return The created ia32 Conv node
/* Dispatcher over all conversion cases: bool sources are no-ops, float<->float
 * and float<->int go through SSE or x87 helpers, int<->int is handled by
 * create_I2I_Conv. NOTE(review): listing has gaps; code kept verbatim. */
3282 static ir_node *gen_Conv(ir_node *node) {
3283 ir_node *block = get_nodes_block(node);
3284 ir_node *new_block = be_transform_node(block);
3285 ir_node *op = get_Conv_op(node);
3286 ir_node *new_op = NULL;
3287 ir_graph *irg = current_ir_graph;
3288 dbg_info *dbgi = get_irn_dbg_info(node);
3289 ir_mode *src_mode = get_irn_mode(op);
3290 ir_mode *tgt_mode = get_irn_mode(node);
3291 int src_bits = get_mode_size_bits(src_mode);
3292 int tgt_bits = get_mode_size_bits(tgt_mode);
3293 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3294 ir_node *nomem = new_rd_NoMem(irg);
3295 ir_node *res = NULL;
3297 if (src_mode == mode_b) {
3298 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3299 /* nothing to do, we already model bools as 0/1 ints */
3300 return be_transform_node(op);
3303 if (src_mode == tgt_mode) {
3304 if (get_Conv_strict(node)) {
3305 if (ia32_cg_config.use_sse2) {
3306 /* when we are in SSE mode, we can kill all strict no-op conversion */
3307 return be_transform_node(op);
3310 /* this should be optimized already, but who knows... */
3311 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3312 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3313 return be_transform_node(op);
3317 if (mode_is_float(src_mode)) {
3318 new_op = be_transform_node(op);
3319 /* we convert from float ... */
3320 if (mode_is_float(tgt_mode)) {
/* non-strict E->D narrowing is a no-op on the x87 stack */
3321 if(src_mode == mode_E && tgt_mode == mode_D
3322 && !get_Conv_strict(node)) {
3323 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3328 if (ia32_cg_config.use_sse2) {
3329 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3330 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3332 set_ia32_ls_mode(res, tgt_mode);
/* x87: strict conversions must round through memory */
3334 if(get_Conv_strict(node)) {
3335 res = gen_x87_strict_conv(tgt_mode, new_op);
3336 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3339 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3344 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3345 if (ia32_cg_config.use_sse2) {
3346 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3348 set_ia32_ls_mode(res, src_mode);
3350 return gen_x87_fp_to_gp(node);
3354 /* we convert from int ... */
3355 if (mode_is_float(tgt_mode)) {
3357 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3358 if (ia32_cg_config.use_sse2) {
3359 new_op = be_transform_node(op);
3360 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3362 set_ia32_ls_mode(res, tgt_mode);
3364 res = gen_x87_gp_to_fp(node, src_mode);
3365 if(get_Conv_strict(node)) {
3366 /* The strict-Conv is only necessary, if the int mode has more bits
3367 * than the float mantissa */
3368 size_t int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3369 size_t float_mantissa;
3370 /* FIXME There is no way to get the mantissa size of a mode */
3371 switch (get_mode_size_bits(tgt_mode)) {
3372 case 32: float_mantissa = 23 + 1; break; // + 1 for implicit 1
3373 case 64: float_mantissa = 52 + 1; break;
3375 case 96: float_mantissa = 64; break;
3376 default: float_mantissa = 0; break;
3378 if (float_mantissa < int_mantissa) {
3379 res = gen_x87_strict_conv(tgt_mode, res);
3380 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3385 } else if(tgt_mode == mode_b) {
3386 /* mode_b lowering already took care that we only have 0/1 values */
3387 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3388 src_mode, tgt_mode));
3389 return be_transform_node(op);
/* int -> int of the same width is a no-op in 32bit registers */
3392 if (src_bits == tgt_bits) {
3393 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3394 src_mode, tgt_mode));
3395 return be_transform_node(op);
3398 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3406 static ir_node *create_immediate_or_transform(ir_node *node,
3407 char immediate_constraint_type)
3409 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3410 if (new_node == NULL) {
3411 new_node = be_transform_node(node);
3417 * Transforms a FrameAddr into an ia32 Add.
3419 static ir_node *gen_be_FrameAddr(ir_node *node) {
3420 ir_node *block = be_transform_node(get_nodes_block(node));
3421 ir_node *op = be_get_FrameAddr_frame(node);
3422 ir_node *new_op = be_transform_node(op);
3423 ir_graph *irg = current_ir_graph;
3424 dbg_info *dbgi = get_irn_dbg_info(node);
3425 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3428 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3429 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3430 set_ia32_use_frame(new_node);
3432 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3438 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Calling convention fixup: float results live in xmm0 under SSE, but the ABI
 * returns floats on the x87 stack. The value is spilled to the frame and
 * reloaded via vfld, then a rebuilt Barrier routes the new value/memory.
 * NOTE(review): listing has gaps; code kept verbatim. */
3440 static ir_node *gen_be_Return(ir_node *node) {
3441 ir_graph *irg = current_ir_graph;
3442 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3443 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3444 ir_entity *ent = get_irg_entity(irg);
3445 ir_type *tp = get_entity_type(ent);
3450 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3451 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3454 int pn_ret_val, pn_ret_mem, arity, i;
/* the fixup is only needed for SSE float returns; everything else is
 * duplicated unchanged */
3456 assert(ret_val != NULL);
3457 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3458 return be_duplicate_node(node);
3461 res_type = get_method_res_type(tp, 0);
3463 if (! is_Primitive_type(res_type)) {
3464 return be_duplicate_node(node);
3467 mode = get_type_mode(res_type);
3468 if (! mode_is_float(mode)) {
3469 return be_duplicate_node(node);
3472 assert(get_method_n_ress(tp) == 1);
3474 pn_ret_val = get_Proj_proj(ret_val);
3475 pn_ret_mem = get_Proj_proj(ret_mem);
3477 /* get the Barrier */
3478 barrier = get_Proj_pred(ret_val);
3480 /* get result input of the Barrier */
3481 ret_val = get_irn_n(barrier, pn_ret_val);
3482 new_ret_val = be_transform_node(ret_val);
3484 /* get memory input of the Barrier */
3485 ret_mem = get_irn_n(barrier, pn_ret_mem);
3486 new_ret_mem = be_transform_node(ret_mem);
3488 frame = get_irg_frame(irg);
3490 dbgi = get_irn_dbg_info(barrier);
3491 block = be_transform_node(get_nodes_block(barrier));
3493 noreg = ia32_new_NoReg_gp(env_cg);
3495 /* store xmm0 onto stack */
3496 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3497 new_ret_mem, new_ret_val);
3498 set_ia32_ls_mode(sse_store, mode);
3499 set_ia32_op_type(sse_store, ia32_AddrModeD);
3500 set_ia32_use_frame(sse_store);
3502 /* load into x87 register */
3503 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3504 set_ia32_op_type(fld, ia32_AddrModeS);
3505 set_ia32_use_frame(fld);
3507 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3508 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3510 /* create a new barrier */
3511 arity = get_irn_arity(barrier);
3512 in = alloca(arity * sizeof(in[0]));
3513 for (i = 0; i < arity; ++i) {
/* the value and memory slots get the freshly built nodes, all other
 * barrier inputs are transformed as usual */
3516 if (i == pn_ret_val) {
3518 } else if (i == pn_ret_mem) {
3521 ir_node *in = get_irn_n(barrier, i);
3522 new_in = be_transform_node(in);
3527 new_barrier = new_ir_node(dbgi, irg, block,
3528 get_irn_op(barrier), get_irn_mode(barrier),
3530 copy_node_attr(barrier, new_barrier);
3531 be_duplicate_deps(barrier, new_barrier);
3532 be_set_transformed_node(barrier, new_barrier);
3534 /* transform normally */
3535 return be_duplicate_node(node);
3539 * Transform a be_AddSP into an ia32_SubSP.
3541 static ir_node *gen_be_AddSP(ir_node *node)
3543 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3544 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3546 return gen_binop(node, sp, sz, new_rd_ia32_SubSP,
3547 match_am | match_immediate);
3551 * Transform a be_SubSP into an ia32_AddSP
3553 static ir_node *gen_be_SubSP(ir_node *node)
3555 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3556 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3558 return gen_binop(node, sp, sz, new_rd_ia32_AddSP,
3559 match_am | match_immediate);
3563 * Change some phi modes
/* Rebuilds a Phi with the backend register-class mode; predecessors are kept
 * as the old (untransformed) nodes and fixed up later because Phis may sit on
 * loops. NOTE(review): listing has gaps; code kept verbatim. */
3565 static ir_node *gen_Phi(ir_node *node) {
3566 ir_node *block = be_transform_node(get_nodes_block(node));
3567 ir_graph *irg = current_ir_graph;
3568 dbg_info *dbgi = get_irn_dbg_info(node);
3569 ir_mode *mode = get_irn_mode(node);
3572 if(ia32_mode_needs_gp_reg(mode)) {
3573 /* we shouldn't have any 64bit stuff around anymore */
3574 assert(get_mode_size_bits(mode) <= 32);
3575 /* all integer operations are on 32bit registers now */
3577 } else if(mode_is_float(mode)) {
3578 if (ia32_cg_config.use_sse2) {
3585 /* phi nodes allow loops, so we use the old arguments for now
3586 * and fix this later */
3587 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3588 get_irn_in(node) + 1);
3589 copy_node_attr(node, phi);
3590 be_duplicate_deps(node, phi);
/* enqueue predecessors so they get transformed and patched in later */
3592 be_enqueue_preds(node);
/* Transforms an indirect jump; the target address may be folded as an
 * address mode or immediate. */
3600 static ir_node *gen_IJmp(ir_node *node)
3602 ir_node *block = get_nodes_block(node);
3603 ir_node *new_block = be_transform_node(block);
3604 dbg_info *dbgi = get_irn_dbg_info(node);
3605 ir_node *op = get_IJmp_target(node);
3607 ia32_address_mode_t am;
3608 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
3610 assert(get_irn_mode(op) == mode_P);
3612 match_arguments(&am, block, NULL, op, NULL,
3613 match_am | match_8bit_am | match_16bit_am |
3614 match_immediate | match_8bit | match_16bit);
3616 new_node = new_rd_ia32_IJmp(dbgi, current_ir_graph, new_block,
3617 addr->base, addr->index, addr->mem,
3619 set_am_attributes(new_node, &am);
3620 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3622 new_node = fix_mem_proj(new_node, &am);
3628 * Transform a Bound node.
/**
 * Transform a Bound node.
 * Only the common Java-style case (lower bound == 0) is supported: the
 * check is implemented as an unsigned compare "index - upper" followed
 * by a conditional jump. Any other lower bound panics.
 */
3630 static ir_node *gen_Bound(ir_node *node)
3633 ir_node *lower = get_Bound_lower(node);
3634 dbg_info *dbgi = get_irn_dbg_info(node);
3636 if (is_Const_0(lower)) {
3637 /* typical case for Java */
3638 ir_node *sub, *res, *flags, *block;
3639 ir_graph *irg = current_ir_graph;
3641 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3642 new_rd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3644 block = get_nodes_block(res);
/* gen_binop may return the Sub directly or a Proj on it; normalize
 * so that "sub" is the mode_T Sub and "res" its result Proj */
3645 if (! is_Proj(res)) {
3647 set_irn_mode(sub, mode_T);
3648 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3650 sub = get_Proj_pred(res);
/* unsigned < catches both index < 0 and index >= upper in one test */
3652 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3653 new_node = new_rd_ia32_Jcc(dbgi, irg, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3654 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3656 panic("generic Bound not supported in ia32 Backend");
/**
 * Transform a lowered ia32_l_ShlDep (shift-left with extra dependency)
 * into a real ia32_Shl.
 */
3662 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3664 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3665 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3667 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
3668 match_immediate | match_mode_neutral);
/** Transform a lowered ia32_l_ShrDep into a real ia32_Shr. */
3671 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3673 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3674 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3675 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/** Transform a lowered ia32_l_SarDep into a real ia32_Sar. */
3679 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3681 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3682 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3683 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/**
 * Transform a lowered ia32_l_Add (low word of a 64bit add) into an
 * ia32_Add. The result is forced to mode_T so the carry flag output
 * is available to the matching Adc of the high word.
 */
3687 static ir_node *gen_ia32_l_Add(ir_node *node) {
3688 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3689 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3690 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
3691 match_commutative | match_am | match_immediate |
3692 match_mode_neutral);
/* gen_binop may have wrapped the Add in a result Proj; unwrap it */
3694 if(is_Proj(lowered)) {
3695 lowered = get_Proj_pred(lowered);
3697 assert(is_ia32_Add(lowered));
3698 set_irn_mode(lowered, mode_T);
/**
 * Transform a lowered ia32_l_Adc (add-with-carry, high word of a
 * 64bit add) into an ia32_Adc consuming the flags of the low-word Add.
 */
3704 static ir_node *gen_ia32_l_Adc(ir_node *node)
3706 return gen_binop_flags(node, new_rd_ia32_Adc,
3707 match_commutative | match_am | match_immediate |
3708 match_mode_neutral);
3712 * Transforms a l_MulS into a "real" MulS node.
3714 * @return the created ia32 Mul node
/**
 * Transform a l_Mul into a "real" ia32 Mul node (unsigned widening
 * multiply used by 64bit lowering).
 *
 * @return the created ia32 Mul node
 */
3716 static ir_node *gen_ia32_l_Mul(ir_node *node) {
3717 ir_node *left = get_binop_left(node);
3718 ir_node *right = get_binop_right(node);
3720 return gen_binop(node, left, right, new_rd_ia32_Mul,
3721 match_commutative | match_am | match_mode_neutral);
3725 * Transforms a l_IMulS into a "real" IMul1OPS node.
3727 * @return the created ia32 IMul1OP node
/**
 * Transform a l_IMul into a "real" IMul1OP node (one-operand signed
 * widening multiply used by 64bit lowering).
 *
 * @return the created ia32 IMul1OP node
 */
3729 static ir_node *gen_ia32_l_IMul(ir_node *node) {
3730 ir_node *left = get_binop_left(node);
3731 ir_node *right = get_binop_right(node);
3733 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
3734 match_commutative | match_am | match_mode_neutral);
/**
 * Transform a lowered ia32_l_Sub (low word of a 64bit sub) into an
 * ia32_Sub. Forced to mode_T so the borrow flag feeds the Sbb of the
 * high word. Note: subtraction is NOT commutative, so no
 * match_commutative here.
 */
3737 static ir_node *gen_ia32_l_Sub(ir_node *node) {
3738 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3739 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3740 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
3741 match_am | match_immediate | match_mode_neutral);
/* gen_binop may have wrapped the Sub in a result Proj; unwrap it */
3743 if(is_Proj(lowered)) {
3744 lowered = get_Proj_pred(lowered);
3746 assert(is_ia32_Sub(lowered));
3747 set_irn_mode(lowered, mode_T);
/**
 * Transform a lowered ia32_l_Sbb (subtract-with-borrow, high word of a
 * 64bit sub) into an ia32_Sbb consuming the flags of the low-word Sub.
 */
3753 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
3754 return gen_binop_flags(node, new_rd_ia32_Sbb,
3755 match_am | match_immediate | match_mode_neutral);
3759 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3760 * op1 - target to be shifted
3761 * op2 - contains bits to be shifted into target
3763 * Only op3 can be an immediate.
/**
 * Transform a lowered l_ShlD/l_ShrD into a real ShlD/ShrD
 * (double-precision shift of a 64bit value split into two registers).
 *
 * @param node   the l_ShlD/l_ShrD node being transformed
 * @param high   value providing the bits shifted into the target
 * @param low    value being shifted
 * @param count  shift amount; may become an immediate
 */
3765 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3766 ir_node *low, ir_node *count)
3768 ir_node *block = get_nodes_block(node);
3769 ir_node *new_block = be_transform_node(block);
3770 ir_graph *irg = current_ir_graph;
3771 dbg_info *dbgi = get_irn_dbg_info(node);
3772 ir_node *new_high = be_transform_node(high);
3773 ir_node *new_low = be_transform_node(low);
/* Skip single-user integer Convs on the count: the hardware only looks
 * at the low 5 bits of the shift amount, so any conversion from a mode
 * of at least 5 bits is a no-op here. */
3777 /* the shift amount can be any mode that is bigger than 5 bits, since all
3778 * other bits are ignored anyway */
3779 while (is_Conv(count) &&
3780 get_irn_n_edges(count) == 1 &&
3781 mode_is_int(get_irn_mode(count))) {
3782 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3783 count = get_Conv_op(count);
3785 new_count = create_immediate_or_transform(count, 0);
3787 if (is_ia32_l_ShlD(node)) {
3788 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
3791 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
3794 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/** Transform a lowered l_ShlD into a ShlD via the shared helper. */
3799 static ir_node *gen_ia32_l_ShlD(ir_node *node)
3801 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
3802 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
3803 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
3804 return gen_lowered_64bit_shifts(node, high, low, count);
/** Transform a lowered l_ShrD into a ShrD via the shared helper. */
3807 static ir_node *gen_ia32_l_ShrD(ir_node *node)
3809 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
3810 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
3811 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
3812 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform a lowered 64bit-int-to-float conversion (l_LLtoFloat).
 *
 * Strategy: spill both 32bit halves of the long long to a 64bit frame
 * slot with two Stores, then load the value back with the x87 "fild"
 * instruction, which can read a signed 64bit integer directly from
 * memory. Only the signed case is handled; unsigned panics.
 */
3815 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
3816 ir_node *src_block = get_nodes_block(node);
3817 ir_node *block = be_transform_node(src_block);
3818 ir_graph *irg = current_ir_graph;
3819 dbg_info *dbgi = get_irn_dbg_info(node);
3820 ir_node *frame = get_irg_frame(irg);
3821 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3822 ir_node *nomem = new_NoMem();
3823 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
3824 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
3825 ir_node *new_val_low = be_transform_node(val_low);
3826 ir_node *new_val_high = be_transform_node(val_high);
3831 ir_node *store_high;
3833 if(!mode_is_signed(get_irn_mode(val_high))) {
3834 panic("unsigned long long -> float not supported yet (%+F)", node);
/* write the two halves next to each other on the frame; the high half
 * lives at offset +4 (little endian layout of the 64bit slot) */
3838 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
3840 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
3842 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
3843 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
3845 set_ia32_use_frame(store_low);
3846 set_ia32_use_frame(store_high);
3847 set_ia32_op_type(store_low, ia32_AddrModeD);
3848 set_ia32_op_type(store_high, ia32_AddrModeD);
3849 set_ia32_ls_mode(store_low, mode_Iu);
3850 set_ia32_ls_mode(store_high, mode_Is);
3851 add_ia32_am_offs_int(store_high, 4);
/* serialize both stores before the fild reads the 64bit slot */
3855 sync = new_rd_Sync(dbgi, irg, block, 2, in);
3858 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
3860 set_ia32_use_frame(fild);
3861 set_ia32_op_type(fild, ia32_AddrModeS);
3862 set_ia32_ls_mode(fild, mode_Ls);
3864 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3866 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Transform a lowered float-to-64bit-int conversion (l_FloattoLL).
 *
 * Uses the x87 "fist(p)" instruction to store the value as a 64bit
 * integer into a frame slot; the two 32bit result halves are produced
 * later by gen_Proj_l_FloattoLL loading from that slot.
 */
3869 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
3870 ir_node *src_block = get_nodes_block(node);
3871 ir_node *block = be_transform_node(src_block);
3872 ir_graph *irg = current_ir_graph;
3873 dbg_info *dbgi = get_irn_dbg_info(node);
3874 ir_node *frame = get_irg_frame(irg);
3875 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3876 ir_node *nomem = new_NoMem();
3877 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
3878 ir_node *new_val = be_transform_node(val);
3879 ir_node *fist, *mem;
3881 mem = gen_vfist(dbgi, irg, block, frame, noreg, nomem, new_val, &fist);
3882 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
3883 set_ia32_use_frame(fist);
3884 set_ia32_op_type(fist, ia32_AddrModeD);
/* store a full 64bit integer */
3885 set_ia32_ls_mode(fist, mode_Ls);
3891 * the BAD transformer.
/**
 * The BAD transformer: registered for opcodes that must not appear
 * anymore at this stage; aborts with a panic if one is encountered.
 */
3893 static ir_node *bad_transform(ir_node *node) {
3894 panic("No transform function for %+F available.", node);
/**
 * Transform a Proj on a l_FloattoLL: materialize one 32bit half of the
 * 64bit result by loading it from the frame slot the fist wrote.
 * The high half is read at offset +4 (little endian slot layout).
 */
3898 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
3899 ir_graph *irg = current_ir_graph;
3900 ir_node *block = be_transform_node(get_nodes_block(node));
3901 ir_node *pred = get_Proj_pred(node);
3902 ir_node *new_pred = be_transform_node(pred);
3903 ir_node *frame = get_irg_frame(irg);
3904 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3905 dbg_info *dbgi = get_irn_dbg_info(node);
3906 long pn = get_Proj_proj(node);
3911 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
3912 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3913 set_ia32_use_frame(load);
3914 set_ia32_op_type(load, ia32_AddrModeS);
3915 set_ia32_ls_mode(load, mode_Iu);
3916 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
3917 * 32 bit from it with this particular load */
3918 attr = get_ia32_attr(load);
3919 attr->data.need_64bit_stackent = 1;
3921 if (pn == pn_ia32_l_FloattoLL_res_high) {
3922 add_ia32_am_offs_int(load, 4);
3924 assert(pn == pn_ia32_l_FloattoLL_res_low);
3927 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3933 * Transform the Projs of an AddSP.
/**
 * Transform the Projs of a be_AddSP.
 * Since be_AddSP was lowered to an ia32_SubSP (see gen_be_AddSP), the
 * Proj numbers are remapped to the SubSP outputs, and the stack-pointer
 * result is pinned to ESP.
 */
3935 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
3936 ir_node *block = be_transform_node(get_nodes_block(node));
3937 ir_node *pred = get_Proj_pred(node);
3938 ir_node *new_pred = be_transform_node(pred);
3939 ir_graph *irg = current_ir_graph;
3940 dbg_info *dbgi = get_irn_dbg_info(node);
3941 long proj = get_Proj_proj(node);
3943 if (proj == pn_be_AddSP_sp) {
3944 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
3945 pn_ia32_SubSP_stack);
3946 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
3948 } else if(proj == pn_be_AddSP_res) {
3949 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
3950 pn_ia32_SubSP_addr);
3951 } else if (proj == pn_be_AddSP_M) {
3952 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
3955 panic("No idea how to transform proj->AddSP");
3959 * Transform the Projs of a SubSP.
/**
 * Transform the Projs of a be_SubSP.
 * Mirror of gen_Proj_be_AddSP: be_SubSP became an ia32_AddSP, so the
 * Projs are remapped to AddSP outputs; the SP result is pinned to ESP.
 */
3961 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
3962 ir_node *block = be_transform_node(get_nodes_block(node));
3963 ir_node *pred = get_Proj_pred(node);
3964 ir_node *new_pred = be_transform_node(pred);
3965 ir_graph *irg = current_ir_graph;
3966 dbg_info *dbgi = get_irn_dbg_info(node);
3967 long proj = get_Proj_proj(node);
3969 if (proj == pn_be_SubSP_sp) {
3970 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
3971 pn_ia32_AddSP_stack);
3972 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
3974 } else if (proj == pn_be_SubSP_M) {
3975 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
3978 panic("No idea how to transform proj->SubSP");
3982 * Transform and renumber the Projs from a Load.
/**
 * Transform and renumber the Projs from a Load.
 *
 * The new predecessor may be an ia32_Load, an xLoad (SSE), a vfld (x87),
 * a Conv that absorbed the load via source address mode, or a non-load
 * node when the load was folded away completely — each case remaps the
 * Proj numbers accordingly.
 *
 * NOTE(review): several switch headers/closing braces are elided in this
 * extract; comments describe only the visible code.
 */
3984 static ir_node *gen_Proj_Load(ir_node *node) {
3986 ir_node *block = be_transform_node(get_nodes_block(node));
3987 ir_node *pred = get_Proj_pred(node);
3988 ir_graph *irg = current_ir_graph;
3989 dbg_info *dbgi = get_irn_dbg_info(node);
3990 long proj = get_Proj_proj(node);
/* A multi-user load may still be folded into another node's address
 * mode, so its memory Proj cannot be finalized yet: emit a placeholder
 * Proj on the OLD pred and fix its number afterwards. */
3992 /* loads might be part of source address mode matches, so we don't
3993 * transform the ProjMs yet (with the exception of loads whose result is
3996 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
3999 /* this is needed, because sometimes we have loops that are only
4000 reachable through the ProjM */
4001 be_enqueue_preds(node);
4002 /* do it in 2 steps, to silence firm verifier */
4003 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4004 set_Proj_proj(res, pn_ia32_mem);
4008 /* renumber the proj */
4009 new_pred = be_transform_node(pred);
4010 if (is_ia32_Load(new_pred)) {
4013 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4015 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4016 case pn_Load_X_regular:
4017 return new_rd_Jmp(dbgi, irg, block);
4018 case pn_Load_X_except:
4019 /* This Load might raise an exception. Mark it. */
4020 set_ia32_exc_label(new_pred, 1);
4021 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the load was swallowed by a Conv via source address mode */
4025 } else if (is_ia32_Conv_I2I(new_pred) ||
4026 is_ia32_Conv_I2I8Bit(new_pred)) {
4027 set_irn_mode(new_pred, mode_T);
4028 if (proj == pn_Load_res) {
4029 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4030 } else if (proj == pn_Load_M) {
4031 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4033 } else if (is_ia32_xLoad(new_pred)) {
4036 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4038 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4039 case pn_Load_X_regular:
4040 return new_rd_Jmp(dbgi, irg, block);
4041 case pn_Load_X_except:
4042 /* This Load might raise an exception. Mark it. */
4043 set_ia32_exc_label(new_pred, 1);
4044 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4048 } else if (is_ia32_vfld(new_pred)) {
4051 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4053 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4054 case pn_Load_X_regular:
4055 return new_rd_Jmp(dbgi, irg, block);
4056 case pn_Load_X_except:
4057 /* This Load might raise an exception. Mark it. */
4058 set_ia32_exc_label(new_pred, 1);
4059 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4064 /* can happen for ProJMs when source address mode happened for the
4067 /* however it should not be the result proj, as that would mean the
4068 load had multiple users and should not have been used for
4070 if (proj != pn_Load_M) {
4071 panic("internal error: transformed node not a Load");
4073 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4076 panic("No idea how to transform proj");
4080 * Transform and renumber the Projs from a DivMod like instruction.
/**
 * Transform and renumber the Projs from a DivMod-like instruction.
 * Div, Mod and DivMod were all lowered to a single ia32 Div/IDiv that
 * produces both quotient and remainder, so the Proj numbers of each
 * source opcode are remapped onto the corresponding Div outputs.
 */
4082 static ir_node *gen_Proj_DivMod(ir_node *node) {
4083 ir_node *block = be_transform_node(get_nodes_block(node));
4084 ir_node *pred = get_Proj_pred(node);
4085 ir_node *new_pred = be_transform_node(pred);
4086 ir_graph *irg = current_ir_graph;
4087 dbg_info *dbgi = get_irn_dbg_info(node);
4088 long proj = get_Proj_proj(node);
4090 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* dispatch on the ORIGINAL opcode to know which proj numbering applies */
4092 switch (get_irn_opcode(pred)) {
4096 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4098 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4099 case pn_Div_X_regular:
4100 return new_rd_Jmp(dbgi, irg, block);
4101 case pn_Div_X_except:
4102 set_ia32_exc_label(new_pred, 1);
4103 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4111 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4113 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4114 case pn_Mod_X_except:
4115 set_ia32_exc_label(new_pred, 1);
4116 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4124 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4125 case pn_DivMod_res_div:
4126 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4127 case pn_DivMod_res_mod:
4128 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4129 case pn_DivMod_X_regular:
4130 return new_rd_Jmp(dbgi, irg, block);
4131 case pn_DivMod_X_except:
4132 set_ia32_exc_label(new_pred, 1);
4133 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4142 panic("No idea how to transform proj->DivMod");
4146 * Transform and renumber the Projs from a CopyB.
/**
 * Transform and renumber the Projs from a CopyB.
 * The CopyB may have become either an ia32_CopyB_i (known size) or an
 * ia32_CopyB (rep movs); the memory Proj is remapped accordingly.
 */
4148 static ir_node *gen_Proj_CopyB(ir_node *node) {
4149 ir_node *block = be_transform_node(get_nodes_block(node));
4150 ir_node *pred = get_Proj_pred(node);
4151 ir_node *new_pred = be_transform_node(pred);
4152 ir_graph *irg = current_ir_graph;
4153 dbg_info *dbgi = get_irn_dbg_info(node);
4154 long proj = get_Proj_proj(node);
4157 case pn_CopyB_M_regular:
4158 if (is_ia32_CopyB_i(new_pred)) {
4159 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4160 } else if (is_ia32_CopyB(new_pred)) {
4161 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4168 panic("No idea how to transform proj->CopyB");
4172 * Transform and renumber the Projs from a Quot.
/**
 * Transform and renumber the Projs from a Quot (float division).
 * The Quot became either an SSE xDiv or an x87 vfdiv; the memory and
 * result Projs are remapped to the corresponding outputs/modes.
 */
4174 static ir_node *gen_Proj_Quot(ir_node *node) {
4175 ir_node *block = be_transform_node(get_nodes_block(node));
4176 ir_node *pred = get_Proj_pred(node);
4177 ir_node *new_pred = be_transform_node(pred);
4178 ir_graph *irg = current_ir_graph;
4179 dbg_info *dbgi = get_irn_dbg_info(node);
4180 long proj = get_Proj_proj(node);
4184 if (is_ia32_xDiv(new_pred)) {
4185 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4186 } else if (is_ia32_vfdiv(new_pred)) {
4187 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4191 if (is_ia32_xDiv(new_pred)) {
4192 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4193 } else if (is_ia32_vfdiv(new_pred)) {
4194 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4197 case pn_Quot_X_regular:
4198 case pn_Quot_X_except:
4203 panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call into an ia32_Call.
 *
 * - Indirect calls only (direct calls are asserted away here).
 * - The call target may be matched into an address mode or immediate.
 * - Register parameters are classified into the fixed eax/ecx/edx call
 *   inputs based on their limited register requirements.
 * - Sets do_x87_sim when the call returns a float, so the x87 stack
 *   simulator runs later.
 */
4206 static ir_node *gen_be_Call(ir_node *node)
4208 dbg_info *const dbgi = get_irn_dbg_info(node);
4209 ir_graph *const irg = current_ir_graph;
4210 ir_node *const src_block = get_nodes_block(node);
4211 ir_node *const block = be_transform_node(src_block);
4212 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4213 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4214 ir_node *const sp = be_transform_node(src_sp);
4215 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4216 ir_node *const noreg = ia32_new_NoReg_gp(env_cg);
4217 ia32_address_mode_t am;
4218 ia32_address_t *const addr = &am.addr;
/* register-parameter inputs default to NoReg (unused) */
4223 ir_node * eax = noreg;
4224 ir_node * ecx = noreg;
4225 ir_node * edx = noreg;
4226 unsigned const pop = be_Call_get_pop(node);
4227 ir_type *const call_tp = be_Call_get_type(node);
4229 /* Run the x87 simulator if the call returns a float value */
4230 if (get_method_n_ress(call_tp) > 0) {
4231 ir_type *const res_type = get_method_res_type(call_tp, 0);
4232 ir_mode *const res_mode = get_type_mode(res_type);
4234 if (res_mode != NULL && mode_is_float(res_mode)) {
4235 env_cg->do_x87_sim = 1;
4239 /* We do not want be_Call direct calls */
4240 assert(be_Call_get_entity(node) == NULL);
4242 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4243 match_am | match_immediate);
/* last input is the fpcw; the remaining trailing inputs are the
 * register parameters, walked back-to-front */
4245 i = get_irn_arity(node) - 1;
4246 fpcw = be_transform_node(get_irn_n(node, i--));
4247 for (; i >= be_pos_Call_first_arg; --i) {
4248 arch_register_req_t const *const req =
4249 arch_get_register_req(env_cg->arch_env, node, i);
4250 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4252 assert(req->type == arch_register_req_type_limited);
4253 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* each of eax/ecx/edx may be assigned at most once */
4255 switch (*req->limited) {
4256 case 1 << REG_EAX: assert(eax == noreg); eax = reg_parm; break;
4257 case 1 << REG_ECX: assert(ecx == noreg); ecx = reg_parm; break;
4258 case 1 << REG_EDX: assert(edx == noreg); edx = reg_parm; break;
4259 default: panic("Invalid GP register for register parameter");
4263 mem = transform_AM_mem(irg, block, src_ptr, src_mem, addr->mem);
4264 call = new_rd_ia32_Call(dbgi, irg, block, addr->base, addr->index, mem,
4265 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4266 set_am_attributes(call, &am);
4267 call = fix_mem_proj(call, &am);
/* preserve the pinned state of the original call */
4269 if (get_irn_pinned(node) == op_pin_state_pinned)
4270 set_irn_pinned(call, op_pin_state_pinned);
4272 SET_IA32_ORIG_NODE(call, ia32_get_old_node_name(env_cg, node));
/**
 * Transform a be_IncSP: duplicate it unchanged, but mark it as
 * modifying the flags, because the add/sub adjusting ESP clobbers them.
 */
4276 static ir_node *gen_be_IncSP(ir_node *node) {
4277 ir_node *res = be_duplicate_node(node);
4278 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4284 * Transform the Projs from a be_Call.
/**
 * Transform the Projs from a be_Call.
 *
 * Handles three concerns:
 *  - When SSE2 is used and the callee returns a float, the value arrives
 *    in st(0); a vfst/xLoad pair moves it into an xmm register, and the
 *    memory Proj must be rerouted behind that sequence.
 *  - Remaps be_Call proj numbers to ia32_Call proj numbers; for result
 *    values the matching output is found by searching the new call's
 *    limited output requirements.
 *  - Pins well-known outputs (stack -> ESP, fpcw) to their registers.
 *
 * NOTE(review): several lines (declarations, closing braces, the result
 * search epilogue) are elided in this extract.
 */
4286 static ir_node *gen_Proj_be_Call(ir_node *node)
4288 ir_node *block = be_transform_node(get_nodes_block(node));
4289 ir_node *call = get_Proj_pred(node);
4290 ir_node *new_call = be_transform_node(call);
4291 ir_graph *irg = current_ir_graph;
4292 dbg_info *dbgi = get_irn_dbg_info(node);
4293 ir_type *method_type = be_Call_get_type(call);
4294 int n_res = get_method_n_ress(method_type);
4295 long proj = get_Proj_proj(node);
4296 ir_mode *mode = get_irn_mode(node);
4298 const arch_register_class_t *cls;
4301 /* The following is kinda tricky: If we're using SSE, then we have to
4302 * move the result value of the call in floating point registers to an
4303 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4304 * after the call, we have to make sure to correctly make the
4305 * MemProj and the result Proj use these 2 nodes
4307 if (proj == pn_be_Call_M_regular) {
4308 // get new node for result, are we doing the sse load/store hack?
4309 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4310 ir_node *call_res_new;
4311 ir_node *call_res_pred = NULL;
4313 if (call_res != NULL) {
4314 call_res_new = be_transform_node(call_res);
4315 call_res_pred = get_Proj_pred(call_res_new);
/* no sse hack: memory comes straight from the call; otherwise it
 * must come from the xLoad of the st(0)->xmm copy sequence */
4318 if (call_res_pred == NULL || is_ia32_Call(call_res_pred)) {
4319 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4322 assert(is_ia32_xLoad(call_res_pred));
4323 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
4327 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4328 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4330 ir_node *frame = get_irg_frame(irg);
4331 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4333 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4336 /* in case there is no memory output: create one to serialize the copy
4338 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4339 pn_be_Call_M_regular);
4340 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4341 pn_be_Call_first_res);
4343 /* store st(0) onto stack */
4344 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4346 set_ia32_op_type(fstp, ia32_AddrModeD);
4347 set_ia32_use_frame(fstp);
4349 /* load into SSE register */
4350 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4352 set_ia32_op_type(sse_load, ia32_AddrModeS);
4353 set_ia32_use_frame(sse_load);
4355 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4361 /* transform call modes */
4362 if (mode_is_data(mode)) {
4363 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
4367 /* Map from be_Call to ia32_Call proj number */
4368 if (proj == pn_be_Call_sp) {
4369 proj = pn_ia32_Call_stack;
4370 } else if (proj == pn_be_Call_M_regular) {
4371 proj = pn_ia32_Call_M;
/* result proj: find the new call output with the same limited
 * register requirement as the old proj */
4373 arch_register_req_t const *const req = arch_get_register_req(env_cg->arch_env, node, BE_OUT_POS(proj));
4374 int const n_outs = get_ia32_n_res(new_call);
4377 assert(proj >= pn_be_Call_first_res);
4378 assert(req->type == arch_register_req_type_limited);
4380 for (i = 0; i < n_outs; ++i) {
4381 arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);
4383 if (new_req->type != arch_register_req_type_limited ||
4384 new_req->cls != req->cls ||
4385 *new_req->limited != *req->limited)
4394 res = new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4396 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
4398 case pn_ia32_Call_stack:
4399 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4402 case pn_ia32_Call_fpcw:
4403 arch_set_irn_register(env_cg->arch_env, res, &ia32_fp_cw_regs[REG_FPCW]);
4411 * Transform the Projs from a Cmp.
/**
 * Transform the Projs from a Cmp: must never happen here, since all
 * mode_b values should have been lowered before this phase.
 */
4413 static ir_node *gen_Proj_Cmp(ir_node *node)
4415 /* this probably means not all mode_b nodes were lowered... */
4416 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4421 * Transform the Projs from a Bound.
/**
 * Transform the Projs from a Bound.
 * The Bound became a Jcc (see gen_Bound): the regular control flow maps
 * to the Jcc true output, the exception to the false output; the index
 * result passes through unchanged, as does the memory.
 */
4423 static ir_node *gen_Proj_Bound(ir_node *node)
4425 ir_node *new_node, *block;
4426 ir_node *pred = get_Proj_pred(node);
4428 switch (get_Proj_proj(node)) {
4430 return be_transform_node(get_Bound_mem(pred));
4431 case pn_Bound_X_regular:
4432 new_node = be_transform_node(pred);
4433 block = get_nodes_block(new_node);
4434 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
4435 case pn_Bound_X_except:
4436 new_node = be_transform_node(pred);
4437 block = get_nodes_block(new_node);
4438 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
4440 return be_transform_node(get_Bound_index(pred));
4442 panic("unsupported Proj from Bound");
/**
 * Transform the Projs of an ASM node: non-memory Projs are duplicated
 * unchanged, the memory Proj is renumbered to the slot after the last
 * register output of the new ASM node.
 */
4446 static ir_node *gen_Proj_ASM(ir_node *node)
4452 if (get_irn_mode(node) != mode_M)
4453 return be_duplicate_node(node);
4455 pred = get_Proj_pred(node);
4456 new_pred = be_transform_node(pred);
4457 block = get_nodes_block(new_pred);
4458 return new_r_Proj(current_ir_graph, block, new_pred, mode_M,
4459 get_ia32_n_res(new_pred) + 1);
4463 * Transform and potentially renumber Proj nodes.
/**
 * Transform and potentially renumber Proj nodes: central dispatcher that
 * forwards to the specialized gen_Proj_* handler based on the opcode of
 * the Proj's predecessor.
 *
 * NOTE(review): the case labels of the outer switch are elided in this
 * extract; only the handler calls are visible.
 */
4465 static ir_node *gen_Proj(ir_node *node) {
4466 ir_node *pred = get_Proj_pred(node);
4469 switch (get_irn_opcode(pred)) {
4471 proj = get_Proj_proj(node);
/* a Store has only the memory result, which the transformed Store
 * itself represents */
4472 if (proj == pn_Store_M) {
4473 return be_transform_node(pred);
4475 panic("No idea how to transform proj->Store");
4478 return gen_Proj_Load(node);
4480 return gen_Proj_ASM(node);
4484 return gen_Proj_DivMod(node);
4486 return gen_Proj_CopyB(node);
4488 return gen_Proj_Quot(node);
4490 return gen_Proj_be_SubSP(node);
4492 return gen_Proj_be_AddSP(node);
4494 return gen_Proj_be_Call(node);
4496 return gen_Proj_Cmp(node);
4498 return gen_Proj_Bound(node);
4500 proj = get_Proj_proj(node);
4502 case pn_Start_X_initial_exec: {
4503 ir_node *block = get_nodes_block(pred);
4504 ir_node *new_block = be_transform_node(block);
4505 dbg_info *dbgi = get_irn_dbg_info(node);
4506 /* we exchange the ProjX with a jump */
4507 ir_node *jump = new_rd_Jmp(dbgi, current_ir_graph, new_block);
4512 case pn_Start_P_tls:
4513 return gen_Proj_tls(node);
4518 if (is_ia32_l_FloattoLL(pred)) {
4519 return gen_Proj_l_FloattoLL(node);
4521 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* default: gp-register Projs get renumbered to mode_Iu; everything
 * else is duplicated as-is */
4525 ir_mode *mode = get_irn_mode(node);
4526 if (ia32_mode_needs_gp_reg(mode)) {
4527 ir_node *new_pred = be_transform_node(pred);
4528 ir_node *block = be_transform_node(get_nodes_block(node));
4529 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
4530 mode_Iu, get_Proj_proj(node));
4531 #ifdef DEBUG_libfirm
/* keep the old node number for debugging */
4532 new_proj->node_nr = node->node_nr;
4538 return be_duplicate_node(node);
4542 * Enters all transform functions into the generic pointer
/**
 * Enter all transform functions into the op's generic function pointer,
 * so be_transform_graph can dispatch per opcode. GEN(x) installs gen_x,
 * BAD(x) installs the panicking bad_transform for opcodes that must not
 * occur anymore at this stage.
 *
 * NOTE(review): the bulk of the GEN/BAD registration lines is elided in
 * this extract.
 */
4544 static void register_transformers(void)
4548 /* first clear the generic function pointer for all ops */
4549 clear_irp_opcodes_generic_func();
4551 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4552 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
4590 /* transform ops from intrinsic lowering */
4602 GEN(ia32_l_LLtoFloat);
4603 GEN(ia32_l_FloattoLL);
4609 /* we should never see these nodes */
4624 /* handle generic backend nodes */
4633 op_Mulh = get_op_Mulh();
4642 * Pre-transform all unknown and noreg nodes.
/**
 * Pre-transform all unknown and noreg nodes, so the cached placeholder
 * nodes in the code generator point at their transformed counterparts
 * before the main transformation runs.
 */
4644 static void ia32_pretransform_node(void *arch_cg) {
4645 ia32_code_gen_t *cg = arch_cg;
4647 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
4648 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
4649 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
4650 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
4651 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
4652 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
4657 * Walker, checks if all ia32 nodes producing more than one result have their
4658 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
/**
 * Walker: checks if all ia32 nodes producing more than one result have
 * their Projs, otherwise creates the missing Projs and keeps them alive
 * with a be_Keep node (so register allocation still assigns registers
 * to the unused outputs).
 */
4660 static void add_missing_keep_walker(ir_node *node, void *data)
4663 unsigned found_projs = 0;
4664 const ir_edge_t *edge;
4665 ir_mode *mode = get_irn_mode(node);
4670 if(!is_ia32_irn(node))
4673 n_outs = get_ia32_n_res(node);
/* SwitchJmp outputs are control flow, not register values */
4676 if(is_ia32_SwitchJmp(node))
/* found_projs is a bitset over output numbers, hence this limit */
4679 assert(n_outs < (int) sizeof(unsigned) * 8);
4680 foreach_out_edge(node, edge) {
4681 ir_node *proj = get_edge_src_irn(edge);
4684 /* The node could be kept */
4688 if (get_irn_mode(proj) == mode_M)
4691 pn = get_Proj_proj(proj);
4692 assert(pn < n_outs);
4693 found_projs |= 1 << pn;
4697 /* are keeps missing? */
4699 for(i = 0; i < n_outs; ++i) {
4702 const arch_register_req_t *req;
4703 const arch_register_class_t *cls;
4705 if(found_projs & (1 << i)) {
4709 req = get_ia32_out_req(node, i);
/* flag outputs need no keep; flags are recomputed anyway */
4714 if(cls == &ia32_reg_classes[CLASS_ia32_flags]) {
4718 block = get_nodes_block(node);
4719 in[0] = new_r_Proj(current_ir_graph, block, node,
4720 arch_register_class_mode(cls), i);
/* reuse one Keep per node, appending further inputs to it */
4721 if(last_keep != NULL) {
4722 be_Keep_add_node(last_keep, cls, in[0]);
4724 last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
4725 if(sched_is_scheduled(node)) {
4726 sched_add_after(node, last_keep);
4733 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/**
 * Adds missing keeps to nodes: walks the whole graph with
 * add_missing_keep_walker, adding Proj+Keep pairs for unused outputs.
 */
4736 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
4738 ir_graph *irg = be_get_birg_irg(cg->birg);
4739 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
4742 /* do the transformation */
/* do the transformation: entry point of the firm -> ia32 phase */
4743 void ia32_transform_graph(ia32_code_gen_t *cg)
4747 register_transformers();
4749 initial_fpcw = NULL;
/* the heights structure is used by the address-mode matcher */
4751 BE_TIMER_PUSH(t_heights);
4752 heights = heights_new(cg->irg);
4753 BE_TIMER_POP(t_heights);
4754 ia32_calculate_non_address_mode_nodes(cg->birg);
/* CSE must be off during transformation: several nodes get their
 * attributes set only after construction, so CSE would wrongly merge
 * nodes that are still identical at creation time */
4756 /* the transform phase is not safe for CSE (yet) because several nodes get
4757 * attributes set after their creation */
4758 cse_last = get_opt_cse();
4761 be_transform_graph(cg->birg, ia32_pretransform_node, cg);
4763 set_opt_cse(cse_last);
/* release the per-graph analysis data again */
4765 ia32_free_non_address_mode_nodes();
4766 heights_free(heights);
4770 void ia32_init_transform(void)
4772 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");