2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
35 #include "irgraph_t.h"
40 #include "iredges_t.h"
52 #include "../benode_t.h"
53 #include "../besched.h"
55 #include "../beutil.h"
56 #include "../beirg_t.h"
57 #include "../betranshlp.h"
60 #include "bearch_ia32_t.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
/* Hex bit patterns of the IEEE-754 sign bit and sign-cleared (abs) masks for
 * single (SFP) and double (DFP) precision, plus the largest signed 64bit
 * integer; used to materialize well-known FP constants (Neg/Abs, int max). */
73 #define SFP_SIGN "0x80000000"
74 #define DFP_SIGN "0x8000000000000000"
75 #define SFP_ABS "0x7FFFFFFF"
76 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
77 #define DFP_INTMAX "9223372036854775807"
/* Names of the primitive ir_types created for the constants above. */
79 #define TP_SFP_SIGN "ia32_sfp_sign"
80 #define TP_DFP_SIGN "ia32_dfp_sign"
81 #define TP_SFP_ABS "ia32_sfp_abs"
82 #define TP_DFP_ABS "ia32_dfp_abs"
83 #define TP_INT_MAX "ia32_int_max"
/* Linker-level entity names for the constants above. */
85 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
86 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
87 #define ENT_SFP_ABS "IA32_SFP_ABS"
88 #define ENT_DFP_ABS "IA32_DFP_ABS"
89 #define ENT_INT_MAX "IA32_INT_MAX"
/* Shorthands for the modes of the x87 (vfp) and SSE (xmm) register classes. */
91 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
92 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* Debug module handle, only present in debug builds. */
94 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
96 /** hold the current code generator during transformation */
97 static ia32_code_gen_t *env_cg = NULL;
/* Transformed node of the initial FP control word; created lazily
 * (see get_fpcw) and cached here for the duration of one transformation. */
98 static ir_node *initial_fpcw = NULL;
/* Heights information, used to test whether one node can reach another
 * inside a block when deciding about source address mode. */
99 static heights_t *heights = NULL;
101 extern ir_op *get_op_Mulh(void);
/* Constructor for a binary ia32 node taking address-mode inputs
 * (base, index, mem) plus two operands. */
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
/* Like construct_binop_func, but additionally consumes an eflags input. */
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
/* Constructor for shift/rotate nodes: value and shift amount only,
 * no address-mode inputs. */
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
/* Constructor for binary operations with a memory destination. */
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
/* Constructor for unary operations with a memory destination. */
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
/* Constructor for x87 float binops which additionally take the FP
 * control word as input. */
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
/* Constructor for plain unary operations. */
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in this file. */
128 static ir_node *try_create_Immediate(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_immediate_or_transform(ir_node *node,
132 char immediate_constraint_type);
134 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
135 dbg_info *dbgi, ir_node *block,
136 ir_node *op, ir_node *orig_node);
139 * Return true if a mode can be stored in the GP register set
141 static INLINE int mode_needs_gp_reg(ir_mode *mode) {
/* the FP control word lives in its own register class, never in GP */
142 if(mode == mode_fpcw)
/* GP registers are 32 bit wide */
144 if(get_mode_size_bits(mode) > 32)
/* ints, pointers and the boolean mode all go through GP registers */
146 return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
150 * creates a unique ident by adding a number to a tag
152 * @param tag the tag string, must contain a %d if a number
/* NOTE(review): the counter is function-static, so names are unique per
 * process run, not per irg — presumably intentional; confirm. */
155 static ident *unique_id(const char *tag)
157 static unsigned id = 0;
/* format the running counter into the tag to build the name */
160 snprintf(str, sizeof(str), tag, ++id);
161 return new_id_from_str(str);
165 * Get a primitive type for a mode.
/* Types are cached in @p types (a pmap keyed by mode) so each mode gets
 * exactly one primitive type; on a miss a new one is created and inserted. */
167 static ir_type *get_prim_type(pmap *types, ir_mode *mode)
169 pmap_entry *e = pmap_find(types, mode);
/* cache miss: build a new primitive type named after the mode */
174 snprintf(buf, sizeof(buf), "prim_type_%s", get_mode_name(mode));
175 res = new_type_primitive(new_id_from_str(buf), mode);
/* 16 byte alignment, suitable for SSE loads/stores of such constants */
176 set_type_alignment_bytes(res, 16);
177 pmap_insert(types, mode, res);
185 * Creates an immediate.
187 * @param symconst if set, create a SymConst immediate
188 * @param symconst_sign sign for the symconst
189 * @param val integer value for the immediate
191 static ir_node *create_Immediate(ir_entity *symconst, int symconst_sign, long val)
193 ir_graph *irg = current_ir_graph;
/* immediates are placed in the start block so they dominate all users */
194 ir_node *start_block = get_irg_start_block(irg);
195 ir_node *immediate = new_rd_ia32_Immediate(NULL, irg, start_block,
196 symconst, symconst_sign, val);
/* pin the virtual "no register" GP register: immediates need no real one */
197 arch_set_irn_register(env_cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);
203 * Get an atomic entity that is initialized with a tarval forming
206 * @param cnst the node representing the constant
/* Entities are cached in isa->tv_ent keyed by the tarval, so every distinct
 * float constant gets exactly one global read-only entity. */
208 static ir_entity *create_float_const_entity(ir_node *cnst)
210 ia32_isa_t *isa = env_cg->isa;
211 tarval *key = get_Const_tarval(cnst);
212 pmap_entry *e = pmap_find(isa->tv_ent, key);
218 ir_mode *mode = get_tarval_mode(tv);
/* On x87 the load widens anyway, so a lossless narrowing of the stored
 * value saves memory without changing the loaded result. */
221 if (! ia32_cg_config.use_sse2) {
222 /* try to reduce the mode to produce smaller sized entities */
223 if (mode != mode_F) {
224 if (tarval_ieee754_can_conv_lossless(tv, mode_F)) {
226 tv = tarval_convert_to(tv, mode);
227 } else if (mode != mode_D) {
228 if (tarval_ieee754_can_conv_lossless(tv, mode_D)) {
230 tv = tarval_convert_to(tv, mode);
236 if (mode == get_irn_mode(cnst)) {
237 /* mode was not changed */
238 tp = get_Const_type(cnst);
239 if (tp == firm_unknown_type)
240 tp = get_prim_type(isa->types, mode);
/* mode was narrowed above: use the primitive type of the new mode */
242 tp = get_prim_type(isa->types, mode);
/* local, constant, statically allocated entity named ".LC<n>" */
244 res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
246 set_entity_ld_ident(res, get_entity_ident(res));
247 set_entity_visibility(res, visibility_local);
248 set_entity_variability(res, variability_constant);
249 set_entity_allocation(res, allocation_static);
251 /* we create a new entity here: It's initialization must resist on the
/* build the initializer inside the const-code irg, then restore the
 * previous current_ir_graph */
253 rem = current_ir_graph;
254 current_ir_graph = get_const_code_irg();
255 set_atomic_ent_value(res, new_Const_type(tv, tp));
256 current_ir_graph = rem;
/* remember the entity for later requests of the same tarval */
258 pmap_insert(isa->tv_ent, key, res);
/* Returns true if @p node is a Const with value 0. */
266 static int is_Const_0(ir_node *node) {
267 return is_Const(node) && is_Const_null(node);
/* Returns true if @p node is a Const with value 1. */
270 static int is_Const_1(ir_node *node) {
271 return is_Const(node) && is_Const_one(node);
/* Returns true if @p node is a Const with all bits set (-1). */
274 static int is_Const_Minus_1(ir_node *node) {
275 return is_Const(node) && is_Const_all_one(node);
279 * returns true if constant can be created with a simple float command
/* x87 has dedicated instructions for loading 0.0 (fldz) and 1.0 (fld1),
 * so those need no constant-pool entity. */
281 static int is_simple_x87_Const(ir_node *node)
283 tarval *tv = get_Const_tarval(node);
284 if (tarval_is_null(tv) || tarval_is_one(tv))
287 /* TODO: match all the other float constants */
292 * returns true if constant can be created with a simple float command
/* SSE can synthesize 0.0 and 1.0 without a memory load; a double whose
 * low 32 bits are zero can also be built from a 32bit immediate. */
294 static int is_simple_sse_Const(ir_node *node)
296 tarval *tv = get_Const_tarval(node);
297 ir_mode *mode = get_tarval_mode(tv);
302 if (tarval_is_null(tv) || tarval_is_one(tv))
305 if (mode == mode_D) {
/* assemble the low 32 bits of the double from its byte representation */
306 unsigned val = get_tarval_sub_bits(tv, 0) |
307 (get_tarval_sub_bits(tv, 1) << 8) |
308 (get_tarval_sub_bits(tv, 2) << 16) |
309 (get_tarval_sub_bits(tv, 3) << 24);
311 /* lower 32bit are zero, really a 32bit constant */
315 /* TODO: match all the other float constants */
320 * Transforms a Const.
/* Float constants are materialized either by SSE idioms (xZero/xAllOnes +
 * shifts, movd from a GP constant) or by x87 fldz/fld1, falling back to a
 * load from a constant-pool entity. Integer constants become ia32_Const. */
322 static ir_node *gen_Const(ir_node *node) {
323 ir_graph *irg = current_ir_graph;
324 ir_node *old_block = get_nodes_block(node);
325 ir_node *block = be_transform_node(old_block);
326 dbg_info *dbgi = get_irn_dbg_info(node);
327 ir_mode *mode = get_irn_mode(node);
329 assert(is_Const(node));
331 if (mode_is_float(mode)) {
333 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
334 ir_node *nomem = new_NoMem();
338 if (ia32_cg_config.use_sse2) {
339 tarval *tv = get_Const_tarval(node);
340 if (tarval_is_null(tv)) {
/* 0.0: xorps/xorpd the register with itself */
341 load = new_rd_ia32_xZero(dbgi, irg, block);
342 set_ia32_ls_mode(load, mode);
344 } else if (tarval_is_one(tv)) {
/* 1.0: set all bits, then shift left/right to leave exactly the
 * exponent pattern of 1.0 (shift amounts differ for float/double) */
345 int cnst = mode == mode_F ? 26 : 55;
346 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
347 ir_node *imm2 = create_Immediate(NULL, 0, 2);
348 ir_node *pslld, *psrld;
350 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
351 set_ia32_ls_mode(load, mode);
352 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
353 set_ia32_ls_mode(pslld, mode);
354 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
355 set_ia32_ls_mode(psrld, mode);
357 } else if (mode == mode_F) {
358 /* we can place any 32bit constant by using a movd gp, sse */
359 unsigned val = get_tarval_sub_bits(tv, 0) |
360 (get_tarval_sub_bits(tv, 1) << 8) |
361 (get_tarval_sub_bits(tv, 2) << 16) |
362 (get_tarval_sub_bits(tv, 3) << 24);
363 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
364 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
365 set_ia32_ls_mode(load, mode);
368 if (mode == mode_D) {
/* check the low 32 bits of the double */
369 unsigned val = get_tarval_sub_bits(tv, 0) |
370 (get_tarval_sub_bits(tv, 1) << 8) |
371 (get_tarval_sub_bits(tv, 2) << 16) |
372 (get_tarval_sub_bits(tv, 3) << 24);
374 ir_node *imm32 = create_Immediate(NULL, 0, 32);
375 ir_node *cnst, *psllq;
377 /* fine, lower 32bit are zero, produce 32bit value */
378 val = get_tarval_sub_bits(tv, 4) |
379 (get_tarval_sub_bits(tv, 5) << 8) |
380 (get_tarval_sub_bits(tv, 6) << 16) |
381 (get_tarval_sub_bits(tv, 7) << 24);
/* movd the high half into xmm, then shift it up by 32 bits */
382 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
383 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
384 set_ia32_ls_mode(load, mode);
385 psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
386 set_ia32_ls_mode(psllq, mode);
/* general case: load the value from a constant-pool entity */
391 floatent = create_float_const_entity(node);
393 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
395 set_ia32_op_type(load, ia32_AddrModeS);
396 set_ia32_am_sc(load, floatent);
/* the load reads immutable data, so it may be rematerialized */
397 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
398 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0 and 1.0, entity load otherwise */
401 if (is_Const_null(node)) {
402 load = new_rd_ia32_vfldz(dbgi, irg, block);
404 set_ia32_ls_mode(load, mode);
405 } else if (is_Const_one(node)) {
406 load = new_rd_ia32_vfld1(dbgi, irg, block);
408 set_ia32_ls_mode(load, mode);
410 floatent = create_float_const_entity(node);
412 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
413 set_ia32_op_type(load, ia32_AddrModeS);
414 set_ia32_am_sc(load, floatent);
415 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
416 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
417 /* take the mode from the entity */
418 set_ia32_ls_mode(load, get_type_mode(get_entity_type(floatent)));
422 /* Const Nodes before the initial IncSP are a bad idea, because
423 * they could be spilled and we have no SP ready at that point yet.
424 * So add a dependency to the initial frame pointer calculation to
425 * avoid that situation.
427 if (get_irg_start_block(irg) == block) {
428 add_irn_dep(load, get_irg_frame(irg));
431 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
433 } else { /* non-float mode */
435 tarval *tv = get_Const_tarval(node);
/* normalize the tarval to the 32bit unsigned mode used by ia32_Const */
438 tv = tarval_convert_to(tv, mode_Iu);
440 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
442 panic("couldn't convert constant tarval (%+F)", node);
444 val = get_tarval_long(tv);
446 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
447 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/* same start-block spill problem as above: depend on the frame */
450 if (get_irg_start_block(irg) == block) {
451 add_irn_dep(cnst, get_irg_frame(irg));
459 * Transforms a SymConst.
/* Float SymConsts become a load of the entity (SSE xLoad or x87 vfld);
 * integer/pointer SymConsts become an ia32_Const carrying the entity.
 * Only symconst_addr_ent is supported. */
461 static ir_node *gen_SymConst(ir_node *node) {
462 ir_graph *irg = current_ir_graph;
463 ir_node *old_block = get_nodes_block(node);
464 ir_node *block = be_transform_node(old_block);
465 dbgi_info *dbgi = get_irn_dbg_info(node);
466 ir_mode *mode = get_irn_mode(node);
469 if (mode_is_float(mode)) {
470 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
471 ir_node *nomem = new_NoMem();
473 if (ia32_cg_config.use_sse2)
474 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
476 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
477 set_ia32_am_sc(cnst, get_SymConst_entity(node));
478 set_ia32_use_frame(cnst);
482 if(get_SymConst_kind(node) != symconst_addr_ent) {
483 panic("backend only support symconst_addr_ent (at %+F)", node);
485 entity = get_SymConst_entity(node);
486 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
489 /* Const Nodes before the initial IncSP are a bad idea, because
490 * they could be spilled and we have no SP ready at that point yet
492 if (get_irg_start_block(irg) == block) {
493 add_irn_dep(cnst, get_irg_frame(irg));
496 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
501 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* The table below maps each ia32_known_const_t to its type name, entity
 * name, constant string, mode selector and required alignment. Created
 * entities are cached in ent_cache so each constant exists only once. */
502 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
503 static const struct {
505 const char *ent_name;
506 const char *cnst_str;
509 } names [ia32_known_const_max] = {
510 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
511 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
512 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
513 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
514 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
516 static ir_entity *ent_cache[ia32_known_const_max];
518 const char *tp_name, *ent_name, *cnst_str;
526 ent_name = names[kct].ent_name;
527 if (! ent_cache[kct]) {
/* first request for this constant: build type, tarval and entity */
528 tp_name = names[kct].tp_name;
529 cnst_str = names[kct].cnst_str;
/* mode selector: 0 = 32bit unsigned, 1 = 64bit unsigned, else float */
531 switch (names[kct].mode) {
532 case 0: mode = mode_Iu; break;
533 case 1: mode = mode_Lu; break;
534 default: mode = mode_F; break;
536 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
537 tp = new_type_primitive(new_id_from_str(tp_name), mode);
538 /* set the specified alignment */
539 set_type_alignment_bytes(tp, names[kct].align);
/* local, constant, statically allocated global entity */
541 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
543 set_entity_ld_ident(ent, get_entity_ident(ent));
544 set_entity_visibility(ent, visibility_local);
545 set_entity_variability(ent, variability_constant);
546 set_entity_allocation(ent, allocation_static);
548 /* we create a new entity here: It's initialization must resist on the
/* build the initializer in the const-code irg, then restore */
550 rem = current_ir_graph;
551 current_ir_graph = get_const_code_irg();
552 cnst = new_Const(mode, tv);
553 current_ir_graph = rem;
555 set_atomic_ent_value(ent, cnst);
557 /* cache the entry */
558 ent_cache[kct] = ent;
561 return ent_cache[kct];
566 * Prints the old node name on cg obst and returns a pointer to it.
/* The string lives on the isa's name obstack; obstack_1grow appends the
 * terminating NUL before obstack_finish hands out the pointer. */
568 const char *ia32_get_old_node_name(ia32_code_gen_t *cg, ir_node *irn) {
569 ia32_isa_t *isa = (ia32_isa_t *)cg->arch_env->isa;
571 lc_eoprintf(firm_get_arg_env(), isa->name_obst, "%+F", irn);
572 obstack_1grow(isa->name_obst, 0);
573 return obstack_finish(isa->name_obst);
578 * return true if the node is a Proj(Load) and could be used in source address
579 * mode for another node. Will return only true if the @p other node is not
580 * dependent on the memory of the Load (for binary operations use the other
581 * input here, for unary operations use NULL).
583 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
584 ir_node *other, ir_node *other2)
589 /* float constants are always available */
590 if (is_Const(node)) {
591 ir_mode *mode = get_irn_mode(node);
592 if (mode_is_float(mode)) {
/* constants that can be built by a simple instruction are cheaper
 * than a memory operand, so don't fold them into address mode */
593 if (ia32_cg_config.use_sse2) {
594 if (is_simple_sse_Const(node))
597 if (is_simple_x87_Const(node))
/* multiple users: folding would duplicate the constant load */
600 if (get_irn_n_edges(node) > 1)
/* otherwise node must be a Proj taking the result of a Load */
608 load = get_Proj_pred(node);
609 pn = get_Proj_proj(node);
610 if (!is_Load(load) || pn != pn_Load_res)
/* the load must be in the same block as the consumer */
612 if (get_nodes_block(load) != block)
614 /* we only use address mode if we're the only user of the load */
615 if (get_irn_n_edges(node) > 1)
617 /* in some edge cases with address mode we might reach the load normally
618 * and through some AM sequence, if it is already materialized then we
619 * can't create an AM node from it */
620 if (be_is_transformed(node))
623 /* don't do AM if other node inputs depend on the load (via mem-proj) */
624 if (other != NULL && get_nodes_block(other) == block &&
625 heights_reachable_in_block(heights, other, load))
627 if (other2 != NULL && get_nodes_block(other2) == block &&
628 heights_reachable_in_block(heights, other2, load))
/* Result record of match_arguments: the matched address (addr), the chosen
 * operand kind (op_type) and flags describing how the operands were bound. */
634 typedef struct ia32_address_mode_t ia32_address_mode_t;
635 struct ia32_address_mode_t {
639 ia32_op_type_t op_type;
/* operation is commutative, operands may be swapped by later phases */
643 unsigned commutative : 1;
/* operands were swapped during matching (relevant for non-commutative users) */
644 unsigned ins_permuted : 1;
/* Decompose @p ptr into an ia32 address (base/index/scale/offset) and
 * transform its parts plus the given memory input into new-world nodes.
 * Missing base/index are filled with the GP NoReg. */
647 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
651 /* construct load address */
652 memset(addr, 0, sizeof(addr[0]));
653 ia32_create_address_mode(addr, ptr, /*force=*/0);
655 noreg_gp = ia32_new_NoReg_gp(env_cg);
656 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
657 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
658 addr->mem = be_transform_node(mem);
/* Fill am->addr for a source-address-mode operand @p node, which is either
 * a float Const (turned into a constant-pool entity reference) or a
 * Proj(Load) whose address is decomposed into base/index/scale/offset. */
661 static void build_address(ia32_address_mode_t *am, ir_node *node)
663 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
664 ia32_address_t *addr = &am->addr;
670 if (is_Const(node)) {
/* float constant: address is just the entity symbol, no memory dep */
671 ir_entity *entity = create_float_const_entity(node);
672 addr->base = noreg_gp;
673 addr->index = noreg_gp;
674 addr->mem = new_NoMem();
675 addr->symconst_ent = entity;
/* load size comes from the (possibly narrowed) entity type */
677 am->ls_mode = get_type_mode(get_entity_type(entity));
678 am->pinned = op_pin_state_floats;
/* Proj(Load) case: take address, mode and pin state from the Load */
682 load = get_Proj_pred(node);
683 ptr = get_Load_ptr(load);
684 mem = get_Load_mem(load);
685 new_mem = be_transform_node(mem);
686 am->pinned = get_irn_pinned(load);
687 am->ls_mode = get_Load_mode(load);
/* remember the Load's memory Proj so fix_mem_proj can reattach it */
688 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
690 /* construct load address */
691 ia32_create_address_mode(addr, ptr, /*force=*/0);
693 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
694 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/* Copy the components of @p addr (scale, symconst, offset, frame entity)
 * into the ia32 attributes of @p node. */
698 static void set_address(ir_node *node, const ia32_address_t *addr)
700 set_ia32_am_scale(node, addr->scale);
701 set_ia32_am_sc(node, addr->symconst_ent);
702 set_ia32_am_offs_int(node, addr->offset);
703 if(addr->symconst_sign)
704 set_ia32_am_sc_sign(node);
/* address refers to the stack frame: mark node and record the entity */
706 set_ia32_use_frame(node);
707 set_ia32_frame_ent(node, addr->frame_entity);
711 * Apply attributes of a given address mode to a node.
713 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
715 set_address(node, &am->addr);
717 set_ia32_op_type(node, am->op_type);
718 set_ia32_ls_mode(node, am->ls_mode);
/* keep the node pinned if the folded Load was pinned */
719 if (am->pinned == op_pin_state_pinned) {
720 set_irn_pinned(node, am->pinned);
/* propagate commutativity so later phases may swap the operands */
723 set_ia32_commutative(node);
727 * Check, if a given node is a Down-Conv, ie. a integer Conv
728 * from a mode with a mode with more bits to a mode with lesser bits.
729 * Moreover, we return only true if the node has not more than 1 user.
731 * @param node the node
732 * @return non-zero if node is a Down-Conv
734 static int is_downconv(const ir_node *node)
742 /* we only want to skip the conv when we're the only user
743 * (not optimal but for now...)
745 if(get_irn_n_edges(node) > 1)
748 src_mode = get_irn_mode(get_Conv_op(node));
749 dest_mode = get_irn_mode(node);
/* both modes must be GP-register modes and the target strictly smaller */
750 return mode_needs_gp_reg(src_mode)
751 && mode_needs_gp_reg(dest_mode)
752 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
755 /* Skip all Down-Conv's on a given node and return the resulting node. */
/* Safe for mode-neutral operations: the upper bits they would cut off
 * do not influence the result. */
756 ir_node *ia32_skip_downconv(ir_node *node) {
757 while (is_downconv(node))
758 node = get_Conv_op(node);
/* Widen @p node to a 32bit GP mode via an I2I Conv; the signedness of the
 * source mode selects the target mode (sign- vs. zero-extension).
 * @p orig_node is used for debug/original-node bookkeeping. */
764 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
766 ir_mode *mode = get_irn_mode(node);
771 if(mode_is_signed(mode)) {
776 block = get_nodes_block(node);
777 dbgi = get_irn_dbg_info(node);
779 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
784 * matches operands of a node into ia32 addressing/operand modes. This covers
785 * usage of source address mode, immediates, operations with non 32-bit modes,
787 * The resulting data is filled into the @p am struct. block is the block
788 * of the node whose arguments are matched. op1, op2 are the first and second
789 * input that are matched (op1 may be NULL). other_op is another unrelated
790 * input that is not matched! but which is needed sometimes to check if AM
791 * for op1/op2 is legal.
792 * @p flags describes the supported modes of the operation in detail.
794 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
795 ir_node *op1, ir_node *op2, ir_node *other_op,
798 ia32_address_t *addr = &am->addr;
799 ir_mode *mode = get_irn_mode(op2);
800 int mode_bits = get_mode_size_bits(mode);
801 ir_node *noreg_gp, *new_op1, *new_op2;
803 unsigned commutative;
804 int use_am_and_immediates;
/* start from a clean address-mode record */
807 memset(am, 0, sizeof(am[0]));
/* decode the capability flags of the operation */
809 commutative = (flags & match_commutative) != 0;
810 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
811 use_am = (flags & match_am) != 0;
812 use_immediate = (flags & match_immediate) != 0;
813 assert(!use_am_and_immediates || use_immediate);
816 assert(!commutative || op1 != NULL);
817 assert(use_am || !(flags & match_8bit_am));
818 assert(use_am || !(flags & match_16bit_am));
/* sub-32bit operand: only allowed if the matching 8/16bit AM flag is set */
820 if (mode_bits == 8) {
821 if (!(flags & match_8bit_am))
823 /* we don't automatically add upconvs yet */
824 assert((flags & match_mode_neutral) || (flags & match_8bit));
825 } else if (mode_bits == 16) {
826 if (!(flags & match_16bit_am))
828 /* we don't automatically add upconvs yet */
829 assert((flags & match_mode_neutral) || (flags & match_16bit));
832 /* we can simply skip downconvs for mode neutral nodes: the upper bits
833 * can be random for these operations */
834 if (flags & match_mode_neutral) {
835 op2 = ia32_skip_downconv(op2);
837 op1 = ia32_skip_downconv(op1);
841 /* match immediates. firm nodes are normalized: constants are always on the
844 if (!(flags & match_try_am) && use_immediate) {
845 new_op2 = try_create_Immediate(op2, 0);
848 noreg_gp = ia32_new_NoReg_gp(env_cg);
/* try source address mode for op2 first */
849 if (new_op2 == NULL &&
850 use_am && ia32_use_source_address_mode(block, op2, op1, other_op)) {
851 build_address(am, op2);
852 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
/* float register operand slot stays empty when op2 comes from memory */
853 if (mode_is_float(mode)) {
854 new_op2 = ia32_new_NoReg_vfp(env_cg);
858 am->op_type = ia32_AddrModeS;
/* commutative: try source address mode for op1 and swap the operands */
859 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
861 ia32_use_source_address_mode(block, op1, op2, other_op)) {
863 build_address(am, op1);
865 if (mode_is_float(mode)) {
866 noreg = ia32_new_NoReg_vfp(env_cg);
871 if (new_op2 != NULL) {
874 new_op1 = be_transform_node(op2);
/* record that the inputs were swapped */
876 am->ins_permuted = 1;
878 am->op_type = ia32_AddrModeS;
/* no AM possible: try_am callers get a bail-out, others fall back to
 * plain register operands */
880 if (flags & match_try_am) {
883 am->op_type = ia32_Normal;
887 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
889 new_op2 = be_transform_node(op2);
890 am->op_type = ia32_Normal;
891 am->ls_mode = get_irn_mode(op2);
892 if (flags & match_mode_neutral)
893 am->ls_mode = mode_Iu;
/* fill unused address parts with NoReg/NoMem defaults */
895 if (addr->base == NULL)
896 addr->base = noreg_gp;
897 if (addr->index == NULL)
898 addr->index = noreg_gp;
899 if (addr->mem == NULL)
900 addr->mem = new_NoMem();
902 am->new_op1 = new_op1;
903 am->new_op2 = new_op2;
904 am->commutative = commutative;
/* After folding a Load into @p node via source address mode, reroute the
 * Load's old memory Proj to @p node: turn node into mode_T and return a
 * result Proj so the caller still gets a value of the original mode. */
907 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do when no Load was folded */
912 if (am->mem_proj == NULL)
915 /* we have to create a mode_T so the old MemProj can attach to us */
916 mode = get_irn_mode(node);
917 load = get_Proj_pred(am->mem_proj);
/* the folded Load must not be transformed again */
919 mark_irn_visited(load);
920 be_set_transformed_node(load, node);
922 if (mode != mode_T) {
923 set_irn_mode(node, mode_T);
924 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
931 * Construct a standard binary operation, set AM and immediate if required.
933 * @param op1 The first operand
934 * @param op2 The second operand
935 * @param func The node constructor function
936 * @return The constructed ia32 node.
938 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
939 construct_binop_func *func, match_flags_t flags)
942 ir_node *block, *new_block, *new_node;
943 ia32_address_mode_t am;
944 ia32_address_t *addr = &am.addr;
/* match the operands into registers/immediates/source address mode */
946 block = get_nodes_block(node);
947 match_arguments(&am, block, op1, op2, NULL, flags);
949 dbgi = get_irn_dbg_info(node);
950 new_block = be_transform_node(block);
951 new_node = func(dbgi, current_ir_graph, new_block,
952 addr->base, addr->index, addr->mem,
953 am.new_op1, am.new_op2);
954 set_am_attributes(new_node, &am);
955 /* we can't use source address mode anymore when using immediates */
956 if (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
957 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
958 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reattach the memory Proj of a folded Load, if any */
960 new_node = fix_mem_proj(new_node, &am);
/* Generic input indices for lowered binops that consume eflags; the
 * compile-time asserts below verify that Adc and Sbb use the same layout,
 * so gen_binop_flags can address their inputs uniformly. */
967 n_ia32_l_binop_right,
968 n_ia32_l_binop_eflags
970 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
971 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
972 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
973 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
974 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
975 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
978 * Construct a binary operation which also consumes the eflags.
980 * @param node The node to transform
981 * @param func The node constructor function
982 * @param flags The match flags
983 * @return The constructor ia32 node
985 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
988 ir_node *src_block = get_nodes_block(node);
/* fetch the operands via the uniform lowered-binop input indices */
989 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
990 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
992 ir_node *block, *new_node, *eflags, *new_eflags;
993 ia32_address_mode_t am;
994 ia32_address_t *addr = &am.addr;
996 match_arguments(&am, src_block, op1, op2, NULL, flags);
998 dbgi = get_irn_dbg_info(node);
999 block = be_transform_node(src_block);
/* transform the incoming eflags value as the extra input */
1000 eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1001 new_eflags = be_transform_node(eflags);
1002 new_node = func(dbgi, current_ir_graph, block, addr->base, addr->index,
1003 addr->mem, am.new_op1, am.new_op2, new_eflags);
1004 set_am_attributes(new_node, &am);
1005 /* we can't use source address mode anymore when using immediates */
1006 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1007 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1008 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reattach the memory Proj of a folded Load, if any */
1010 new_node = fix_mem_proj(new_node, &am);
/* Return the transformed node of the initial x87 FP control word,
 * creating and caching it on first use (see initial_fpcw above). */
1015 static ir_node *get_fpcw(void)
1018 if (initial_fpcw != NULL)
1019 return initial_fpcw;
/* first request: fetch the ignore node for the FPCW register from the ABI */
1021 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
1022 &ia32_fp_cw_regs[REG_FPCW]);
1023 initial_fpcw = be_transform_node(fpcw);
1025 return initial_fpcw;
1029 * Construct a standard binary operation, set AM and immediate if required.
1031 * @param op1 The first operand
1032 * @param op2 The second operand
1033 * @param func The node constructor function
1034 * @return The constructed ia32 node.
/* x87 variant: the constructed node additionally takes the FP control word. */
1036 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1037 construct_binop_float_func *func,
1038 match_flags_t flags)
1040 ir_mode *mode = get_irn_mode(node);
1042 ir_node *block, *new_block, *new_node;
1043 ia32_address_mode_t am;
1044 ia32_address_t *addr = &am.addr;
1046 /* cannot use address mode with long double on x87 */
1047 if (get_mode_size_bits(mode) > 64)
1050 block = get_nodes_block(node);
1051 match_arguments(&am, block, op1, op2, NULL, flags);
1053 dbgi = get_irn_dbg_info(node);
1054 new_block = be_transform_node(block);
1055 new_node = func(dbgi, current_ir_graph, new_block,
1056 addr->base, addr->index, addr->mem,
1057 am.new_op1, am.new_op2, get_fpcw());
1058 set_am_attributes(new_node, &am);
1060 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reattach the memory Proj of a folded Load, if any */
1062 new_node = fix_mem_proj(new_node, &am);
1068 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1070 * @param op1 The first operand
1071 * @param op2 The second operand
1072 * @param func The node constructor function
1073 * @return The constructed ia32 node.
1075 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1076 construct_shift_func *func,
1077 match_flags_t flags)
1080 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
/* shifts are integer-only and always allow an immediate shift amount */
1082 assert(! mode_is_float(get_irn_mode(node)));
1083 assert(flags & match_immediate);
1084 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1086 if (flags & match_mode_neutral) {
1087 op1 = ia32_skip_downconv(op1);
1088 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1089 panic("right shifting of non-32bit values not supported, yet");
1091 new_op1 = be_transform_node(op1);
1093 /* the shift amount can be any mode that is bigger than 5 bits, since all
1094 * other bits are ignored anyway */
1095 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1096 op2 = get_Conv_op(op2);
1097 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
/* shift amount becomes an immediate if possible, a register otherwise */
1099 new_op2 = create_immediate_or_transform(op2, 0);
1101 dbgi = get_irn_dbg_info(node);
1102 block = get_nodes_block(node);
1103 new_block = be_transform_node(block);
1104 new_node = func(dbgi, current_ir_graph, new_block, new_op1, new_op2);
1105 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1107 /* lowered shift instruction may have a dependency operand, handle it here */
1108 if (get_irn_arity(node) == 3) {
1109 /* we have a dependency */
1110 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1111 add_irn_dep(new_node, new_dep);
1119 * Construct a standard unary operation, set AM and immediate if required.
1121 * @param op The operand
1122 * @param func The node constructor function
1123 * @return The constructed ia32 node.
1125 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1126 match_flags_t flags)
1129 ir_node *block, *new_block, *new_op, *new_node;
/* unops only support mode-neutral matching (skip harmless downconvs) */
1131 assert(flags == 0 || flags == match_mode_neutral);
1132 if (flags & match_mode_neutral) {
1133 op = ia32_skip_downconv(op);
1136 new_op = be_transform_node(op);
1137 dbgi = get_irn_dbg_info(node);
1138 block = get_nodes_block(node);
1139 new_block = be_transform_node(block);
1140 new_node = func(dbgi, current_ir_graph, new_block, new_op);
1142 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Build an ia32 Lea node from a decomposed address: missing base/index
 * inputs are replaced by the GP NoReg, present ones are transformed,
 * and the remaining address attributes are copied via set_address. */
1147 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1148 ia32_address_t *addr)
1150 ir_node *base, *index, *res;
1154 base = ia32_new_NoReg_gp(env_cg);
1156 base = be_transform_node(base);
1159 index = addr->index;
1160 if (index == NULL) {
1161 index = ia32_new_NoReg_gp(env_cg);
1163 index = be_transform_node(index);
1166 res = new_rd_ia32_Lea(dbgi, current_ir_graph, block, base, index);
1167 set_address(res, addr);
1173 * Returns non-zero if a given address mode has a symbolic or
1174 * numerical offset != 0.
/* A frame entity or frame use also counts as an immediate component. */
1176 static int am_has_immediates(const ia32_address_t *addr)
1178 return addr->offset != 0 || addr->symconst_ent != NULL
1179 || addr->frame_entity || addr->use_frame;
1183 * Creates an ia32 Add.
1185 * @return the created ia32 Add node
/* Transform a firm Add into ia32 code.  Float adds go straight to
 * SSE xAdd or x87 vfadd.  Integer adds follow the strategy enumerated
 * in the comment in the body: fold to a Const when the whole tree is
 * address-like, use a Lea for add-with-immediate, try source address
 * mode, and fall back to a Lea otherwise.
 * NOTE(review): sampled view — several lines (returns, closing braces)
 * are missing between the numbered lines below. */
1187 static ir_node *gen_Add(ir_node *node) {
1188 ir_mode *mode = get_irn_mode(node);
1189 ir_node *op1 = get_Add_left(node);
1190 ir_node *op2 = get_Add_right(node);
1192 ir_node *block, *new_block, *new_node, *add_immediate_op;
1193 ia32_address_t addr;
1194 ia32_address_mode_t am;
1196 if (mode_is_float(mode)) {
1197 if (ia32_cg_config.use_sse2)
1198 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1199 match_commutative | match_am);
1201 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1202 match_commutative | match_am);
/* this Add is consumed by the address builder below, so it must not be
 * matched into somebody else's address mode */
1205 ia32_mark_non_am(node);
1207 op2 = ia32_skip_downconv(op2);
1208 op1 = ia32_skip_downconv(op1);
1212 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1213 * 1. Add with immediate -> Lea
1214 * 2. Add with possible source address mode -> Add
1215 * 3. Otherwise -> Lea
1217 memset(&addr, 0, sizeof(addr));
/* force=1: fold as much of the Add tree into the address as possible */
1218 ia32_create_address_mode(&addr, node, /*force=*/1);
1219 add_immediate_op = NULL;
1221 dbgi = get_irn_dbg_info(node);
1222 block = get_nodes_block(node);
1223 new_block = be_transform_node(block);
/* case 0: everything folded into symconst+offset -> single Const node */
1226 if(addr.base == NULL && addr.index == NULL) {
1227 ir_graph *irg = current_ir_graph;
1228 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1229 addr.symconst_sign, addr.offset);
1230 add_irn_dep(new_node, get_irg_frame(irg));
1231 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1234 /* add with immediate? */
1235 if(addr.index == NULL) {
1236 add_immediate_op = addr.base;
1237 } else if(addr.base == NULL && addr.scale == 0) {
1238 add_immediate_op = addr.index;
1241 if(add_immediate_op != NULL) {
/* Add x,0: nothing to compute, just use the transformed operand */
1242 if(!am_has_immediates(&addr)) {
1243 #ifdef DEBUG_libfirm
1244 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1247 return be_transform_node(add_immediate_op);
1250 new_node = create_lea_from_address(dbgi, new_block, &addr);
1251 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1255 /* test if we can use source address mode */
1256 match_arguments(&am, block, op1, op2, NULL, match_commutative
1257 | match_mode_neutral | match_am | match_immediate | match_try_am);
1259 /* construct an Add with source address mode */
1260 if (am.op_type == ia32_AddrModeS) {
1261 ir_graph *irg = current_ir_graph;
1262 ia32_address_t *am_addr = &am.addr;
1263 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1264 am_addr->index, am_addr->mem, am.new_op1,
1266 set_am_attributes(new_node, &am);
1267 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1269 new_node = fix_mem_proj(new_node, &am);
1274 /* otherwise construct a lea */
1275 new_node = create_lea_from_address(dbgi, new_block, &addr);
1276 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1281 * Creates an ia32 Mul.
1283 * @return the created ia32 Mul node
/* Transform a firm Mul: SSE xMul or x87 vfmul for floats, IMul with the
 * full matching flag set (AM, immediates, mode-neutral) for integers. */
1285 static ir_node *gen_Mul(ir_node *node) {
1286 ir_node *op1 = get_Mul_left(node);
1287 ir_node *op2 = get_Mul_right(node);
1288 ir_mode *mode = get_irn_mode(node);
1290 if (mode_is_float(mode)) {
1291 if (ia32_cg_config.use_sse2)
1292 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1293 match_commutative | match_am);
1295 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1296 match_commutative | match_am);
1298 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1299 match_commutative | match_am | match_mode_neutral |
1300 match_immediate | match_am_and_immediates);
1304 * Creates an ia32 Mulh.
1305 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1306 * this result while Mul returns the lower 32 bit.
1308 * @return the created ia32 Mulh node
/* Transform Mulh: a 32x32 multiply of which only the upper 32 bits of
 * the 64-bit result are used.  Uses IMul1OP for signed and Mul for
 * unsigned operands; the high half is extracted via a Proj. */
1310 static ir_node *gen_Mulh(ir_node *node)
1312 ir_node *block = get_nodes_block(node);
1313 ir_node *new_block = be_transform_node(block);
1314 ir_graph *irg = current_ir_graph;
1315 dbg_info *dbgi = get_irn_dbg_info(node);
1316 ir_mode *mode = get_irn_mode(node);
1317 ir_node *op1 = get_Mulh_left(node);
1318 ir_node *op2 = get_Mulh_right(node);
1319 ir_node *proj_res_high;
1321 ia32_address_mode_t am;
1322 ia32_address_t *addr = &am.addr;
1324 assert(!mode_is_float(mode) && "Mulh with float not supported");
1325 assert(get_mode_size_bits(mode) == 32);
1327 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
/* choose the widening multiply variant by signedness */
1329 if (mode_is_signed(mode)) {
1330 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1331 addr->index, addr->mem, am.new_op1,
1334 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1335 addr->index, addr->mem, am.new_op1,
1339 set_am_attributes(new_node, &am);
1340 /* we can't use source address mode anymore when using immediates */
1341 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1342 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1343 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1345 assert(get_irn_mode(new_node) == mode_T);
1347 fix_mem_proj(new_node, &am);
/* both multiply variants expose the high half under the same proj nr */
1349 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
1350 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1351 mode_Iu, pn_ia32_IMul1OP_res_high);
1353 return proj_res_high;
1359 * Creates an ia32 And.
1361 * @return The created ia32 And node
/* Transform And.  And with constant 0xFF / 0xFFFF is recognized as a
 * zero extension and emitted as an I2I Conv instead of an And.
 * NOTE(review): sampled view — the 0xFF branch and src_mode setup lines
 * are missing between the numbered lines below. */
1363 static ir_node *gen_And(ir_node *node) {
1364 ir_node *op1 = get_And_left(node);
1365 ir_node *op2 = get_And_right(node);
1366 assert(! mode_is_float(get_irn_mode(node)));
1368 /* is it a zero extension? */
1369 if (is_Const(op2)) {
1370 tarval *tv = get_Const_tarval(op2);
1371 long v = get_tarval_long(tv);
1373 if (v == 0xFF || v == 0xFFFF) {
1374 dbg_info *dbgi = get_irn_dbg_info(node);
1375 ir_node *block = get_nodes_block(node);
1382 assert(v == 0xFFFF);
1385 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And with full matching */
1391 return gen_binop(node, op1, op2, new_rd_ia32_And,
1392 match_commutative | match_mode_neutral | match_am
1399 * Creates an ia32 Or.
1401 * @return The created ia32 Or node
/* Transform Or into an ia32 Or (integer only; floats are asserted out). */
1403 static ir_node *gen_Or(ir_node *node) {
1404 ir_node *op1 = get_Or_left(node);
1405 ir_node *op2 = get_Or_right(node);
1407 assert (! mode_is_float(get_irn_mode(node)));
1408 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1409 | match_mode_neutral | match_am | match_immediate);
1415 * Creates an ia32 Eor.
1417 * @return The created ia32 Eor node
/* Transform Eor (exclusive or) into an ia32 Xor (integer only). */
1419 static ir_node *gen_Eor(ir_node *node) {
1420 ir_node *op1 = get_Eor_left(node);
1421 ir_node *op2 = get_Eor_right(node);
1423 assert(! mode_is_float(get_irn_mode(node)));
1424 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1425 | match_mode_neutral | match_am | match_immediate);
1430 * Creates an ia32 Sub.
1432 * @return The created ia32 Sub node
/* Transform Sub: SSE xSub or x87 vfsub for floats, ia32 Sub otherwise.
 * Sub with a constant right operand is only warned about here — it
 * should have been normalized to an Add of the negated constant. */
1434 static ir_node *gen_Sub(ir_node *node) {
1435 ir_node *op1 = get_Sub_left(node);
1436 ir_node *op2 = get_Sub_right(node);
1437 ir_mode *mode = get_irn_mode(node);
1439 if (mode_is_float(mode)) {
1440 if (ia32_cg_config.use_sse2)
1441 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1443 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
1447 if (is_Const(op2)) {
1448 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1452 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1453 | match_am | match_immediate);
1457 * Generates an ia32 DivMod with additional infrastructure for the
1458 * register allocator if needed.
/* Common transformation for firm Div / Mod / DivMod nodes into one ia32
 * Div.  Prepares the upper-half input of the dividend: Cltd (sign
 * extension) for signed modes, a zero constant for unsigned ones, and
 * carefully merges the node's memory with the address-mode memory.
 * NOTE(review): sampled view — case labels of the opcode switch and the
 * Sync input setup are missing between the numbered lines. */
1460 static ir_node *create_Div(ir_node *node)
1462 ir_graph *irg = current_ir_graph;
1463 dbg_info *dbgi = get_irn_dbg_info(node);
1464 ir_node *block = get_nodes_block(node);
1465 ir_node *new_block = be_transform_node(block);
1472 ir_node *sign_extension;
1473 ia32_address_mode_t am;
1474 ia32_address_t *addr = &am.addr;
1476 /* the upper bits have random contents for smaller modes */
/* fetch operands/mem/result mode depending on the concrete opcode */
1477 switch (get_irn_opcode(node)) {
1479 op1 = get_Div_left(node);
1480 op2 = get_Div_right(node);
1481 mem = get_Div_mem(node);
1482 mode = get_Div_resmode(node);
1485 op1 = get_Mod_left(node);
1486 op2 = get_Mod_right(node);
1487 mem = get_Mod_mem(node);
1488 mode = get_Mod_resmode(node);
1491 op1 = get_DivMod_left(node);
1492 op2 = get_DivMod_right(node);
1493 mem = get_DivMod_mem(node);
1494 mode = get_DivMod_resmode(node);
1497 panic("invalid divmod node %+F", node);
1500 match_arguments(&am, block, op1, op2, NULL, match_am);
1502 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1503 is the memory of the consumed address. We can have only the second op as address
1504 in Div nodes, so check only op2. */
1505 if(!is_NoMem(mem) && skip_Proj(mem) != skip_Proj(op2)) {
1506 new_mem = be_transform_node(mem);
1507 if(!is_NoMem(addr->mem)) {
/* both memories present: join them with a Sync */
1511 new_mem = new_rd_Sync(dbgi, irg, new_block, 2, in);
1514 new_mem = addr->mem;
1517 if (mode_is_signed(mode)) {
/* signed: sign-extend eax into edx via Cltd (fed by ProduceVal) */
1518 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1519 add_irn_dep(produceval, get_irg_frame(irg));
1520 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
/* unsigned: the upper half is simply zero */
1523 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1524 add_irn_dep(sign_extension, get_irg_frame(irg));
1527 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1528 addr->index, new_mem, am.new_op2,
1529 am.new_op1, sign_extension);
1530 set_irn_pinned(new_node, get_irn_pinned(node));
1532 set_am_attributes(new_node, &am);
1533 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1535 new_node = fix_mem_proj(new_node, &am);
/* Mod is handled by the common Div transformation. */
1541 static ir_node *gen_Mod(ir_node *node) {
1542 return create_Div(node);
/* Div is handled by the common Div transformation. */
1545 static ir_node *gen_Div(ir_node *node) {
1546 return create_Div(node);
/* DivMod is handled by the common Div transformation. */
1549 static ir_node *gen_DivMod(ir_node *node) {
1550 return create_Div(node);
1556 * Creates an ia32 floating Div.
1558 * @return The created ia32 xDiv node
/* Transform a floating point Quot into SSE xDiv or x87 vfdiv. */
1560 static ir_node *gen_Quot(ir_node *node)
1562 ir_node *op1 = get_Quot_left(node);
1563 ir_node *op2 = get_Quot_right(node);
1565 if (ia32_cg_config.use_sse2) {
1566 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1568 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1574 * Creates an ia32 Shl.
1576 * @return The created ia32 Shl node
/* Transform a left shift into an ia32 Shl. */
1578 static ir_node *gen_Shl(ir_node *node) {
1579 ir_node *left = get_Shl_left(node);
1580 ir_node *right = get_Shl_right(node);
1582 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1583 match_mode_neutral | match_immediate);
1587 * Creates an ia32 Shr.
1589 * @return The created ia32 Shr node
/* Transform a logical right shift into an ia32 Shr (not mode-neutral:
 * the upper bits of the operand matter for a right shift). */
1591 static ir_node *gen_Shr(ir_node *node) {
1592 ir_node *left = get_Shr_left(node);
1593 ir_node *right = get_Shr_right(node);
1595 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1601 * Creates an ia32 Sar.
1603 * @return The created ia32 Shrs node
/* Transform an arithmetic right shift (Shrs) into an ia32 Sar, with two
 * pattern-based special cases:
 *  - Shrs by a constant on mode_Is that amounts to a pure sign
 *    extension is emitted as Cltd;
 *  - Shl/Shrs pairs by 16 or 24 are recognized as 16/8-bit sign
 *    extensions and emitted as an I2I Conv.
 * NOTE(review): sampled view — the guard on `val` in the first special
 * case (presumably val == 31) and the src_mode selection are missing. */
1605 static ir_node *gen_Shrs(ir_node *node) {
1606 ir_node *left = get_Shrs_left(node);
1607 ir_node *right = get_Shrs_right(node);
1608 ir_mode *mode = get_irn_mode(node);
1610 if(is_Const(right) && mode == mode_Is) {
1611 tarval *tv = get_Const_tarval(right);
1612 long val = get_tarval_long(tv);
1614 /* this is a sign extension */
1615 ir_graph *irg = current_ir_graph;
1616 dbg_info *dbgi = get_irn_dbg_info(node);
1617 ir_node *block = be_transform_node(get_nodes_block(node));
1619 ir_node *new_op = be_transform_node(op);
1620 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1621 add_irn_dep(pval, get_irg_frame(irg));
1623 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1627 /* 8 or 16 bit sign extension? */
1628 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1629 ir_node *shl_left = get_Shl_left(left);
1630 ir_node *shl_right = get_Shl_right(left);
1631 if(is_Const(shl_right)) {
1632 tarval *tv1 = get_Const_tarval(right);
1633 tarval *tv2 = get_Const_tarval(shl_right);
/* Shl and Shrs must shift by the same constant amount */
1634 if(tv1 == tv2 && tarval_is_long(tv1)) {
1635 long val = get_tarval_long(tv1);
/* shift by 16 -> 16-bit source, by 24 -> 8-bit source */
1636 if(val == 16 || val == 24) {
1637 dbg_info *dbgi = get_irn_dbg_info(node);
1638 ir_node *block = get_nodes_block(node);
1648 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain Sar */
1657 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1663 * Creates an ia32 RotL.
1665 * @param op1 The first operator
1666 * @param op2 The second operator
1667 * @return The created ia32 RotL node
/* Create an ia32 Rol (rotate left) for the given operands. */
1669 static ir_node *gen_RotL(ir_node *node, ir_node *op1, ir_node *op2) {
1670 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1676 * Creates an ia32 RotR.
1677 * NOTE: There is no RotR with immediate because this would always be a RotL
1678 * "imm-mode_size_bits" which can be pre-calculated.
1680 * @param op1 The first operator
1681 * @param op2 The second operator
1682 * @return The created ia32 RotR node
/* Create an ia32 Ror (rotate right) for the given operands. */
1684 static ir_node *gen_RotR(ir_node *node, ir_node *op1, ir_node *op2) {
1685 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1691 * Creates an ia32 RotR or RotL (depending on the found pattern).
1693 * @return The created ia32 RotL or RotR node
/* Transform a firm Rot (always a rotate-left) into ia32 Rol, or into
 * Ror when the rotate amount matches the pattern
 * "mode_size_bits - e" (represented as Add(Minus(e), bits)). */
1695 static ir_node *gen_Rot(ir_node *node) {
1696 ir_node *rotate = NULL;
1697 ir_node *op1 = get_Rot_left(node);
1698 ir_node *op2 = get_Rot_right(node);
1700 /* Firm has only Rot (which is a RotL), so we are looking for a right (op2)
1701 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1702 that means we can create a RotR instead of an Add and a RotL */
1704 if (get_irn_op(op2) == op_Add) {
1706 ir_node *left = get_Add_left(add);
1707 ir_node *right = get_Add_right(add);
1708 if (is_Const(right)) {
1709 tarval *tv = get_Const_tarval(right);
1710 ir_mode *mode = get_irn_mode(node);
1711 long bits = get_mode_size_bits(mode);
/* amount must be Minus(e) + mode_size_bits to qualify for Ror */
1713 if (get_irn_op(left) == op_Minus &&
1714 tarval_is_long(tv) &&
1715 get_tarval_long(tv) == bits &&
1718 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1719 rotate = gen_RotR(node, op1, get_Minus_op(left));
/* no Ror pattern found: emit the plain rotate-left */
1724 if (rotate == NULL) {
1725 rotate = gen_RotL(node, op1, op2);
1734 * Transforms a Minus node.
1736 * @return The created ia32 Minus node
/* Transform Minus.  Floats: SSE negates by xoring the sign bit with a
 * known constant loaded via address mode; x87 uses vfchs.  Integers:
 * a plain Neg via gen_unop. */
1738 static ir_node *gen_Minus(ir_node *node)
1740 ir_node *op = get_Minus_op(node);
1741 ir_node *block = be_transform_node(get_nodes_block(node));
1742 ir_graph *irg = current_ir_graph;
1743 dbg_info *dbgi = get_irn_dbg_info(node);
1744 ir_mode *mode = get_irn_mode(node);
1749 if (mode_is_float(mode)) {
1750 ir_node *new_op = be_transform_node(op);
1751 if (ia32_cg_config.use_sse2) {
1752 /* TODO: non-optimal... if we have many xXors, then we should
1753 * rather create a load for the const and use that instead of
1754 * several AM nodes... */
1755 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1756 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1757 ir_node *nomem = new_rd_NoMem(irg);
1759 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1760 nomem, new_op, noreg_xmm);
/* pick the 32- or 64-bit sign-bit mask constant */
1762 size = get_mode_size_bits(mode);
1763 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
/* the mask is read via source address mode from the const entity */
1765 set_ia32_am_sc(new_node, ent);
1766 set_ia32_op_type(new_node, ia32_AddrModeS);
1767 set_ia32_ls_mode(new_node, mode);
1769 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
1772 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1775 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1781 * Transforms a Not node.
1783 * @return The created ia32 Not node
/* Transform a bitwise Not into an ia32 Not (mode_b Nots must have been
 * lowered before this phase). */
1785 static ir_node *gen_Not(ir_node *node) {
1786 ir_node *op = get_Not_op(node);
1788 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1789 assert (! mode_is_float(get_irn_mode(node)));
1791 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1797 * Transforms an Abs node.
1799 * @return The created ia32 Abs node
/* Transform Abs.  Floats: SSE masks off the sign bit with a known
 * constant (xAnd via address mode); x87 uses vfabs.  Integers: the
 * classic branch-free sequence sign = Cltd; res = (x ^ sign) - sign. */
1801 static ir_node *gen_Abs(ir_node *node)
1803 ir_node *block = get_nodes_block(node);
1804 ir_node *new_block = be_transform_node(block);
1805 ir_node *op = get_Abs_op(node);
1806 ir_graph *irg = current_ir_graph;
1807 dbg_info *dbgi = get_irn_dbg_info(node);
1808 ir_mode *mode = get_irn_mode(node);
1809 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1810 ir_node *nomem = new_NoMem();
1816 if (mode_is_float(mode)) {
1817 new_op = be_transform_node(op);
1819 if (ia32_cg_config.use_sse2) {
1820 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1821 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1822 nomem, new_op, noreg_fp);
/* 32- or 64-bit "all bits but the sign" mask constant */
1824 size = get_mode_size_bits(mode);
1825 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1827 set_ia32_am_sc(new_node, ent);
1829 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1831 set_ia32_op_type(new_node, ia32_AddrModeS);
1832 set_ia32_ls_mode(new_node, mode);
1834 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1835 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1838 ir_node *xor, *pval, *sign_extension;
/* widen sub-32-bit operands to 32 bit first */
1840 if (get_mode_size_bits(mode) == 32) {
1841 new_op = be_transform_node(op);
1843 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1846 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1847 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1850 add_irn_dep(pval, get_irg_frame(irg));
1851 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1853 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1854 nomem, new_op, sign_extension);
1855 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1857 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1858 nomem, xor, sign_extension);
1859 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Produce a flags-producing node for a boolean value and report the
 * pnc to test.  A Proj of a Cmp is transformed directly; any other
 * mode_b value is compared against 0 with a Test and pn_Cmp_Lg.
 * NOTE(review): sampled view — the is_Proj/is_Cmp guard before the
 * Cmp branch is missing between the numbered lines. */
1865 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1867 ir_graph *irg = current_ir_graph;
1875 /* we have a Cmp as input */
1877 ir_node *pred = get_Proj_pred(node);
1879 flags = be_transform_node(pred);
1880 *pnc_out = get_Proj_proj(node);
1885 /* a mode_b value, we have to compare it against 0 */
1886 dbgi = get_irn_dbg_info(node);
1887 new_block = be_transform_node(get_nodes_block(node));
1888 new_op = be_transform_node(node);
1889 noreg = ia32_new_NoReg_gp(env_cg);
1890 nomem = new_NoMem();
/* Test op,op sets flags for op != 0 */
1891 flags = new_rd_ia32_Test(dbgi, irg, new_block, noreg, noreg, nomem,
1892 new_op, new_op, 0, 0);
1893 *pnc_out = pn_Cmp_Lg;
1898 * Transforms a Load.
1900 * @return the created ia32 Load node
/* Transform a firm Load: builds the load address, then emits xLoad
 * (SSE), vfld (x87), Conv_I2I (sub-32-bit integer, load folded into the
 * widening conv) or a plain ia32 Load.
 * NOTE(review): sampled view — NoReg fallbacks for missing base/index
 * are among the lines not visible here. */
1902 static ir_node *gen_Load(ir_node *node) {
1903 ir_node *old_block = get_nodes_block(node);
1904 ir_node *block = be_transform_node(old_block);
1905 ir_node *ptr = get_Load_ptr(node);
1906 ir_node *mem = get_Load_mem(node);
1907 ir_node *new_mem = be_transform_node(mem);
1910 ir_graph *irg = current_ir_graph;
1911 dbg_info *dbgi = get_irn_dbg_info(node);
1912 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1913 ir_mode *mode = get_Load_mode(node);
1916 ia32_address_t addr;
1918 /* construct load address */
1919 memset(&addr, 0, sizeof(addr));
1920 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1927 base = be_transform_node(base);
1933 index = be_transform_node(index);
1936 if (mode_is_float(mode)) {
1937 if (ia32_cg_config.use_sse2) {
1938 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1940 res_mode = mode_xmm;
1942 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1944 res_mode = mode_vfp;
1947 assert(mode != mode_b);
1949 /* create a conv node with address mode for smaller modes */
1950 if(get_mode_size_bits(mode) < 32) {
1951 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1952 new_mem, noreg, mode);
1954 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1959 set_irn_pinned(new_node, get_irn_pinned(node));
1960 set_ia32_op_type(new_node, ia32_AddrModeS);
1961 set_ia32_ls_mode(new_node, mode);
1962 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the spiller */
1964 if(get_irn_pinned(node) == op_pin_state_floats) {
1965 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1968 /* make sure we are scheduled behind the initial IncSP/Barrier
1969 * to avoid spills being placed before it
1971 if (block == get_irg_start_block(irg)) {
1972 add_irn_dep(new_node, get_irg_frame(irg));
1975 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Decide whether a Load result (node) may be folded into destination
 * address mode of a Store in `block` to `ptr`.  Requires: the load
 * value has a single user, load and store are in the same block, the
 * store's memory comes directly from the load, pointers match, and the
 * other operand does not (transitively) depend on the load. */
1980 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1981 ir_node *ptr, ir_node *other)
1988 /* we only use address mode if we're the only user of the load */
1989 if(get_irn_n_edges(node) > 1)
1992 load = get_Proj_pred(node);
1995 if(get_nodes_block(load) != block)
1998 /* Store should be attached to the load */
1999 if(!is_Proj(mem) || get_Proj_pred(mem) != load)
2001 /* store should have the same pointer as the load */
2002 if(get_Load_ptr(load) != ptr)
2005 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2006 if(other != NULL && get_nodes_block(other) == block
2007 && heights_reachable_in_block(heights, other, load))
/* Build a destination-address-mode binary operation (e.g. AddMem):
 * op is read from and written back to memory, the other operand is an
 * immediate or register.  Returns NULL (via the invisible fallthrough)
 * when neither operand qualifies for dest AM. */
2013 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2014 ir_node *mem, ir_node *ptr, ir_mode *mode,
2015 construct_binop_dest_func *func,
2016 construct_binop_dest_func *func8bit,
2017 match_flags_t flags)
2019 ir_node *src_block = get_nodes_block(node);
2021 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
2022 ir_graph *irg = current_ir_graph;
2027 ia32_address_mode_t am;
2028 ia32_address_t *addr = &am.addr;
2029 memset(&am, 0, sizeof(am));
2031 assert(flags & match_dest_am);
2032 assert(flags & match_immediate); /* there is no destam node without... */
2033 commutative = (flags & match_commutative) != 0;
/* try op1 as the memory operand; for commutative ops also try op2 */
2035 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
2036 build_address(&am, op1);
2037 new_op = create_immediate_or_transform(op2, 0);
2038 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2039 build_address(&am, op2);
2040 new_op = create_immediate_or_transform(op1, 0);
/* fill in NoReg/NoMem defaults for unused address components */
2045 if(addr->base == NULL)
2046 addr->base = noreg_gp;
2047 if(addr->index == NULL)
2048 addr->index = noreg_gp;
2049 if(addr->mem == NULL)
2050 addr->mem = new_NoMem();
2052 dbgi = get_irn_dbg_info(node);
2053 block = be_transform_node(src_block);
/* 8-bit operations need the dedicated 8-bit constructor */
2054 if(get_mode_size_bits(mode) == 8) {
2055 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
2058 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem,
2061 set_address(new_node, addr);
2062 set_ia32_op_type(new_node, ia32_AddrModeD);
2063 set_ia32_ls_mode(new_node, mode);
2064 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Build a destination-address-mode unary operation (e.g. NotMem,
 * NegMem): the operand is read from and written back to memory.
 * Returns NULL when dest AM is not applicable. */
2069 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2070 ir_node *ptr, ir_mode *mode,
2071 construct_unop_dest_func *func)
2073 ir_graph *irg = current_ir_graph;
2074 ir_node *src_block = get_nodes_block(node);
2078 ia32_address_mode_t am;
2079 ia32_address_t *addr = &am.addr;
2080 memset(&am, 0, sizeof(am));
2082 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2085 build_address(&am, op);
2087 dbgi = get_irn_dbg_info(node);
2088 block = be_transform_node(src_block);
2089 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem);
2090 set_address(new_node, addr);
2091 set_ia32_op_type(new_node, ia32_AddrModeD);
2092 set_ia32_ls_mode(new_node, mode);
2093 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to turn an 8-bit Psi with 0/1 values stored to memory into a
 * SetMem (setcc to memory).  `negated` is set when the 0/1 values are
 * swapped.  Returns NULL if the pattern doesn't match.
 * NOTE(review): sampled view — the negated-flag assignments and the
 * early-return NULL lines are not visible here. */
2098 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2099 ir_mode *mode = get_irn_mode(node);
2100 ir_node *psi_true = get_Psi_val(node, 0);
2101 ir_node *psi_default = get_Psi_default(node);
2112 ia32_address_t addr;
/* setcc only writes a single byte */
2114 if(get_mode_size_bits(mode) != 8)
2117 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2119 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2125 build_address_ptr(&addr, ptr, mem);
2127 irg = current_ir_graph;
2128 dbgi = get_irn_dbg_info(node);
2129 block = get_nodes_block(node);
2130 new_block = be_transform_node(block);
2131 cond = get_Psi_cond(node, 0);
2132 flags = get_flags_node(cond, &pnc);
2133 new_mem = be_transform_node(mem);
2134 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2135 addr.index, addr.mem, flags, pnc, negated);
2136 set_address(new_node, &addr);
2137 set_ia32_op_type(new_node, ia32_AddrModeD);
2138 set_ia32_ls_mode(new_node, mode);
2139 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to express Store(op(Load(ptr), x), ptr) as a single destination
 * address mode instruction (AddMem, SubMem, AndMem, ...).  Dispatches
 * on the opcode of the stored value; returns NULL when no dest-AM form
 * applies.  Only GP modes are handled; the value must be single-use and
 * in the same block as the store.
 * NOTE(review): sampled view — case labels, `break`s and the early
 * returns of this switch are among the lines not visible here. */
2144 static ir_node *try_create_dest_am(ir_node *node) {
2145 ir_node *val = get_Store_value(node);
2146 ir_node *mem = get_Store_mem(node);
2147 ir_node *ptr = get_Store_ptr(node);
2148 ir_mode *mode = get_irn_mode(val);
2149 unsigned bits = get_mode_size_bits(mode);
2154 /* handle only GP modes for now... */
2155 if(!mode_needs_gp_reg(mode))
2159 /* store must be the only user of the val node */
2160 if(get_irn_n_edges(val) > 1)
2162 /* skip pointless convs */
2164 ir_node *conv_op = get_Conv_op(val);
2165 ir_mode *pred_mode = get_irn_mode(conv_op);
2166 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2174 /* value must be in the same block */
2175 if(get_nodes_block(node) != get_nodes_block(val))
2178 switch(get_irn_opcode(val)) {
/* Add +1/-1 become IncMem/DecMem, everything else AddMem */
2180 op1 = get_Add_left(val);
2181 op2 = get_Add_right(val);
2182 if(is_Const_1(op2)) {
2183 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2184 new_rd_ia32_IncMem);
2186 } else if(is_Const_Minus_1(op2)) {
2187 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2188 new_rd_ia32_DecMem);
2191 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2192 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2193 match_dest_am | match_commutative |
2197 op1 = get_Sub_left(val);
2198 op2 = get_Sub_right(val);
/* Sub with const should have been normalized to Add of -const */
2200 ir_fprintf(stderr, "Optimisation warning: not-normalize sub ,C"
2203 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2204 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2205 match_dest_am | match_immediate |
2209 op1 = get_And_left(val);
2210 op2 = get_And_right(val);
2211 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2212 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2213 match_dest_am | match_commutative |
2217 op1 = get_Or_left(val);
2218 op2 = get_Or_right(val);
2219 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2220 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2221 match_dest_am | match_commutative |
2225 op1 = get_Eor_left(val);
2226 op2 = get_Eor_right(val);
2227 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2228 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2229 match_dest_am | match_commutative |
2233 op1 = get_Shl_left(val);
2234 op2 = get_Shl_right(val);
2235 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2236 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2237 match_dest_am | match_immediate);
2240 op1 = get_Shr_left(val);
2241 op2 = get_Shr_right(val);
2242 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2243 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2244 match_dest_am | match_immediate);
2247 op1 = get_Shrs_left(val);
2248 op2 = get_Shrs_right(val);
2249 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2250 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2251 match_dest_am | match_immediate);
2254 op1 = get_Rot_left(val);
2255 op2 = get_Rot_right(val);
2256 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2257 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2258 match_dest_am | match_immediate);
2260 /* TODO: match ROR patterns... */
2262 new_node = try_create_SetMem(val, ptr, mem);
2265 op1 = get_Minus_op(val);
2266 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2269 /* should be lowered already */
2270 assert(mode != mode_b);
2271 op1 = get_Not_op(val);
2272 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* preserve pinned state of the original store */
2278 if(new_node != NULL) {
2279 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2280 get_irn_pinned(node) == op_pin_state_pinned) {
2281 set_irn_pinned(new_node, op_pin_state_pinned);
/* Return non-zero if node is a Conv from a float mode to a 32-bit GP
 * integer mode (candidate for a direct vfist store).
 * NOTE(review): sampled view — the is_Conv check and return lines are
 * not visible here. */
2288 static int is_float_to_int32_conv(const ir_node *node)
2290 ir_mode *mode = get_irn_mode(node);
2294 if(get_mode_size_bits(mode) != 32 || !mode_needs_gp_reg(mode))
2299 conv_op = get_Conv_op(node);
2300 conv_mode = get_irn_mode(conv_op);
2302 if(!mode_is_float(conv_mode))
2309 * Transform a Store(floatConst).
2311 * @return the created ia32 Store node
/* Transform Store(floatConst) into one or more 32-bit integer Stores of
 * the constant's raw bit pattern (avoids loading the FP constant into a
 * register at all).  Doubles need a second store for the upper word.
 * NOTE(review): sampled view — the loop header over `ofs` and the NoReg
 * fallbacks are among the lines not visible here. */
2313 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns) {
2314 ir_mode *mode = get_irn_mode(cns);
2315 int size = get_mode_size_bits(mode);
2316 tarval *tv = get_Const_tarval(cns);
2317 ir_node *block = get_nodes_block(node);
2318 ir_node *new_block = be_transform_node(block);
2319 ir_node *ptr = get_Store_ptr(node);
2320 ir_node *mem = get_Store_mem(node);
2321 ir_graph *irg = current_ir_graph;
2322 dbg_info *dbgi = get_irn_dbg_info(node);
2323 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2326 ia32_address_t addr;
/* assemble the low 32 bits of the constant, little-endian byte order */
2328 unsigned val = get_tarval_sub_bits(tv, 0) |
2329 (get_tarval_sub_bits(tv, 1) << 8) |
2330 (get_tarval_sub_bits(tv, 2) << 16) |
2331 (get_tarval_sub_bits(tv, 3) << 24);
2332 ir_node *imm = create_Immediate(NULL, 0, val);
2334 /* construct store address */
2335 memset(&addr, 0, sizeof(addr));
2336 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2338 if (addr.base == NULL) {
2341 addr.base = be_transform_node(addr.base);
2344 if (addr.index == NULL) {
2347 addr.index = be_transform_node(addr.index);
2349 addr.mem = be_transform_node(mem);
2351 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2352 addr.index, addr.mem, imm);
2354 set_irn_pinned(new_node, get_irn_pinned(node));
2355 set_ia32_op_type(new_node, ia32_AddrModeD);
2356 set_ia32_ls_mode(new_node, mode_Iu);
2358 set_address(new_node, &addr);
2360 /** add more stores if needed */
/* next 32-bit chunk at byte offset ofs */
2362 unsigned val = get_tarval_sub_bits(tv, ofs) |
2363 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2364 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2365 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2366 ir_node *imm = create_Immediate(NULL, 0, val);
/* chain the second store behind the first via its memory output */
2369 addr.mem = new_node;
2371 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2372 addr.index, addr.mem, imm);
2374 set_irn_pinned(new_node, get_irn_pinned(node));
2375 set_ia32_op_type(new_node, ia32_AddrModeD);
2376 set_ia32_ls_mode(new_node, mode_Iu);
2378 set_address(new_node, &addr);
2383 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2388 * Transforms a normal Store.
2390 * @return the created ia32 Store node
/* Transform a normal Store.  Tries destination address mode first,
 * then emits xStore (SSE float), vfst (x87 float), vfist (float→int32
 * conversion folded into the store) or a plain (8-bit or word) Store. */
2392 static ir_node *gen_normal_Store(ir_node *node)
2394 ir_node *val = get_Store_value(node);
2395 ir_mode *mode = get_irn_mode(val);
2396 ir_node *block = get_nodes_block(node);
2397 ir_node *new_block = be_transform_node(block);
2398 ir_node *ptr = get_Store_ptr(node);
2399 ir_node *mem = get_Store_mem(node);
2400 ir_graph *irg = current_ir_graph;
2401 dbg_info *dbgi = get_irn_dbg_info(node);
2402 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2405 ia32_address_t addr;
2407 /* check for destination address mode */
2408 new_node = try_create_dest_am(node);
2409 if (new_node != NULL)
2412 /* construct store address */
2413 memset(&addr, 0, sizeof(addr));
2414 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2416 if (addr.base == NULL) {
2419 addr.base = be_transform_node(addr.base);
2422 if (addr.index == NULL) {
2425 addr.index = be_transform_node(addr.index);
2427 addr.mem = be_transform_node(mem);
2429 if (mode_is_float(mode)) {
2430 /* convs (and strict-convs) before stores are unnecessary if the mode
2432 while (is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2433 val = get_Conv_op(val);
2435 new_val = be_transform_node(val);
2436 if (ia32_cg_config.use_sse2) {
2437 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2438 addr.index, addr.mem, new_val);
2440 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2441 addr.index, addr.mem, new_val, mode);
/* float→int32 conv feeding the store: use fist with truncation mode */
2443 } else if (is_float_to_int32_conv(val)) {
2444 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2445 val = get_Conv_op(val);
2447 /* convs (and strict-convs) before stores are unnecessary if the mode
2449 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2450 val = get_Conv_op(val);
2452 new_val = be_transform_node(val);
2454 new_node = new_rd_ia32_vfist(dbgi, irg, new_block, addr.base,
2455 addr.index, addr.mem, new_val, trunc_mode);
2457 new_val = create_immediate_or_transform(val, 0);
2458 assert(mode != mode_b);
2460 if (get_mode_size_bits(mode) == 8) {
2461 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2462 addr.index, addr.mem, new_val);
2464 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2465 addr.index, addr.mem, new_val);
2469 set_irn_pinned(new_node, get_irn_pinned(node));
2470 set_ia32_op_type(new_node, ia32_AddrModeD);
2471 set_ia32_ls_mode(new_node, mode);
2473 set_address(new_node, &addr);
2474 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2480 * Transforms a Store.
2482 * @return the created ia32 Store node
2484 static ir_node *gen_Store(ir_node *node)
2486 ir_node *val = get_Store_value(node);
2487 ir_mode *mode = get_irn_mode(val);
2489 if (mode_is_float(mode) && is_Const(val)) {
2492 /* we are storing a floating point constant */
2493 if (ia32_cg_config.use_sse2) {
/* SSE2: only constants the SSE unit cannot materialize cheaply need
 * the transformed (load-from-constant-pool) store path */
2494 transform = !is_simple_sse_Const(val);
/* x87 path: same idea, different notion of "simple" constant */
2496 transform = !is_simple_x87_Const(val);
/* store the float constant via an integer store of its bit pattern */
2499 return gen_float_const_Store(node, val);
/* all other stores take the generic path */
2501 return gen_normal_Store(node);
2505 * Transforms a Switch.
2507 * @return the created ia32 SwitchJmp node
2509 static ir_node *create_Switch(ir_node *node)
2511 ir_graph *irg = current_ir_graph;
2512 dbg_info *dbgi = get_irn_dbg_info(node);
2513 ir_node *block = be_transform_node(get_nodes_block(node));
2514 ir_node *sel = get_Cond_selector(node);
2515 ir_node *new_sel = be_transform_node(sel);
2516 int switch_min = INT_MAX;
2517 int switch_max = INT_MIN;
2518 long default_pn = get_Cond_defaultProj(node);
2520 const ir_edge_t *edge;
/* the selector is expected to be a full 32bit value here */
2522 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2524 /* determine the smallest switch case value */
2525 foreach_out_edge(node, edge) {
2526 ir_node *proj = get_edge_src_irn(edge);
2527 long pn = get_Proj_proj(proj);
/* the default Proj does not contribute to the min/max range */
2528 if(pn == default_pn)
/* refuse to build an absurdly large jump table; the unsigned cast also
 * catches switch_max < switch_min wraparound */
2537 if((unsigned) (switch_max - switch_min) > 256000) {
2538 panic("Size of switch %+F bigger than 256000", node);
2541 if (switch_min != 0) {
2542 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2544 /* if smallest switch case is not 0 we need an additional sub */
2545 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2546 add_ia32_am_offs_int(new_sel, -switch_min);
2547 set_ia32_op_type(new_sel, ia32_AddrModeS);
2549 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
/* the SwitchJmp does the actual indexed jump using the rebased selector */
2552 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2553 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2559 * Transform a Cond node.
2561 static ir_node *gen_Cond(ir_node *node) {
2562 ir_node *block = get_nodes_block(node);
2563 ir_node *new_block = be_transform_node(block);
2564 ir_graph *irg = current_ir_graph;
2565 dbg_info *dbgi = get_irn_dbg_info(node);
2566 ir_node *sel = get_Cond_selector(node);
2567 ir_mode *sel_mode = get_irn_mode(sel);
2568 ir_node *flags = NULL;
/* non-boolean selector means this Cond is really a switch */
2572 if (sel_mode != mode_b) {
2573 return create_Switch(node);
2576 /* we get flags from a cmp */
2577 flags = get_flags_node(sel, &pnc);
/* conditional jump consuming the flags produced above */
2579 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2580 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2586 * Transforms a CopyB node.
2588 * @return The transformed node.
2590 static ir_node *gen_CopyB(ir_node *node) {
2591 ir_node *block = be_transform_node(get_nodes_block(node));
2592 ir_node *src = get_CopyB_src(node);
2593 ir_node *new_src = be_transform_node(src);
2594 ir_node *dst = get_CopyB_dst(node);
2595 ir_node *new_dst = be_transform_node(dst);
2596 ir_node *mem = get_CopyB_mem(node);
2597 ir_node *new_mem = be_transform_node(mem);
2598 ir_node *res = NULL;
2599 ir_graph *irg = current_ir_graph;
2600 dbg_info *dbgi = get_irn_dbg_info(node);
2601 int size = get_type_size_bytes(get_CopyB_type(node));
2604 /* If we have to copy more than 32 bytes, we use REP MOVSx and */
2605 /* then we need the size explicitly in ECX. */
2606 if (size >= 32 * 4) {
2607 rem = size & 0x3; /* size % 4 */
/* the word count goes into a register (destined for ECX); the frame
 * dependency keeps the Const from floating too early */
2610 res = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, size);
2611 add_irn_dep(res, get_irg_frame(irg));
2613 res = new_rd_ia32_CopyB(dbgi, irg, block, new_dst, new_src, res, new_mem, rem);
/* small copies use the immediate-size CopyB_i variant instead of REP MOVS */
2616 ir_fprintf(stderr, "Optimisation warning copyb %+F with size <4\n",
2619 res = new_rd_ia32_CopyB_i(dbgi, irg, block, new_dst, new_src, new_mem, size);
2622 SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
/**
 * Transform a be_Copy: duplicate the node and normalize any gp-register
 * mode to mode_Iu (the backend works with one canonical gp mode).
 */
2627 static ir_node *gen_be_Copy(ir_node *node)
2629 ir_node *new_node = be_duplicate_node(node);
2630 ir_mode *mode = get_irn_mode(new_node);
2632 if (mode_needs_gp_reg(mode)) {
2633 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare producing flags.
 * Prefers fucomi (writes EFLAGS directly) when available; otherwise falls
 * back to ftst/fucom + fnstsw and transfers the FPU status word into the
 * CPU flags with sahf.
 */
2639 static ir_node *create_Fucom(ir_node *node)
2641 ir_graph *irg = current_ir_graph;
2642 dbg_info *dbgi = get_irn_dbg_info(node);
2643 ir_node *block = get_nodes_block(node);
2644 ir_node *new_block = be_transform_node(block);
2645 ir_node *left = get_Cmp_left(node);
2646 ir_node *new_left = be_transform_node(left);
2647 ir_node *right = get_Cmp_right(node);
2651 if(ia32_cg_config.use_fucomi) {
2652 new_right = be_transform_node(right);
2653 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2655 set_ia32_commutative(new_node);
2656 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* comparison against 0 can use the cheaper ftst instruction */
2658 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2659 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2662 new_right = be_transform_node(right);
2663 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2667 set_ia32_commutative(new_node);
2669 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* sahf moves AH (holding the fnstsw result) into the EFLAGS register */
2671 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2672 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Create an SSE2 ucomis[sd] compare producing flags, with source
 * address-mode folding where possible.
 */
2678 static ir_node *create_Ucomi(ir_node *node)
2680 ir_graph *irg = current_ir_graph;
2681 dbg_info *dbgi = get_irn_dbg_info(node);
2682 ir_node *src_block = get_nodes_block(node);
2683 ir_node *new_block = be_transform_node(src_block);
2684 ir_node *left = get_Cmp_left(node);
2685 ir_node *right = get_Cmp_right(node);
2687 ia32_address_mode_t am;
2688 ia32_address_t *addr = &am.addr;
/* try to fold one operand as a memory operand (commutative compare) */
2690 match_arguments(&am, src_block, left, right, NULL,
2691 match_commutative | match_am);
2693 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2694 addr->mem, am.new_op1, am.new_op2,
2696 set_am_attributes(new_node, &am);
2698 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* if a memory operand was folded, reroute the mem Proj accordingly */
2700 new_node = fix_mem_proj(new_node, &am);
2706 * helper function: checks wether all Cmp projs are Lg or Eq which is needed
2707 * to fold an and into a test node
2709 static int can_fold_test_and(ir_node *node)
2711 const ir_edge_t *edge;
2713 /** we can only have eq and lg projs */
2714 foreach_out_edge(node, edge) {
2715 ir_node *proj = get_edge_src_irn(edge);
2716 pn_Cmp pnc = get_Proj_proj(proj);
/* any relation other than ==/!= makes the And->Test fold invalid,
 * because Test only establishes zero/non-zero, not ordering */
2717 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/**
 * Transform a Cmp node into ia32 Cmp/Test variants.
 * Float compares are dispatched to SSE (Ucomi) or x87 (Fucom). Integer
 * compares against 0 prefer Test (folding an And operand when legal);
 * otherwise a regular Cmp with address-mode/immediate matching is built.
 */
2724 static ir_node *gen_Cmp(ir_node *node)
2726 ir_graph *irg = current_ir_graph;
2727 dbg_info *dbgi = get_irn_dbg_info(node);
2728 ir_node *block = get_nodes_block(node);
2729 ir_node *new_block = be_transform_node(block);
2730 ir_node *left = get_Cmp_left(node);
2731 ir_node *right = get_Cmp_right(node);
2732 ir_mode *cmp_mode = get_irn_mode(left);
2734 ia32_address_mode_t am;
2735 ia32_address_t *addr = &am.addr;
2738 if(mode_is_float(cmp_mode)) {
2739 if (ia32_cg_config.use_sse2) {
2740 return create_Ucomi(node);
2742 return create_Fucom(node);
2746 assert(mode_needs_gp_reg(cmp_mode));
2748 /* we prefer the Test instruction where possible except cases where
2749 * we can use SourceAM */
2750 cmp_unsigned = !mode_is_signed(cmp_mode);
2751 if (is_Const_0(right)) {
/* And with a single user whose Cmp projs are all ==/!= can be folded
 * into Test(and_left, and_right) */
2753 get_irn_n_edges(left) == 1 &&
2754 can_fold_test_and(node)) {
2755 /* Test(and_left, and_right) */
2756 ir_node *and_left = get_And_left(left);
2757 ir_node *and_right = get_And_right(left);
2758 ir_mode *mode = get_irn_mode(and_left);
2760 match_arguments(&am, block, and_left, and_right, NULL,
2762 match_am | match_8bit_am | match_16bit_am |
2763 match_am_and_immediates | match_immediate |
2764 match_8bit | match_16bit);
2765 if (get_mode_size_bits(mode) == 8) {
2766 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2767 addr->index, addr->mem, am.new_op1,
2768 am.new_op2, am.ins_permuted,
2771 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2772 addr->index, addr->mem, am.new_op1,
2773 am.new_op2, am.ins_permuted, cmp_unsigned);
/* no And fold possible: try source address mode for the left operand */
2776 match_arguments(&am, block, NULL, left, NULL,
2777 match_am | match_8bit_am | match_16bit_am |
2778 match_8bit | match_16bit);
2779 if (am.op_type == ia32_AddrModeS) {
/* with a memory operand we must compare against an immediate 0
 * (Test needs a register operand) */
2781 ir_node *imm_zero = try_create_Immediate(right, 0);
2782 if (get_mode_size_bits(cmp_mode) == 8) {
2783 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2784 addr->index, addr->mem, am.new_op2,
2785 imm_zero, am.ins_permuted,
2788 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2789 addr->index, addr->mem, am.new_op2,
2790 imm_zero, am.ins_permuted, cmp_unsigned);
2793 /* Test(left, left) */
2794 if (get_mode_size_bits(cmp_mode) == 8) {
2795 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2796 addr->index, addr->mem, am.new_op2,
2797 am.new_op2, am.ins_permuted,
2800 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2801 addr->index, addr->mem, am.new_op2,
2802 am.new_op2, am.ins_permuted,
2808 /* Cmp(left, right) */
2809 match_arguments(&am, block, left, right, NULL,
2810 match_commutative | match_am | match_8bit_am |
2811 match_16bit_am | match_am_and_immediates |
2812 match_immediate | match_8bit | match_16bit);
2813 if (get_mode_size_bits(cmp_mode) == 8) {
2814 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2815 addr->index, addr->mem, am.new_op1,
2816 am.new_op2, am.ins_permuted,
2819 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2820 addr->index, addr->mem, am.new_op1,
2821 am.new_op2, am.ins_permuted, cmp_unsigned);
2824 set_am_attributes(new_node, &am);
2825 assert(cmp_mode != NULL);
2826 set_ia32_ls_mode(new_node, cmp_mode);
2828 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2830 new_node = fix_mem_proj(new_node, &am);
/**
 * Create an ia32 CMov for a Psi node: selects val_false/val_true based on
 * the given flags node and pnc relation. Requires cmov support
 * (asserted below) and gp-register operands.
 */
2835 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2838 ir_graph *irg = current_ir_graph;
2839 dbg_info *dbgi = get_irn_dbg_info(node);
2840 ir_node *block = get_nodes_block(node);
2841 ir_node *new_block = be_transform_node(block);
2842 ir_node *val_true = get_Psi_val(node, 0);
2843 ir_node *val_false = get_Psi_default(node);
2845 match_flags_t match_flags;
2846 ia32_address_mode_t am;
2847 ia32_address_t *addr;
2849 assert(ia32_cg_config.use_cmov);
2850 assert(mode_needs_gp_reg(get_irn_mode(val_true)));
2854 match_flags = match_commutative | match_am | match_16bit_am |
/* flags is passed so operand matching does not destroy the flag value */
2857 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2859 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2860 addr->mem, am.new_op1, am.new_op2, new_flags,
2861 am.ins_permuted, pnc);
2862 set_am_attributes(new_node, &am);
2864 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2866 new_node = fix_mem_proj(new_node, &am);
/**
 * Create a Set (setcc) producing a 0/1 value from flags, widened with a
 * zero-extending conv when the result mode is wider than 8 bits.
 */
2873 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2874 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2877 ir_graph *irg = current_ir_graph;
2878 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2879 ir_node *nomem = new_NoMem();
2880 ir_mode *mode = get_irn_mode(orig_node);
2883 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2884 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2886 /* we might need to conv the result up */
2887 if(get_mode_size_bits(mode) > 8) {
/* mode_Bu forces a zero extension of the 8bit setcc result */
2888 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2889 nomem, new_node, mode_Bu);
2890 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2897 * Transforms a Psi node into CMov.
2899 * @return The transformed node.
2901 static ir_node *gen_Psi(ir_node *node)
2903 dbg_info *dbgi = get_irn_dbg_info(node);
2904 ir_node *block = get_nodes_block(node);
2905 ir_node *new_block = be_transform_node(block);
2906 ir_node *psi_true = get_Psi_val(node, 0);
2907 ir_node *psi_default = get_Psi_default(node);
2908 ir_node *cond = get_Psi_cond(node, 0);
2909 ir_node *flags = NULL;
/* only single-condition, boolean-selected, gp-valued Psis are handled */
2913 assert(get_Psi_n_conds(node) == 1);
2914 assert(get_irn_mode(cond) == mode_b);
2915 assert(mode_needs_gp_reg(get_irn_mode(node)));
2917 flags = get_flags_node(cond, &pnc);
/* Psi(c, 1, 0) and Psi(c, 0, 1) become a Set (the second case with
 * permuted/inverted inputs); everything else becomes a CMov */
2919 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2920 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 0);
2921 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2922 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 1);
2924 new_node = create_CMov(node, cond, flags, pnc);
2931 * Create a conversion from x87 state register to general purpose.
2933 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
2934 ir_node *block = be_transform_node(get_nodes_block(node));
2935 ir_node *op = get_Conv_op(node);
2936 ir_node *new_op = be_transform_node(op);
2937 ia32_code_gen_t *cg = env_cg;
2938 ir_graph *irg = current_ir_graph;
2939 dbg_info *dbgi = get_irn_dbg_info(node);
2940 ir_node *noreg = ia32_new_NoReg_gp(cg);
2941 ir_node *trunc_mode = ia32_new_Fpu_truncate(cg);
2942 ir_mode *mode = get_irn_mode(node);
2943 ir_node *fist, *load;
/* fist stores the x87 value as an integer onto a frame slot; the
 * truncation rounding mode gives C semantics for float->int */
2946 fist = new_rd_ia32_vfist(dbgi, irg, block, get_irg_frame(irg), noreg,
2947 new_NoMem(), new_op, trunc_mode);
2949 set_irn_pinned(fist, op_pin_state_floats);
2950 set_ia32_use_frame(fist);
2951 set_ia32_op_type(fist, ia32_AddrModeD);
2953 assert(get_mode_size_bits(mode) <= 32);
2954 /* exception we can only store signed 32 bit integers, so for unsigned
2955 we store a 64bit (signed) integer and load the lower bits */
2956 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
2957 set_ia32_ls_mode(fist, mode_Ls);
2959 set_ia32_ls_mode(fist, mode_Is);
2961 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* load the (lower 32 bits of the) stored integer back into a gp reg */
2964 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, fist);
2966 set_irn_pinned(load, op_pin_state_floats);
2967 set_ia32_use_frame(load);
2968 set_ia32_op_type(load, ia32_AddrModeS);
2969 set_ia32_ls_mode(load, mode_Is);
/* request a frame entity large enough for the 64bit store if needed */
2970 if(get_ia32_ls_mode(fist) == mode_Ls) {
2971 ia32_attr_t *attr = get_ia32_attr(load);
2972 attr->data.need_64bit_stackent = 1;
2974 ia32_attr_t *attr = get_ia32_attr(load);
2975 attr->data.need_32bit_stackent = 1;
2977 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
2979 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
2983 * Creates a x87 strict Conv by placing a Sore and a Load
2985 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
2987 ir_node *block = get_nodes_block(node);
2988 ir_graph *irg = current_ir_graph;
2989 dbg_info *dbgi = get_irn_dbg_info(node);
2990 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2991 ir_node *nomem = new_NoMem();
2992 ir_node *frame = get_irg_frame(irg);
2993 ir_node *store, *load;
/* the store/load round trip forces the value through memory, truncating
 * the 80bit x87 register precision to the target mode */
2996 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
2998 set_ia32_use_frame(store);
2999 set_ia32_op_type(store, ia32_AddrModeD);
3000 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
3002 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
3004 set_ia32_use_frame(load);
3005 set_ia32_op_type(load, ia32_AddrModeS);
3006 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3008 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
3013 * Create a conversion from general purpose to x87 register
3015 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
3016 ir_node *src_block = get_nodes_block(node);
3017 ir_node *block = be_transform_node(src_block);
3018 ir_graph *irg = current_ir_graph;
3019 dbg_info *dbgi = get_irn_dbg_info(node);
3020 ir_node *op = get_Conv_op(node);
3021 ir_node *new_op = NULL;
3025 ir_mode *store_mode;
3031 /* fild can use source AM if the operand is a signed 32bit integer */
3032 if (src_mode == mode_Is) {
3033 ia32_address_mode_t am;
3035 match_arguments(&am, src_block, NULL, op, NULL,
3036 match_am | match_try_am);
3037 if (am.op_type == ia32_AddrModeS) {
3038 ia32_address_t *addr = &am.addr;
/* fild loads the integer from memory straight into the x87 stack */
3040 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
3041 addr->index, addr->mem);
3042 new_node = new_r_Proj(irg, block, fild, mode_vfp,
3045 set_am_attributes(fild, &am);
3046 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3048 fix_mem_proj(fild, &am);
/* no source-AM possible: transform the operand normally and go through
 * an explicit frame store below */
3053 if(new_op == NULL) {
3054 new_op = be_transform_node(op);
3057 noreg = ia32_new_NoReg_gp(env_cg);
3058 nomem = new_NoMem();
3059 mode = get_irn_mode(op);
3061 /* first convert to 32 bit signed if necessary */
3062 src_bits = get_mode_size_bits(src_mode);
3063 if (src_bits == 8) {
3064 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
3066 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3068 } else if (src_bits < 32) {
3069 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
3071 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3075 assert(get_mode_size_bits(mode) == 32);
/* spill the integer onto the frame so fild can read it */
3078 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
3081 set_ia32_use_frame(store);
3082 set_ia32_op_type(store, ia32_AddrModeD);
3083 set_ia32_ls_mode(store, mode_Iu);
3085 /* exception for 32bit unsigned, do a 64bit spill+load */
3086 if(!mode_is_signed(mode)) {
/* store a zero high word so the 64bit value is the (positive) zero
 * extension of the 32bit unsigned value */
3089 ir_node *zero_const = create_Immediate(NULL, 0, 0);
3091 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
3092 get_irg_frame(irg), noreg, nomem,
3095 set_ia32_use_frame(zero_store);
3096 set_ia32_op_type(zero_store, ia32_AddrModeD);
3097 add_ia32_am_offs_int(zero_store, 4);
3098 set_ia32_ls_mode(zero_store, mode_Iu);
/* both stores must be complete before fild reads the slot */
3103 store = new_rd_Sync(dbgi, irg, block, 2, in);
3104 store_mode = mode_Ls;
3106 store_mode = mode_Is;
3110 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
3112 set_ia32_use_frame(fild);
3113 set_ia32_op_type(fild, ia32_AddrModeS);
3114 set_ia32_ls_mode(fild, store_mode);
3116 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3122 * Create a conversion from one integer mode into another one
3124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3125 dbg_info *dbgi, ir_node *block, ir_node *op,
3128 ir_graph *irg = current_ir_graph;
3129 int src_bits = get_mode_size_bits(src_mode);
3130 int tgt_bits = get_mode_size_bits(tgt_mode);
3131 ir_node *new_block = be_transform_node(block);
3133 ir_mode *smaller_mode;
3135 ia32_address_mode_t am;
3136 ia32_address_t *addr = &am.addr;
/* the conversion is characterized by the smaller of the two modes: an
 * int-to-int conv is a sign/zero extension from that mode */
3139 if (src_bits < tgt_bits) {
3140 smaller_mode = src_mode;
3141 smaller_bits = src_bits;
3143 smaller_mode = tgt_mode;
3144 smaller_bits = tgt_bits;
3147 #ifdef DEBUG_libfirm
3149 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3154 match_arguments(&am, block, NULL, op, NULL,
3155 match_8bit | match_16bit |
3156 match_am | match_8bit_am | match_16bit_am);
3157 if (smaller_bits == 8) {
3158 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3159 addr->index, addr->mem, am.new_op2,
3162 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3163 addr->index, addr->mem, am.new_op2,
3166 set_am_attributes(new_node, &am);
3167 /* match_arguments assume that out-mode = in-mode, this isn't true here
3169 set_ia32_ls_mode(new_node, smaller_mode);
3170 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3171 new_node = fix_mem_proj(new_node, &am);
3176 * Transforms a Conv node.
3178 * @return The created ia32 Conv node
3180 static ir_node *gen_Conv(ir_node *node) {
3181 ir_node *block = get_nodes_block(node);
3182 ir_node *new_block = be_transform_node(block);
3183 ir_node *op = get_Conv_op(node);
3184 ir_node *new_op = NULL;
3185 ir_graph *irg = current_ir_graph;
3186 dbg_info *dbgi = get_irn_dbg_info(node);
3187 ir_mode *src_mode = get_irn_mode(op);
3188 ir_mode *tgt_mode = get_irn_mode(node);
3189 int src_bits = get_mode_size_bits(src_mode);
3190 int tgt_bits = get_mode_size_bits(tgt_mode);
3191 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3192 ir_node *nomem = new_rd_NoMem(irg);
3193 ir_node *res = NULL;
3195 if (src_mode == mode_b) {
3196 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3197 /* nothing to do, we already model bools as 0/1 ints */
3198 return be_transform_node(op);
/* same-mode conv: only strict x87 convs keep any meaning */
3201 if (src_mode == tgt_mode) {
3202 if (get_Conv_strict(node)) {
3203 if (ia32_cg_config.use_sse2) {
3204 /* when we are in SSE mode, we can kill all strict no-op conversion */
3205 return be_transform_node(op);
3208 /* this should be optimized already, but who knows... */
3209 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3210 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3211 return be_transform_node(op);
3215 if (mode_is_float(src_mode)) {
3216 new_op = be_transform_node(op);
3217 /* we convert from float ... */
3218 if (mode_is_float(tgt_mode)) {
/* non-strict E->D narrowing is a no-op for the x87 (80bit registers) */
3219 if(src_mode == mode_E && tgt_mode == mode_D
3220 && !get_Conv_strict(node)) {
3221 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3226 if (ia32_cg_config.use_sse2) {
3227 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3228 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3230 set_ia32_ls_mode(res, tgt_mode);
/* strict x87 conv: force precision via a store/load round trip */
3232 if(get_Conv_strict(node)) {
3233 res = gen_x87_strict_conv(tgt_mode, new_op);
3234 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3237 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3242 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3243 if (ia32_cg_config.use_sse2) {
3244 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3246 set_ia32_ls_mode(res, src_mode);
3248 return gen_x87_fp_to_gp(node);
3252 /* we convert from int ... */
3253 if (mode_is_float(tgt_mode)) {
3255 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3256 if (ia32_cg_config.use_sse2) {
3257 new_op = be_transform_node(op);
3258 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3260 set_ia32_ls_mode(res, tgt_mode);
3262 res = gen_x87_gp_to_fp(node, src_mode);
3263 if(get_Conv_strict(node)) {
3264 res = gen_x87_strict_conv(tgt_mode, res);
3265 SET_IA32_ORIG_NODE(get_Proj_pred(res),
3266 ia32_get_old_node_name(env_cg, node));
3270 } else if(tgt_mode == mode_b) {
3271 /* mode_b lowering already took care that we only have 0/1 values */
3272 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3273 src_mode, tgt_mode));
3274 return be_transform_node(op);
/* int -> int of the same width is a no-op on ia32 */
3277 if (src_bits == tgt_bits) {
3278 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3279 src_mode, tgt_mode));
3280 return be_transform_node(op);
3283 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Check whether a value fits the given gcc-style x86 immediate constraint
 * letter (e.g. 'I' 0..31-ish shift counts, 'K' signed 8bit, 'N' 0..255).
 * NOTE(review): the case labels are elided in this excerpt, so the exact
 * letter->range mapping should be confirmed against the full source /
 * the GCC i386 machine-constraint documentation.
 */
3291 static int check_immediate_constraint(long val, char immediate_constraint_type)
3293 switch (immediate_constraint_type) {
3297 return val >= 0 && val <= 32;
3299 return val >= 0 && val <= 63;
3301 return val >= -128 && val <= 127;
3303 return val == 0xff || val == 0xffff;
3305 return val >= 0 && val <= 3;
3307 return val >= 0 && val <= 255;
3309 return val >= 0 && val <= 127;
3313 panic("Invalid immediate constraint found");
/**
 * Try to express a node as an ia32 Immediate: handles Const, SymConst,
 * Minus of these, and Add/Sub combinations of Const and SymConst.
 * Returns NULL (in the elided failure paths) when the node has no
 * immediate representation or violates the given constraint letter.
 */
3317 static ir_node *try_create_Immediate(ir_node *node,
3318 char immediate_constraint_type)
3321 tarval *offset = NULL;
3322 int offset_sign = 0;
3324 ir_entity *symconst_ent = NULL;
3325 int symconst_sign = 0;
3327 ir_node *cnst = NULL;
3328 ir_node *symconst = NULL;
/* immediates only make sense for int/pointer values */
3331 mode = get_irn_mode(node);
3332 if(!mode_is_int(mode) && !mode_is_reference(mode)) {
/* a top-level Minus just flips the signs recorded below */
3336 if(is_Minus(node)) {
3338 node = get_Minus_op(node);
3341 if(is_Const(node)) {
3344 offset_sign = minus;
3345 } else if(is_SymConst(node)) {
3348 symconst_sign = minus;
3349 } else if(is_Add(node)) {
3350 ir_node *left = get_Add_left(node);
3351 ir_node *right = get_Add_right(node);
3352 if(is_Const(left) && is_SymConst(right)) {
3355 symconst_sign = minus;
3356 offset_sign = minus;
3357 } else if(is_SymConst(left) && is_Const(right)) {
3360 symconst_sign = minus;
3361 offset_sign = minus;
3363 } else if(is_Sub(node)) {
3364 ir_node *left = get_Sub_left(node);
3365 ir_node *right = get_Sub_right(node);
/* Sub flips the sign of exactly the subtrahend */
3366 if(is_Const(left) && is_SymConst(right)) {
3369 symconst_sign = !minus;
3370 offset_sign = minus;
3371 } else if(is_SymConst(left) && is_Const(right)) {
3374 symconst_sign = minus;
3375 offset_sign = !minus;
3382 offset = get_Const_tarval(cnst);
3383 if(tarval_is_long(offset)) {
3384 val = get_tarval_long(offset);
3386 ir_fprintf(stderr, "Optimisation Warning: tarval from %+F is not a "
/* the numeric part must satisfy the requested constraint letter */
3391 if(!check_immediate_constraint(val, immediate_constraint_type))
3394 if(symconst != NULL) {
3395 if(immediate_constraint_type != 0) {
3396 /* we need full 32bits for symconsts */
3400 /* unfortunately the assembler/linker doesn't support -symconst */
3404 if(get_SymConst_kind(symconst) != symconst_addr_ent)
3406 symconst_ent = get_SymConst_entity(symconst);
3408 if(cnst == NULL && symconst == NULL)
3411 if(offset_sign && offset != NULL) {
3412 offset = tarval_neg(offset);
3415 new_node = create_Immediate(symconst_ent, symconst_sign, val);
/**
 * Convenience wrapper: produce an Immediate for the node if possible,
 * otherwise transform it into a regular (register) operand.
 */
3420 static ir_node *create_immediate_or_transform(ir_node *node,
3421 char immediate_constraint_type)
3423 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3424 if (new_node == NULL) {
3425 new_node = be_transform_node(node);
/* Register requirement meaning "no register needed" — used for asm
 * operands (e.g. memory constraints) that the register allocator
 * should ignore. */
3430 static const arch_register_req_t no_register_req = {
3431 arch_register_req_type_none,
3432 NULL, /* regclass */
3433 NULL, /* limit bitset */
3435 0 /* different pos */
3439 * An assembler constraint.
3441 typedef struct constraint_t constraint_t;
3442 struct constraint_t {
/* output requirements array, shared across all constraints of one asm
 * node so "same as" input constraints can patch it */
3445 const arch_register_req_t **out_reqs;
/* the requirement computed for the current operand */
3447 const arch_register_req_t *req;
/* non-zero if an immediate operand is acceptable; immediate_type holds
 * the gcc constraint letter (0 = plain 'i') */
3448 unsigned immediate_possible;
3449 char immediate_type;
/**
 * Parse one gcc-style inline-asm constraint string into a register
 * requirement (constraint->req) plus immediate information.
 * Handles single-register letters (a,b,c,d,D,S), register-set letters
 * (q, A-like, r-like), class letters (gp/x87/xmm), immediate letters,
 * memory constraints, "same as" digits, and explicitly rejected letters.
 */
3452 static void parse_asm_constraint(int pos, constraint_t *constraint, const char *c)
3454 int immediate_possible = 0;
3455 char immediate_type = 0;
3456 unsigned limited = 0;
3457 const arch_register_class_t *cls = NULL;
3458 ir_graph *irg = current_ir_graph;
3459 struct obstack *obst = get_irg_obstack(irg);
3460 arch_register_req_t *req;
3461 unsigned *limited_ptr = NULL;
3465 /* TODO: replace all the asserts with nice error messages */
3468 /* a memory constraint: no need to do anything in backend about it
3469 * (the dependencies are already respected by the memory edge of
3471 constraint->req = &no_register_req;
/* single-register letters accumulate bits in 'limited'; combining is
 * only allowed within the gp class (asserted for each letter) */
3483 assert(cls == NULL ||
3484 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3485 cls = &ia32_reg_classes[CLASS_ia32_gp];
3486 limited |= 1 << REG_EAX;
3489 assert(cls == NULL ||
3490 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3491 cls = &ia32_reg_classes[CLASS_ia32_gp];
3492 limited |= 1 << REG_EBX;
3495 assert(cls == NULL ||
3496 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3497 cls = &ia32_reg_classes[CLASS_ia32_gp];
3498 limited |= 1 << REG_ECX;
3501 assert(cls == NULL ||
3502 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3503 cls = &ia32_reg_classes[CLASS_ia32_gp];
3504 limited |= 1 << REG_EDX;
3507 assert(cls == NULL ||
3508 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3509 cls = &ia32_reg_classes[CLASS_ia32_gp];
3510 limited |= 1 << REG_EDI;
3513 assert(cls == NULL ||
3514 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3515 cls = &ia32_reg_classes[CLASS_ia32_gp];
3516 limited |= 1 << REG_ESI;
3519 case 'q': /* q means lower part of the regs only, this makes no
3520 * difference to Q for us (we only assigne whole registers) */
3521 assert(cls == NULL ||
3522 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3523 cls = &ia32_reg_classes[CLASS_ia32_gp];
3524 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3528 assert(cls == NULL ||
3529 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3530 cls = &ia32_reg_classes[CLASS_ia32_gp];
3531 limited |= 1 << REG_EAX | 1 << REG_EDX;
3534 assert(cls == NULL ||
3535 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3536 cls = &ia32_reg_classes[CLASS_ia32_gp];
3537 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3538 1 << REG_EDX | 1 << REG_ESI | 1 << REG_EDI |
/* whole-class constraints: any gp / x87 / xmm register */
3545 assert(cls == NULL);
3546 cls = &ia32_reg_classes[CLASS_ia32_gp];
3552 /* TODO: mark values so the x87 simulator knows about t and u */
3553 assert(cls == NULL);
3554 cls = &ia32_reg_classes[CLASS_ia32_vfp];
3559 assert(cls == NULL);
3560 /* TODO: check that sse2 is supported */
3561 cls = &ia32_reg_classes[CLASS_ia32_xmm];
/* immediate constraint letters: remember the letter for later range
 * checking in check_immediate_constraint */
3571 assert(!immediate_possible);
3572 immediate_possible = 1;
3573 immediate_type = *c;
3577 assert(!immediate_possible);
3578 immediate_possible = 1;
3582 assert(!immediate_possible && cls == NULL);
3583 immediate_possible = 1;
3584 cls = &ia32_reg_classes[CLASS_ia32_gp];
/* digit: gcc "same as output operand N" constraint */
3597 assert(constraint->is_in && "can only specify same constraint "
3600 sscanf(c, "%d%n", &same_as, &p);
3608 /* memory constraint no need to do anything in backend about it
3609 * (the dependencies are already respected by the memory edge of
3611 constraint->req = &no_register_req;
3614 case 'E': /* no float consts yet */
3615 case 'F': /* no float consts yet */
3616 case 's': /* makes no sense on x86 */
3617 case 'X': /* we can't support that in firm */
3620 case '<': /* no autodecrement on x86 */
3621 case '>': /* no autoincrement on x86 */
3622 case 'C': /* sse constant not supported yet */
3623 case 'G': /* 80387 constant not supported yet */
3624 case 'y': /* we don't support mmx registers yet */
3625 case 'Z': /* not available in 32 bit mode */
3626 case 'e': /* not available in 32 bit mode */
3627 panic("unsupported asm constraint '%c' found in (%+F)",
3628 *c, current_ir_graph);
3631 panic("unknown asm constraint '%c' found in (%+F)", *c,
3639 const arch_register_req_t *other_constr;
3641 assert(cls == NULL && "same as and register constraint not supported");
3642 assert(!immediate_possible && "same as and immediate constraint not "
3644 assert(same_as < constraint->n_outs && "wrong constraint number in "
3645 "same_as constraint");
3647 other_constr = constraint->out_reqs[same_as];
3649 req = obstack_alloc(obst, sizeof(req[0]));
3650 req->cls = other_constr->cls;
3651 req->type = arch_register_req_type_should_be_same;
3652 req->limited = NULL;
3653 req->other_same = 1U << pos;
3654 req->other_different = 0;
3656 /* switch constraints. This is because in firm we have same_as
3657 * constraints on the output constraints while in the gcc asm syntax
3658 * they are specified on the input constraints */
3659 constraint->req = other_constr;
3660 constraint->out_reqs[same_as] = req;
3661 constraint->immediate_possible = 0;
/* immediates without a register letter default to the gp class */
3665 if(immediate_possible && cls == NULL) {
3666 cls = &ia32_reg_classes[CLASS_ia32_gp];
3668 assert(!immediate_possible || cls == &ia32_reg_classes[CLASS_ia32_gp]);
3669 assert(cls != NULL);
3671 if(immediate_possible) {
3672 assert(constraint->is_in
3673 && "immediate make no sense for output constraints");
3675 /* todo: check types (no float input on 'r' constrained in and such... */
/* a limited requirement needs extra space for the register bitset,
 * allocated right behind the req structure */
3678 req = obstack_alloc(obst, sizeof(req[0]) + sizeof(unsigned));
3679 limited_ptr = (unsigned*) (req+1);
3681 req = obstack_alloc(obst, sizeof(req[0]));
3683 memset(req, 0, sizeof(req[0]));
3686 req->type = arch_register_req_type_limited;
3687 *limited_ptr = limited;
3688 req->limited = limited_ptr;
3690 req->type = arch_register_req_type_normal;
3694 constraint->req = req;
3695 constraint->immediate_possible = immediate_possible;
3696 constraint->immediate_type = immediate_type;
/**
 * Parse one asm clobber name into a limited register requirement pinning
 * that single register. Register lookup is by name, also accepting the
 * gp-register name without its leading 'e' (e.g. "ax" for "eax").
 */
3699 static void parse_clobber(ir_node *node, int pos, constraint_t *constraint,
3700 const char *clobber)
3702 ir_graph *irg = get_irn_irg(node);
3703 struct obstack *obst = get_irg_obstack(irg);
3704 const arch_register_t *reg = NULL;
3707 arch_register_req_t *req;
3708 const arch_register_class_t *cls;
3713 /* TODO: construct a hashmap instead of doing linear search for clobber
3715 for(c = 0; c < N_CLASSES; ++c) {
3716 cls = & ia32_reg_classes[c];
3717 for(r = 0; r < cls->n_regs; ++r) {
3718 const arch_register_t *temp_reg = arch_register_for_index(cls, r);
3719 if(strcmp(temp_reg->name, clobber) == 0
3720 || (c == CLASS_ia32_gp && strcmp(temp_reg->name+1, clobber) == 0)) {
3729 panic("Register '%s' mentioned in asm clobber is unknown\n", clobber);
/* the limited bitset is a single unsigned, so index must fit in 32 bits */
3733 assert(reg->index < 32);
3735 limited = obstack_alloc(obst, sizeof(limited[0]));
3736 *limited = 1 << reg->index;
3738 req = obstack_alloc(obst, sizeof(req[0]));
3739 memset(req, 0, sizeof(req[0]));
3740 req->type = arch_register_req_type_limited;
3742 req->limited = limited;
3744 constraint->req = req;
3745 constraint->immediate_possible = 0;
3746 constraint->immediate_type = 0;
/**
 * Scans an asm constraint string character by character.
 * NOTE(review): the loop body that inspects *c (and the return statements)
 * is not visible in this excerpt; presumably it reports whether the
 * constraint describes a memory operand — confirm against the full source.
 */
3749 static int is_memory_op(const ir_asm_constraint *constraint)
3751 ident *id = constraint->constraint;
3752 const char *str = get_id_str(id);
3755 for(c = str; *c != '\0'; ++c) {
3764 * generates code for an ASM node
3766 static ir_node *gen_ASM(ir_node *node)
3769 ir_graph *irg = current_ir_graph;
3770 ir_node *block = get_nodes_block(node);
3771 ir_node *new_block = be_transform_node(block);
3772 dbg_info *dbgi = get_irn_dbg_info(node);
3776 int n_out_constraints;
3778 const arch_register_req_t **out_reg_reqs;
3779 const arch_register_req_t **in_reg_reqs;
3780 ia32_asm_reg_t *register_map;
3781 unsigned reg_map_size = 0;
3782 struct obstack *obst;
3783 const ir_asm_constraint *in_constraints;
3784 const ir_asm_constraint *out_constraints;
3786 constraint_t parsed_constraint;
3788 arity = get_irn_arity(node);
3789 in = alloca(arity * sizeof(in[0]));
3790 memset(in, 0, arity * sizeof(in[0]));
3792 n_out_constraints = get_ASM_n_output_constraints(node);
3793 n_clobbers = get_ASM_n_clobbers(node);
3794 out_arity = n_out_constraints + n_clobbers;
3795 /* hack to keep space for mem proj */
3799 in_constraints = get_ASM_input_constraints(node);
3800 out_constraints = get_ASM_output_constraints(node);
3801 clobbers = get_ASM_clobbers(node);
3803 /* construct output constraints */
3804 obst = get_irg_obstack(irg);
3805 out_reg_reqs = obstack_alloc(obst, out_arity * sizeof(out_reg_reqs[0]));
3806 parsed_constraint.out_reqs = out_reg_reqs;
3807 parsed_constraint.n_outs = n_out_constraints;
3808 parsed_constraint.is_in = 0;
3810 for(i = 0; i < out_arity; ++i) {
3813 if(i < n_out_constraints) {
3814 const ir_asm_constraint *constraint = &out_constraints[i];
3815 c = get_id_str(constraint->constraint);
3816 parse_asm_constraint(i, &parsed_constraint, c);
/* NOTE(review): reg_map_size ends up as the maximum pos seen, yet
 * register_map below is indexed with pos and asserted pos < reg_map_size;
 * 'constraint->pos + 1' looks intended here — verify against the full
 * source/callers. */
3818 if(constraint->pos > reg_map_size)
3819 reg_map_size = constraint->pos;
3821 out_reg_reqs[i] = parsed_constraint.req;
3822 } else if(i < out_arity - 1) {
3823 ident *glob_id = clobbers [i - n_out_constraints];
3824 assert(glob_id != NULL);
3825 c = get_id_str(glob_id);
3826 parse_clobber(node, i, &parsed_constraint, c);
/* clobber reqs are stored shifted by one: slot n_out_constraints is
 * reserved for the mem proj (see the "hack to keep space for mem proj"
 * above and the &no_register_req assignment below) */
3828 out_reg_reqs[i+1] = parsed_constraint.req;
3832 out_reg_reqs[n_out_constraints] = &no_register_req;
3834 /* construct input constraints */
3835 in_reg_reqs = obstack_alloc(obst, arity * sizeof(in_reg_reqs[0]));
3836 parsed_constraint.is_in = 1;
3837 for(i = 0; i < arity; ++i) {
3838 const ir_asm_constraint *constraint = &in_constraints[i];
3839 ident *constr_id = constraint->constraint;
3840 const char *c = get_id_str(constr_id);
3842 parse_asm_constraint(i, &parsed_constraint, c);
3843 in_reg_reqs[i] = parsed_constraint.req;
/* NOTE(review): same suspected off-by-one as for the outputs above */
3845 if(constraint->pos > reg_map_size)
3846 reg_map_size = constraint->pos;
/* try to fold the operand into an immediate if the constraint allows it */
3848 if(parsed_constraint.immediate_possible) {
3849 ir_node *pred = get_irn_n(node, i);
3850 char imm_type = parsed_constraint.immediate_type;
3851 ir_node *immediate = try_create_Immediate(pred, imm_type);
3853 if(immediate != NULL) {
/* build the map from template positions to in/out operand indices */
3860 register_map = NEW_ARR_D(ia32_asm_reg_t, obst, reg_map_size);
3861 memset(register_map, 0, reg_map_size * sizeof(register_map[0]));
3863 for(i = 0; i < n_out_constraints; ++i) {
3864 const ir_asm_constraint *constraint = &out_constraints[i];
3865 unsigned pos = constraint->pos;
3867 assert(pos < reg_map_size);
3868 register_map[pos].use_input = 0;
3869 register_map[pos].valid = 1;
3870 register_map[pos].memory = is_memory_op(constraint);
3871 register_map[pos].inout_pos = i;
3872 register_map[pos].mode = constraint->mode;
3875 /* transform inputs */
3876 for(i = 0; i < arity; ++i) {
3877 const ir_asm_constraint *constraint = &in_constraints[i];
3878 unsigned pos = constraint->pos;
3879 ir_node *pred = get_irn_n(node, i);
3880 ir_node *transformed;
3882 assert(pos < reg_map_size);
3883 register_map[pos].use_input = 1;
3884 register_map[pos].valid = 1;
3885 register_map[pos].memory = is_memory_op(constraint);
3886 register_map[pos].inout_pos = i;
3887 register_map[pos].mode = constraint->mode;
3892 transformed = be_transform_node(pred);
3893 in[i] = transformed;
3896 new_node = new_rd_ia32_Asm(dbgi, irg, new_block, arity, in, out_arity,
3897 get_ASM_text(node), register_map);
3899 set_ia32_out_req_all(new_node, out_reg_reqs);
3900 set_ia32_in_req_all(new_node, in_reg_reqs);
3902 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3908 * Transforms a be_FrameAddr into an ia32 Lea computing the frame address.
3910 static ir_node *gen_be_FrameAddr(ir_node *node) {
3911 ir_node *block = be_transform_node(get_nodes_block(node));
3912 ir_node *op = be_get_FrameAddr_frame(node);
3913 ir_node *new_op = be_transform_node(op);
3914 ir_graph *irg = current_ir_graph;
3915 dbg_info *dbgi = get_irn_dbg_info(node);
3916 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
/* address = frame pointer + entity offset, expressed as a Lea using the
 * transformed frame node as base; the offset is filled in later from the
 * frame entity once stack layout is fixed */
3919 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3920 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3921 set_ia32_use_frame(new_node);
3923 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3929 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/**
 * Transforms a be_Return.  With SSE2 enabled and a float return value the
 * result lives in an xmm register but the ABI returns floats on the x87
 * stack, so the value is spilled to the frame (xStoreSimple) and reloaded
 * as x87 (vfld); a rebuilt Barrier routes the new value/memory.  In every
 * other case the Return is simply duplicated.
 */
3931 static ir_node *gen_be_Return(ir_node *node) {
3932 ir_graph *irg = current_ir_graph;
3933 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3934 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3935 ir_entity *ent = get_irg_entity(irg);
3936 ir_type *tp = get_entity_type(ent);
3941 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3942 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3945 int pn_ret_val, pn_ret_mem, arity, i;
/* fast path: no return value or no SSE2 -> nothing special to do */
3947 assert(ret_val != NULL);
3948 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3949 return be_duplicate_node(node);
3952 res_type = get_method_res_type(tp, 0);
3954 if (! is_Primitive_type(res_type)) {
3955 return be_duplicate_node(node);
3958 mode = get_type_mode(res_type);
3959 if (! mode_is_float(mode)) {
3960 return be_duplicate_node(node);
3963 assert(get_method_n_ress(tp) == 1);
3965 pn_ret_val = get_Proj_proj(ret_val);
3966 pn_ret_mem = get_Proj_proj(ret_mem);
3968 /* get the Barrier */
3969 barrier = get_Proj_pred(ret_val);
3971 /* get result input of the Barrier */
3972 ret_val = get_irn_n(barrier, pn_ret_val);
3973 new_ret_val = be_transform_node(ret_val);
3975 /* get memory input of the Barrier */
3976 ret_mem = get_irn_n(barrier, pn_ret_mem);
3977 new_ret_mem = be_transform_node(ret_mem);
3979 frame = get_irg_frame(irg);
3981 dbgi = get_irn_dbg_info(barrier);
3982 block = be_transform_node(get_nodes_block(barrier));
3984 noreg = ia32_new_NoReg_gp(env_cg);
3986 /* store xmm0 onto stack */
3987 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3988 new_ret_mem, new_ret_val);
3989 set_ia32_ls_mode(sse_store, mode);
3990 set_ia32_op_type(sse_store, ia32_AddrModeD);
3991 set_ia32_use_frame(sse_store);
3993 /* load into x87 register */
3994 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3995 set_ia32_op_type(fld, ia32_AddrModeS);
3996 set_ia32_use_frame(fld);
3998 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3999 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
4001 /* create a new barrier */
4002 arity = get_irn_arity(barrier);
4003 in = alloca(arity * sizeof(in[0]));
/* rebuild the Barrier's ins, substituting the x87 value and the store's
 * memory proj at the value/memory positions */
4004 for (i = 0; i < arity; ++i) {
4007 if (i == pn_ret_val) {
4009 } else if (i == pn_ret_mem) {
4012 ir_node *in = get_irn_n(barrier, i);
4013 new_in = be_transform_node(in);
4018 new_barrier = new_ir_node(dbgi, irg, block,
4019 get_irn_op(barrier), get_irn_mode(barrier),
4021 copy_node_attr(barrier, new_barrier);
4022 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the Return transform below picks it up */
4023 be_set_transformed_node(barrier, new_barrier);
4024 mark_irn_visited(barrier);
4026 /* transform normally */
4027 return be_duplicate_node(node);
4031 * Transform a be_AddSP into an ia32_SubSP.
4033 static ir_node *gen_be_AddSP(ir_node *node)
4035 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4036 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
/* AddSP becomes SubSP: the ia32 stack grows downwards, so reserving space
 * means subtracting from esp */
4038 return gen_binop(node, sp, sz, new_rd_ia32_SubSP, match_am);
4042 * Transform a be_SubSP into an ia32_AddSP
4044 static ir_node *gen_be_SubSP(ir_node *node)
4046 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4047 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
/* SubSP becomes AddSP: releasing stack space adds to esp (stack grows
 * downwards on ia32) */
4049 return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
4053 * This function just sets the register for the Unknown node
4054 * as this is not done during register allocation because Unknown
4055 * is an "ignore" node.
4057 static ir_node *gen_Unknown(ir_node *node) {
4058 ir_mode *mode = get_irn_mode(node);
4060 if (mode_is_float(mode)) {
4061 if (ia32_cg_config.use_sse2) {
4062 return ia32_new_Unknown_xmm(env_cg);
4064 /* Unknown nodes are buggy in x87 simulator, use zero for now... */
4065 ir_graph *irg = current_ir_graph;
4066 dbg_info *dbgi = get_irn_dbg_info(node);
4067 ir_node *block = get_irg_start_block(irg);
4068 ir_node *ret = new_rd_ia32_vfldz(dbgi, irg, block);
4070 /* Const Nodes before the initial IncSP are a bad idea, because
4071 * they could be spilled and we have no SP ready at that point yet.
4072 * So add a dependency to the initial frame pointer calculation to
4073 * avoid that situation.
4075 add_irn_dep(ret, get_irg_frame(irg));
4078 } else if (mode_needs_gp_reg(mode)) {
4079 return ia32_new_Unknown_gp(env_cg);
/* anything that is neither float nor gp-register material is unsupported */
4081 panic("unsupported Unknown-Mode");
4087 * Change some phi modes
4089 static ir_node *gen_Phi(ir_node *node) {
4090 ir_node *block = be_transform_node(get_nodes_block(node));
4091 ir_graph *irg = current_ir_graph;
4092 dbg_info *dbgi = get_irn_dbg_info(node);
4093 ir_mode *mode = get_irn_mode(node);
4096 if(mode_needs_gp_reg(mode)) {
4097 /* we shouldn't have any 64bit stuff around anymore */
4098 assert(get_mode_size_bits(mode) <= 32);
4099 /* all integer operations are on 32bit registers now */
4101 } else if(mode_is_float(mode)) {
4102 if (ia32_cg_config.use_sse2) {
4109 /* phi nodes allow loops, so we use the old arguments for now
4110 * and fix this later */
4111 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4112 get_irn_in(node) + 1)
4113 copy_node_attr(node, phi);
4114 be_duplicate_deps(node, phi);
/* record mapping and queue predecessors; the phi's ins are fixed up once
 * all predecessors have been transformed */
4116 be_set_transformed_node(node, phi);
4117 be_enqueue_preds(node);
/**
 * Transforms an IJmp (indirect jump) into an ia32_IJmp, letting the matcher
 * fold the target into an address-mode operand or an immediate.
 */
4125 static ir_node *gen_IJmp(ir_node *node)
4127 ir_node *block = get_nodes_block(node);
4128 ir_node *new_block = be_transform_node(block);
4129 ir_graph *irg = current_ir_graph;
4130 dbg_info *dbgi = get_irn_dbg_info(node);
4131 ir_node *op = get_IJmp_target(node);
4133 ia32_address_mode_t am;
4134 ia32_address_t *addr = &am.addr;
4136 assert(get_irn_mode(op) == mode_P);
4138 match_arguments(&am, block, NULL, op, NULL,
4139 match_am | match_8bit_am | match_16bit_am |
4140 match_immediate | match_8bit | match_16bit);
4142 new_node = new_rd_ia32_IJmp(dbgi, irg, new_block, addr->base, addr->index,
4143 addr->mem, am.new_op2);
4144 set_am_attributes(new_node, &am);
4145 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reroute mem users if source address mode swallowed a load */
4147 new_node = fix_mem_proj(new_node, &am);
/* Constructor signatures for the lowered load/store transform helpers:
 * a load factory takes (base, index, mem), a store factory additionally
 * takes the value to store. */
4152 typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
4155 typedef ir_node *construct_store_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
4156 ir_node *val, ir_node *mem);
4159 * Transforms a lowered Load into a "real" one.
/**
 * Transforms a lowered Load into a "real" one by rebuilding it with the
 * given constructor and copying over all address-mode attributes
 * (offset, scale, symconst, ls mode, frame entity).
 */
4161 static ir_node *gen_lowered_Load(ir_node *node, construct_load_func func)
4163 ir_node *block = be_transform_node(get_nodes_block(node));
4164 ir_node *ptr = get_irn_n(node, 0);
4165 ir_node *new_ptr = be_transform_node(ptr);
4166 ir_node *mem = get_irn_n(node, 1);
4167 ir_node *new_mem = be_transform_node(mem);
4168 ir_graph *irg = current_ir_graph;
4169 dbg_info *dbgi = get_irn_dbg_info(node);
4170 ir_mode *mode = get_ia32_ls_mode(node);
4171 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4174 new_op = func(dbgi, irg, block, new_ptr, noreg, new_mem);
/* copy address-mode attributes from the lowered node */
4176 set_ia32_op_type(new_op, ia32_AddrModeS);
4177 set_ia32_am_offs_int(new_op, get_ia32_am_offs_int(node));
4178 set_ia32_am_scale(new_op, get_ia32_am_scale(node));
4179 set_ia32_am_sc(new_op, get_ia32_am_sc(node));
4180 if (is_ia32_am_sc_sign(node))
4181 set_ia32_am_sc_sign(new_op);
4182 set_ia32_ls_mode(new_op, mode);
4183 if (is_ia32_use_frame(node)) {
4184 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4185 set_ia32_use_frame(new_op);
4188 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4194 * Transforms a lowered Store into a "real" one.
/**
 * Transforms a lowered Store into a "real" one by rebuilding it with the
 * given constructor and copying the offset, ls mode and frame entity.
 */
4196 static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
4198 ir_node *block = be_transform_node(get_nodes_block(node));
4199 ir_node *ptr = get_irn_n(node, 0);
4200 ir_node *new_ptr = be_transform_node(ptr);
4201 ir_node *val = get_irn_n(node, 1);
4202 ir_node *new_val = be_transform_node(val);
4203 ir_node *mem = get_irn_n(node, 2);
4204 ir_node *new_mem = be_transform_node(mem);
4205 ir_graph *irg = current_ir_graph;
4206 dbg_info *dbgi = get_irn_dbg_info(node);
4207 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4208 ir_mode *mode = get_ia32_ls_mode(node);
4212 new_op = func(dbgi, irg, block, new_ptr, noreg, new_val, new_mem);
4214 am_offs = get_ia32_am_offs_int(node);
4215 add_ia32_am_offs_int(new_op, am_offs);
/* lowered stores always go to the frame */
4217 set_ia32_op_type(new_op, ia32_AddrModeD);
4218 set_ia32_ls_mode(new_op, mode);
4219 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4220 set_ia32_use_frame(new_op);
4222 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
/* Transforms an l_ShlDep (shift-left with explicit scheduling dependency)
 * into a real ia32_Shl. */
4227 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4229 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4230 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4232 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
4233 match_immediate | match_mode_neutral);
/* Transforms an l_ShrDep into a real ia32_Shr. */
4236 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4238 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4239 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4240 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/* Transforms an l_SarDep into a real ia32_Sar. */
4244 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4246 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4247 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4248 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/* Transforms an l_Add (lower word of a 64-bit add) into an ia32_Add and
 * forces it to mode_T so the carry flag produced for the matching l_Adc
 * is available as a Proj. */
4252 static ir_node *gen_ia32_l_Add(ir_node *node) {
4253 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4254 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4255 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
4256 match_commutative | match_am | match_immediate |
4257 match_mode_neutral);
/* gen_binop may have wrapped the Add in a result Proj; unwrap it */
4259 if(is_Proj(lowered)) {
4260 lowered = get_Proj_pred(lowered);
4262 assert(is_ia32_Add(lowered));
4263 set_irn_mode(lowered, mode_T);
/* Transforms an l_Adc (upper word of a 64-bit add, consuming the carry
 * flag) into an ia32_Adc. */
4269 static ir_node *gen_ia32_l_Adc(ir_node *node)
4271 return gen_binop_flags(node, new_rd_ia32_Adc,
4272 match_commutative | match_am | match_immediate |
4273 match_mode_neutral);
4277 * Transforms an ia32_l_vfild into a "real" ia32_vfild node
4279 * @param node The node to transform
4280 * @return the created ia32 vfild node
4282 static ir_node *gen_ia32_l_vfild(ir_node *node) {
/* delegate to the generic lowered-Load transformer with the vfild factory */
4283 return gen_lowered_Load(node, new_rd_ia32_vfild);
4287 * Transforms an ia32_l_Load into a "real" ia32_Load node
4289 * @param node The node to transform
4290 * @return the created ia32 Load node
4292 static ir_node *gen_ia32_l_Load(ir_node *node) {
/* delegate to the generic lowered-Load transformer with the Load factory */
4293 return gen_lowered_Load(node, new_rd_ia32_Load);
4297 * Transforms an ia32_l_Store into a "real" ia32_Store node
4299 * @param node The node to transform
4300 * @return the created ia32 Store node
4302 static ir_node *gen_ia32_l_Store(ir_node *node) {
/* delegate to the generic lowered-Store transformer with the Store factory */
4303 return gen_lowered_Store(node, new_rd_ia32_Store);
4307 * Transforms a l_vfist into a "real" vfist node.
4309 * @param node The node to transform
4310 * @return the created ia32 vfist node
4312 static ir_node *gen_ia32_l_vfist(ir_node *node) {
4313 ir_node *block = be_transform_node(get_nodes_block(node));
4314 ir_node *ptr = get_irn_n(node, 0);
4315 ir_node *new_ptr = be_transform_node(ptr);
4316 ir_node *val = get_irn_n(node, 1);
4317 ir_node *new_val = be_transform_node(val);
4318 ir_node *mem = get_irn_n(node, 2);
4319 ir_node *new_mem = be_transform_node(mem);
4320 ir_graph *irg = current_ir_graph;
4321 dbg_info *dbgi = get_irn_dbg_info(node);
4322 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4323 ir_mode *mode = get_ia32_ls_mode(node);
/* fist truncates, so the fpu control word must select truncation mode */
4324 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4328 new_op = new_rd_ia32_vfist(dbgi, irg, block, new_ptr, noreg, new_mem,
4329 new_val, trunc_mode);
4331 am_offs = get_ia32_am_offs_int(node);
4332 add_ia32_am_offs_int(new_op, am_offs);
4334 set_ia32_op_type(new_op, ia32_AddrModeD);
4335 set_ia32_ls_mode(new_op, mode);
4336 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4337 set_ia32_use_frame(new_op);
4339 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4345 * Transforms an ia32_l_Mul into a "real" ia32_Mul node.
4347 * @return the created ia32 Mul node
4349 static ir_node *gen_ia32_l_Mul(ir_node *node) {
4350 ir_node *left = get_binop_left(node);
4351 ir_node *right = get_binop_right(node);
/* unsigned widening multiply (edx:eax result) */
4353 return gen_binop(node, left, right, new_rd_ia32_Mul,
4354 match_commutative | match_am | match_mode_neutral);
4358 * Transforms an ia32_l_IMul into a "real" ia32_IMul1OP node.
4360 * @return the created ia32 IMul1OP node
4362 static ir_node *gen_ia32_l_IMul(ir_node *node) {
4363 ir_node *left = get_binop_left(node);
4364 ir_node *right = get_binop_right(node);
/* signed widening multiply, one-operand form (edx:eax result) */
4366 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
4367 match_commutative | match_am | match_mode_neutral);
/* Transforms an l_Sub (lower word of a 64-bit sub) into an ia32_Sub and
 * forces mode_T so the borrow flag is available for the matching l_Sbb. */
4370 static ir_node *gen_ia32_l_Sub(ir_node *node) {
4371 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4372 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4373 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
4374 match_am | match_immediate | match_mode_neutral);
/* gen_binop may have wrapped the Sub in a result Proj; unwrap it */
4376 if(is_Proj(lowered)) {
4377 lowered = get_Proj_pred(lowered);
4379 assert(is_ia32_Sub(lowered));
4380 set_irn_mode(lowered, mode_T);
/* Transforms an l_Sbb (upper word of a 64-bit sub, consuming the borrow
 * flag) into an ia32_Sbb. */
4386 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
4387 return gen_binop_flags(node, new_rd_ia32_Sbb,
4388 match_am | match_immediate | match_mode_neutral);
4392 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4393 * op1 - target to be shifted
4394 * op2 - contains bits to be shifted into target
4396 * Only op3 can be an immediate.
4398 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4399 ir_node *low, ir_node *count)
4401 ir_node *block = get_nodes_block(node);
4402 ir_node *new_block = be_transform_node(block);
4403 ir_graph *irg = current_ir_graph;
4404 dbg_info *dbgi = get_irn_dbg_info(node);
4405 ir_node *new_high = be_transform_node(high);
4406 ir_node *new_low = be_transform_node(low);
4410 /* the shift amount can be any mode that is bigger than 5 bits, since all
4411 * other bits are ignored anyway */
4412 while (is_Conv(count) && get_irn_n_edges(count) == 1) {
4413 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4414 count = get_Conv_op(count);
4416 new_count = create_immediate_or_transform(count, 0);
/* pick ShlD or ShrD depending on the lowered node's direction */
4418 if (is_ia32_l_ShlD(node)) {
4419 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
4422 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
4425 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Transforms an l_ShlD into an ia32_ShlD via the common 64-bit shift
 * helper. */
4430 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4432 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4433 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4434 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4435 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transforms an l_ShrD into an ia32_ShrD via the common 64-bit shift
 * helper. */
4438 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4440 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4441 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4442 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4443 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transforms an l_LLtoFloat (64-bit int -> float conversion): stores the
 * two 32-bit halves to a 64-bit frame slot and reloads them with a single
 * x87 fild (vfild) of mode_Ls.  Only the signed case is supported.
 */
4446 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
4447 ir_node *src_block = get_nodes_block(node);
4448 ir_node *block = be_transform_node(src_block);
4449 ir_graph *irg = current_ir_graph;
4450 dbg_info *dbgi = get_irn_dbg_info(node);
4451 ir_node *frame = get_irg_frame(irg);
4452 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4453 ir_node *nomem = new_NoMem();
4454 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4455 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4456 ir_node *new_val_low = be_transform_node(val_low);
4457 ir_node *new_val_high = be_transform_node(val_high);
4462 ir_node *store_high;
4464 if(!mode_is_signed(get_irn_mode(val_high))) {
4465 panic("unsigned long long -> float not supported yet (%+F)", node);
/* spill the two 32-bit halves to adjacent frame slots */
4469 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4471 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4473 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
4474 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
4476 set_ia32_use_frame(store_low);
4477 set_ia32_use_frame(store_high);
4478 set_ia32_op_type(store_low, ia32_AddrModeD);
4479 set_ia32_op_type(store_high, ia32_AddrModeD);
/* low half is unsigned, high half carries the sign */
4480 set_ia32_ls_mode(store_low, mode_Iu);
4481 set_ia32_ls_mode(store_high, mode_Is);
4482 add_ia32_am_offs_int(store_high, 4);
/* join both stores before the 64-bit reload */
4486 sync = new_rd_Sync(dbgi, irg, block, 2, in);
4489 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
4491 set_ia32_use_frame(fild);
4492 set_ia32_op_type(fild, ia32_AddrModeS);
4493 set_ia32_ls_mode(fild, mode_Ls);
4495 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
4497 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Transforms an l_FloattoLL (float -> 64-bit int conversion): a truncating
 * x87 fist (vfist) writes the 64-bit result to a frame slot; the two 32-bit
 * halves are read back later by gen_Proj_l_FloattoLL.
 */
4500 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
4501 ir_node *src_block = get_nodes_block(node);
4502 ir_node *block = be_transform_node(src_block);
4503 ir_graph *irg = current_ir_graph;
4504 dbg_info *dbgi = get_irn_dbg_info(node);
4505 ir_node *frame = get_irg_frame(irg);
4506 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4507 ir_node *nomem = new_NoMem();
4508 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4509 ir_node *new_val = be_transform_node(val);
/* fist truncates, so the fpu control word must select truncation mode */
4510 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4515 fist = new_rd_ia32_vfist(dbgi, irg, block, frame, noreg, nomem, new_val,
4517 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
4518 set_ia32_use_frame(fist);
4519 set_ia32_op_type(fist, ia32_AddrModeD);
4520 set_ia32_ls_mode(fist, mode_Ls);
4526 * the BAD transformer.
4528 static ir_node *bad_transform(ir_node *node) {
/* registered for opcodes that must never reach the transformer */
4529 panic("No transform function for %+F available.\n", node);
/**
 * Transforms a Proj of an l_FloattoLL: loads one 32-bit half of the 64-bit
 * result that gen_ia32_l_FloattoLL wrote to the frame (the high half at
 * offset +4).
 */
4533 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
4534 ir_graph *irg = current_ir_graph;
4535 ir_node *block = be_transform_node(get_nodes_block(node));
4536 ir_node *pred = get_Proj_pred(node);
4537 ir_node *new_pred = be_transform_node(pred);
4538 ir_node *frame = get_irg_frame(irg);
4539 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4540 dbg_info *dbgi = get_irn_dbg_info(node);
4541 long pn = get_Proj_proj(node);
4546 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
4547 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
4548 set_ia32_use_frame(load);
4549 set_ia32_op_type(load, ia32_AddrModeS);
4550 set_ia32_ls_mode(load, mode_Iu);
4551 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4552 * 32 bit from it with this particular load */
4553 attr = get_ia32_attr(load);
4554 attr->data.need_64bit_stackent = 1;
4556 if (pn == pn_ia32_l_FloattoLL_res_high) {
4557 add_ia32_am_offs_int(load, 4);
4559 assert(pn == pn_ia32_l_FloattoLL_res_low);
4562 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4568 * Transform the Projs of an AddSP.
4570 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
4571 ir_node *block = be_transform_node(get_nodes_block(node));
4572 ir_node *pred = get_Proj_pred(node);
4573 ir_node *new_pred = be_transform_node(pred);
4574 ir_graph *irg = current_ir_graph;
4575 dbg_info *dbgi = get_irn_dbg_info(node);
4576 long proj = get_Proj_proj(node);
/* be_AddSP was turned into ia32_SubSP (stack grows down), so AddSP proj
 * numbers map onto SubSP proj numbers here */
4578 if (proj == pn_be_AddSP_sp) {
4579 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4580 pn_ia32_SubSP_stack);
4581 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4583 } else if(proj == pn_be_AddSP_res) {
4584 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4585 pn_ia32_SubSP_addr);
4586 } else if (proj == pn_be_AddSP_M) {
4587 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4591 return new_rd_Unknown(irg, get_irn_mode(node));
4595 * Transform the Projs of a SubSP.
4597 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
4598 ir_node *block = be_transform_node(get_nodes_block(node));
4599 ir_node *pred = get_Proj_pred(node);
4600 ir_node *new_pred = be_transform_node(pred);
4601 ir_graph *irg = current_ir_graph;
4602 dbg_info *dbgi = get_irn_dbg_info(node);
4603 long proj = get_Proj_proj(node);
/* be_SubSP was turned into ia32_AddSP (stack grows down), so SubSP proj
 * numbers map onto AddSP proj numbers here */
4605 if (proj == pn_be_SubSP_sp) {
4606 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4607 pn_ia32_AddSP_stack);
4608 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4610 } else if (proj == pn_be_SubSP_M) {
4611 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4615 return new_rd_Unknown(irg, get_irn_mode(node));
4619 * Transform and renumber the Projs from a Load.
4621 static ir_node *gen_Proj_Load(ir_node *node) {
4623 ir_node *block = be_transform_node(get_nodes_block(node));
4624 ir_node *pred = get_Proj_pred(node);
4625 ir_graph *irg = current_ir_graph;
4626 dbg_info *dbgi = get_irn_dbg_info(node);
4627 long proj = get_Proj_proj(node);
4630 /* loads might be part of source address mode matches, so we don't
4631 transform the ProjMs yet (with the exception of loads whose result is
4634 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4637 assert(pn_ia32_Load_M == 1); /* convention: mem-result of Source-AM
4639 /* this is needed, because sometimes we have loops that are only
4640 reachable through the ProjM */
4641 be_enqueue_preds(node);
4642 /* do it in 2 steps, to silence firm verifier */
4643 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4644 set_Proj_proj(res, pn_ia32_Load_M);
4648 /* renumber the proj */
4649 new_pred = be_transform_node(pred);
4650 if (is_ia32_Load(new_pred)) {
4653 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4655 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4656 case pn_Load_X_regular:
4657 return new_rd_Jmp(dbgi, irg, block);
4658 case pn_Load_X_except:
4659 /* This Load might raise an exception. Mark it. */
4660 set_ia32_exc_label(new_pred, 1);
4661 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4665 } else if (is_ia32_Conv_I2I(new_pred) ||
4666 is_ia32_Conv_I2I8Bit(new_pred)) {
/* the load was folded into a conversion; turn it into mode_T so both
 * the result and memory can be projected */
4667 set_irn_mode(new_pred, mode_T);
4668 if (proj == pn_Load_res) {
4669 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4670 } else if (proj == pn_Load_M) {
4671 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4673 } else if (is_ia32_xLoad(new_pred)) {
4676 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4678 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4679 case pn_Load_X_regular:
4680 return new_rd_Jmp(dbgi, irg, block);
4681 case pn_Load_X_except:
4682 /* This Load might raise an exception. Mark it. */
4683 set_ia32_exc_label(new_pred, 1);
4684 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4688 } else if (is_ia32_vfld(new_pred)) {
4691 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4693 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4694 case pn_Load_X_regular:
4695 return new_rd_Jmp(dbgi, irg, block);
4696 case pn_Load_X_except:
4697 /* This Load might raise an exception. Mark it. */
4698 set_ia32_exc_label(new_pred, 1);
/* NOTE(review): this is the vfld branch, yet the exception proj uses
 * pn_ia32_xLoad_X_exc — pn_ia32_vfld_X_exc looks intended (likely
 * copy-paste from the xLoad branch above); verify. */
4699 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4704 /* can happen for ProJMs when source address mode happened for the
4707 /* however it should not be the result proj, as that would mean the
4708 load had multiple users and should not have been used for
4710 if (proj != pn_Load_M) {
4711 panic("internal error: transformed node not a Load");
4713 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4717 return new_rd_Unknown(irg, get_irn_mode(node));
4721 * Transform and renumber the Projs from a DivMod like instruction.
4723 static ir_node *gen_Proj_DivMod(ir_node *node) {
4724 ir_node *block = be_transform_node(get_nodes_block(node));
4725 ir_node *pred = get_Proj_pred(node);
4726 ir_node *new_pred = be_transform_node(pred);
4727 ir_graph *irg = current_ir_graph;
4728 dbg_info *dbgi = get_irn_dbg_info(node);
4729 ir_mode *mode = get_irn_mode(node);
4730 long proj = get_Proj_proj(node);
/* Div, Mod and DivMod all become a single ia32 Div/IDiv, which yields
 * both quotient and remainder */
4732 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4734 switch (get_irn_opcode(pred)) {
4738 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4740 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4741 case pn_Div_X_regular:
4742 return new_rd_Jmp(dbgi, irg, block);
4743 case pn_Div_X_except:
4744 set_ia32_exc_label(new_pred, 1);
4745 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4753 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4755 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4756 case pn_Mod_X_except:
4757 set_ia32_exc_label(new_pred, 1);
4758 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4766 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4767 case pn_DivMod_res_div:
4768 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4769 case pn_DivMod_res_mod:
4770 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4771 case pn_DivMod_X_regular:
4772 return new_rd_Jmp(dbgi, irg, block);
4773 case pn_DivMod_X_except:
4774 set_ia32_exc_label(new_pred, 1);
4775 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4785 return new_rd_Unknown(irg, mode);
4789 * Transform and renumber the Projs from a CopyB.
4791 static ir_node *gen_Proj_CopyB(ir_node *node) {
4792 ir_node *block = be_transform_node(get_nodes_block(node));
4793 ir_node *pred = get_Proj_pred(node);
4794 ir_node *new_pred = be_transform_node(pred);
4795 ir_graph *irg = current_ir_graph;
4796 dbg_info *dbgi = get_irn_dbg_info(node);
4797 ir_mode *mode = get_irn_mode(node);
4798 long proj = get_Proj_proj(node);
/* a CopyB was transformed into either the fixed-size (CopyB_i) or the
 * generic variant; pick the matching mem proj number */
4801 case pn_CopyB_M_regular:
4802 if (is_ia32_CopyB_i(new_pred)) {
4803 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4804 } else if (is_ia32_CopyB(new_pred)) {
4805 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4813 return new_rd_Unknown(irg, mode);
4817 * Transform and renumber the Projs from a Quot.
4819 static ir_node *gen_Proj_Quot(ir_node *node) {
4820 ir_node *block = be_transform_node(get_nodes_block(node));
4821 ir_node *pred = get_Proj_pred(node);
4822 ir_node *new_pred = be_transform_node(pred);
4823 ir_graph *irg = current_ir_graph;
4824 dbg_info *dbgi = get_irn_dbg_info(node);
4825 ir_mode *mode = get_irn_mode(node);
4826 long proj = get_Proj_proj(node);
/* a Quot became either an SSE division (xDiv) or an x87 one (vfdiv);
 * the result mode differs accordingly (xmm vs vfp) */
4830 if (is_ia32_xDiv(new_pred)) {
4831 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4832 } else if (is_ia32_vfdiv(new_pred)) {
4833 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4837 if (is_ia32_xDiv(new_pred)) {
4838 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4839 } else if (is_ia32_vfdiv(new_pred)) {
4840 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4843 case pn_Quot_X_regular:
4844 case pn_Quot_X_except:
4850 return new_rd_Unknown(irg, mode);
4854 * Transform the Thread Local Storage Proj.
/**
 * Transform the Thread Local Storage Proj into an ia32 LdTls node
 * (mode_Iu) in the transformed block.
 */
4856 static ir_node *gen_Proj_tls(ir_node *node) {
4857 ir_node *block = be_transform_node(get_nodes_block(node));
4858 ir_graph *irg = current_ir_graph;
/* no debug info is attached to the generated LdTls */
4859 dbg_info *dbgi = NULL;
4860 ir_node *res = new_rd_ia32_LdTls(dbgi, irg, block, mode_Iu);
/**
 * Transform a be_Call: duplicate the node and mark all its outputs as
 * modifying the flags register (a call clobbers the eflags).
 */
4865 static ir_node *gen_be_Call(ir_node *node) {
4866 ir_node *res = be_duplicate_node(node);
4867 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
/**
 * Transform a be_IncSP: duplicate the node and mark it as modifying
 * the flags register (stack-pointer arithmetic changes eflags).
 */
4872 static ir_node *gen_be_IncSP(ir_node *node) {
4873 ir_node *res = be_duplicate_node(node);
4874 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4880 * Transform the Projs from a be_Call.
/**
 * Transform the Projs from a be_Call.
 *
 * Besides renumbering, this handles the SSE return-value fixup: when SSE2
 * is in use, a float result arrives in st(0) and must be moved into an
 * xmm register via a vfst (store st(0) to the frame) followed by an
 * xLoad, and the memory Proj has to be rewired to that sequence.
 */
4882 static ir_node *gen_Proj_be_Call(ir_node *node) {
4883 ir_node *block = be_transform_node(get_nodes_block(node));
4884 ir_node *call = get_Proj_pred(node);
4885 ir_node *new_call = be_transform_node(call);
4886 ir_graph *irg = current_ir_graph;
4887 dbg_info *dbgi = get_irn_dbg_info(node);
4888 ir_type *method_type = be_Call_get_type(call);
4889 int n_res = get_method_n_ress(method_type);
4890 long proj = get_Proj_proj(node);
4891 ir_mode *mode = get_irn_mode(node);
4893 const arch_register_class_t *cls;
4895 /* The following is kinda tricky: If we're using SSE, then we have to
4896 * move the result value of the call in floating point registers to an
4897 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4898 * after the call, we have to make sure to correctly make the
4899 * MemProj and the result Proj use these 2 nodes
4901 if (proj == pn_be_Call_M_regular) {
4902 // get new node for result, are we doing the sse load/store hack?
4903 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4904 ir_node *call_res_new;
4905 ir_node *call_res_pred = NULL;
4907 if (call_res != NULL) {
4908 call_res_new = be_transform_node(call_res);
4909 call_res_pred = get_Proj_pred(call_res_new);
/* no sse fixup happened: keep the memory Proj on the call itself */
4912 if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
4913 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4914 pn_be_Call_M_regular);
/* sse fixup happened: memory Proj must come from the xLoad */
4916 assert(is_ia32_xLoad(call_res_pred));
4917 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
/* float result Proj with SSE2 enabled: build the vfst/xLoad sequence */
4921 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4922 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4924 ir_node *frame = get_irg_frame(irg);
4925 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4927 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4930 /* in case there is no memory output: create one to serialize the copy
4932 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4933 pn_be_Call_M_regular);
4934 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4935 pn_be_Call_first_res);
4937 /* store st(0) onto stack */
4938 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4940 set_ia32_op_type(fstp, ia32_AddrModeD);
4941 set_ia32_use_frame(fstp);
4943 /* load into SSE register */
4944 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4946 set_ia32_op_type(sse_load, ia32_AddrModeS);
4947 set_ia32_use_frame(sse_load);
4949 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4955 /* transform call modes */
4956 if (mode_is_data(mode)) {
4957 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
/* default: keep the Proj on the transformed call, same proj number */
4961 return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4965 * Transform the Projs from a Cmp.
/**
 * A Proj on a Cmp must never reach the backend transformation: all
 * mode_b values are expected to have been lowered earlier, so this
 * aborts with a diagnostic.
 */
4967 static ir_node *gen_Proj_Cmp(ir_node *node)
4969 /* this probably means not all mode_b nodes were lowered... */
4970 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4975 * Transform and potentially renumber Proj nodes.
/**
 * Transform and potentially renumber Proj nodes.
 *
 * Dispatches on the opcode of the Proj's predecessor to the specialised
 * gen_Proj_* handler; nodes without a handler are duplicated unchanged
 * (with gp-register Projs forced to mode_Iu).
 */
4977 static ir_node *gen_Proj(ir_node *node) {
4978 ir_node *pred = get_Proj_pred(node);
4979 if (is_Store(pred)) {
4980 long proj = get_Proj_proj(node);
4981 if (proj == pn_Store_M) {
/* the transformed store itself represents the memory result */
4982 return be_transform_node(pred);
4985 return new_r_Bad(current_ir_graph);
4987 } else if (is_Load(pred)) {
4988 return gen_Proj_Load(node);
4989 } else if (is_Div(pred) || is_Mod(pred) || is_DivMod(pred)) {
4990 return gen_Proj_DivMod(node);
4991 } else if (is_CopyB(pred)) {
4992 return gen_Proj_CopyB(node);
4993 } else if (is_Quot(pred)) {
4994 return gen_Proj_Quot(node);
4995 } else if (be_is_SubSP(pred)) {
4996 return gen_Proj_be_SubSP(node);
4997 } else if (be_is_AddSP(pred)) {
4998 return gen_Proj_be_AddSP(node);
4999 } else if (be_is_Call(pred)) {
5000 return gen_Proj_be_Call(node);
5001 } else if (is_Cmp(pred)) {
5002 return gen_Proj_Cmp(node);
5003 } else if (get_irn_op(pred) == op_Start) {
5004 long proj = get_Proj_proj(node);
5005 if (proj == pn_Start_X_initial_exec) {
5006 ir_node *block = get_nodes_block(pred);
5007 dbg_info *dbgi = get_irn_dbg_info(node);
5010 /* we exchange the ProjX with a jump */
5011 block = be_transform_node(block);
5012 jump = new_rd_Jmp(dbgi, current_ir_graph, block);
5015 if (node == be_get_old_anchor(anchor_tls)) {
5016 return gen_Proj_tls(node);
5018 } else if (is_ia32_l_FloattoLL(pred)) {
5019 return gen_Proj_l_FloattoLL(node);
5021 } else if(!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5025 ir_mode *mode = get_irn_mode(node);
5026 if (mode_needs_gp_reg(mode)) {
/* gp-register values are represented as mode_Iu in the backend */
5027 ir_node *new_pred = be_transform_node(pred);
5028 ir_node *block = be_transform_node(get_nodes_block(node));
5029 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
5030 mode_Iu, get_Proj_proj(node));
5031 #ifdef DEBUG_libfirm
/* keep the original node number for easier debugging */
5032 new_proj->node_nr = node->node_nr;
/* default: no special handling, just copy the Proj */
5038 return be_duplicate_node(node);
5042 * Enters all transform functions into the generic pointer
/**
 * Enters all transform functions into the generic function pointer of
 * each opcode, so be_transform_graph can dispatch per node opcode.
 */
5044 static void register_transformers(void)
5048 /* first clear the generic function pointer for all ops */
5049 clear_irp_opcodes_generic_func();
/* GEN installs gen_<op> as the transformer; BAD installs a function
 * that aborts when an op that must not appear here is encountered */
5051 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5052 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5090 /* transform ops from intrinsic lowering */
5106 GEN(ia32_l_LLtoFloat);
5107 GEN(ia32_l_FloattoLL);
5113 /* we should never see these nodes */
5128 /* handle generic backend nodes */
5137 op_Mulh = get_op_Mulh();
5146 * Pre-transform all unknown and noreg nodes.
/**
 * Pre-transform all unknown and noreg nodes cached in the code
 * generator, so later transformations can reference the transformed
 * placeholders directly.
 *
 * @param arch_cg  the ia32 code generator environment (ia32_code_gen_t*)
 */
5148 static void ia32_pretransform_node(void *arch_cg) {
5149 ia32_code_gen_t *cg = arch_cg;
5151 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5152 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5153 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5154 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5155 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5156 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5161 * Walker, checks if all ia32 nodes producing more than one result have
5162 * its Projs, otherwise creates new Projs and keeps them using a be_Keep node.
/**
 * Walker: checks whether all ia32 nodes producing more than one result
 * have a Proj for every output; for outputs without a Proj it creates
 * the Proj and keeps it alive with a be_Keep node, so the register
 * allocator still assigns a register to that output.
 */
5164 static void add_missing_keep_walker(ir_node *node, void *data)
5167 unsigned found_projs = 0;
5168 const ir_edge_t *edge;
5169 ir_mode *mode = get_irn_mode(node);
/* only ia32 nodes are of interest */
5174 if(!is_ia32_irn(node))
5177 n_outs = get_ia32_n_res(node);
/* SwitchJmp outputs are control flow, not register results */
5180 if(is_ia32_SwitchJmp(node))
/* found_projs is used as a bitset, so n_outs must fit into it */
5183 assert(n_outs < (int) sizeof(unsigned) * 8);
/* record which outputs already have a Proj */
5184 foreach_out_edge(node, edge) {
5185 ir_node *proj = get_edge_src_irn(edge);
5186 int pn = get_Proj_proj(proj);
5188 assert(get_irn_mode(proj) == mode_M || pn < n_outs);
5189 found_projs |= 1 << pn;
5193 /* are keeps missing? */
5195 for(i = 0; i < n_outs; ++i) {
5198 const arch_register_req_t *req;
5199 const arch_register_class_t *class;
5201 if(found_projs & (1 << i)) {
5205 req = get_ia32_out_req(node, i);
/* flag outputs need no keep */
5210 if(class == &ia32_reg_classes[CLASS_ia32_flags]) {
/* create the missing Proj and attach it to a (shared) be_Keep */
5214 block = get_nodes_block(node);
5215 in[0] = new_r_Proj(current_ir_graph, block, node,
5216 arch_register_class_mode(class), i);
5217 if(last_keep != NULL) {
5218 be_Keep_add_node(last_keep, class, in[0]);
5220 last_keep = be_new_Keep(class, current_ir_graph, block, 1, in);
/* keep the schedule valid if the node is already scheduled */
5221 if(sched_is_scheduled(node)) {
5222 sched_add_after(node, last_keep);
5229 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/**
 * Adds missing keeps to nodes: walks the whole graph and adds missing
 * Proj nodes (kept alive via be_Keep) for unused outputs.
 *
 * @param cg  the ia32 code generator environment
 */
5232 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5234 ir_graph *irg = be_get_birg_irg(cg->birg);
5235 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5238 /* do the transformation */
/**
 * Do the transformation: rewrite the whole firm graph of the code
 * generator into ia32 backend nodes.
 *
 * Sets up the transformer table and the heights analysis, disables CSE
 * for the duration of the transformation, and frees the helper data
 * afterwards.
 *
 * @param cg  the ia32 code generator environment
 */
5239 void ia32_transform_graph(ia32_code_gen_t *cg) {
5241 ir_graph *irg = cg->irg;
5243 register_transformers();
5245 initial_fpcw = NULL;
5247 BE_TIMER_PUSH(t_heights);
5248 heights = heights_new(irg);
5249 BE_TIMER_POP(t_heights);
/* precompute which nodes must not be folded into address modes */
5250 ia32_calculate_non_address_mode_nodes(cg->birg);
5252 /* the transform phase is not safe for CSE (yet) because several nodes get
5253 * attributes set after their creation */
5254 cse_last = get_opt_cse();
5257 be_transform_graph(cg->birg, ia32_pretransform_node, cg);
/* restore the caller's CSE setting */
5259 set_opt_cse(cse_last);
5261 ia32_free_non_address_mode_nodes();
5262 heights_free(heights);
5266 void ia32_init_transform(void)
5268 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");