2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
50 #include "../benode.h"
51 #include "../besched.h"
53 #include "../beutil.h"
55 #include "../betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_util.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
71 /* define this to construct SSE constants instead of load them */
72 #undef CONSTRUCT_SSE_CONST
75 #define SFP_SIGN "0x80000000"
76 #define DFP_SIGN "0x8000000000000000"
77 #define SFP_ABS "0x7FFFFFFF"
78 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
79 #define DFP_INTMAX "9223372036854775807"
80 #define ULL_BIAS "18446744073709551616"
82 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
83 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
84 #define ENT_SFP_ABS "C_ia32_sfp_abs"
85 #define ENT_DFP_ABS "C_ia32_dfp_abs"
86 #define ENT_ULL_BIAS "C_ia32_ull_bias"
88 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
89 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
91 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
93 static ir_node *initial_fpcw = NULL;
94 int ia32_no_pic_adjust;
96 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
100 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
101 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
104 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
108 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
110 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
111 ir_node *base, ir_node *index, ir_node *mem);
113 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
114 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
117 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
119 static ir_node *create_immediate_or_transform(ir_node *node,
120 char immediate_constraint_type);
122 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
123 dbg_info *dbgi, ir_node *block,
124 ir_node *op, ir_node *orig_node);
126 /* its enough to have those once */
127 static ir_node *nomem, *noreg_GP;
129 /** a list to postprocess all calls */
130 static ir_node **call_list;
131 static ir_type **call_types;
133 /** Return non-zero is a node represents the 0 constant. */
134 static bool is_Const_0(ir_node *node)
136 return is_Const(node) && is_Const_null(node);
139 /** Return non-zero is a node represents the 1 constant. */
140 static bool is_Const_1(ir_node *node)
142 return is_Const(node) && is_Const_one(node);
145 /** Return non-zero is a node represents the -1 constant. */
146 static bool is_Const_Minus_1(ir_node *node)
148 return is_Const(node) && is_Const_all_one(node);
152 * returns true if constant can be created with a simple float command
154 static bool is_simple_x87_Const(ir_node *node)
156 ir_tarval *tv = get_Const_tarval(node);
157 if (tarval_is_null(tv) || tarval_is_one(tv))
160 /* TODO: match all the other float constants */
165  * returns true if constant can be created with a simple float command
/* Decides whether an SSE float constant can be synthesized with register
 * instructions (xorps for 0, shift tricks when CONSTRUCT_SSE_CONST is set)
 * rather than loaded from memory.
 * NOTE(review): several early-return lines of this predicate are elided in
 * this excerpt; only the visible statements are documented. */
167 static bool is_simple_sse_Const(ir_node *node)
169 	ir_tarval *tv = get_Const_tarval(node);
170 	ir_mode *mode = get_tarval_mode(tv);
/* a zero value can always be produced without a load (pxor/xorps) */
175 	if (tarval_is_null(tv)
176 #ifdef CONSTRUCT_SSE_CONST
181 #ifdef CONSTRUCT_SSE_CONST
182 	if (mode == mode_D) {
/* assemble the low 32 bits of the double, byte-wise, little endian */
183 		unsigned val = get_tarval_sub_bits(tv, 0) |
184 			(get_tarval_sub_bits(tv, 1) << 8) |
185 			(get_tarval_sub_bits(tv, 2) << 16) |
186 			(get_tarval_sub_bits(tv, 3) << 24);
188 		/* lower 32bit are zero, really a 32bit constant */
191 #endif /* CONSTRUCT_SSE_CONST */
192 	/* TODO: match all the other float constants */
197 * return NoREG or pic_base in case of PIC.
198 * This is necessary as base address for newly created symbols
200 static ir_node *get_symconst_base(void)
202 ir_graph *irg = current_ir_graph;
204 if (be_get_irg_options(irg)->pic) {
205 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
206 return arch_env->impl->get_pic_base(irg);
213  * Transforms a Const.
/* Transforms a firm Const node into the matching ia32 construct:
 * - SSE2 floats: xZero / (optionally) shift-constructed values / movd for
 *   32bit patterns / an xLoad from a float constant entity,
 * - x87 floats: fldz / fld1 / vfld from a float constant entity,
 * - integers: an ia32 Const immediate.
 * NOTE(review): declarations, closing braces and several fall-through lines
 * are elided in this excerpt. */
215 static ir_node *gen_Const(ir_node *node)
217 	ir_node *old_block = get_nodes_block(node);
218 	ir_node *block = be_transform_node(old_block);
219 	dbg_info *dbgi = get_irn_dbg_info(node);
220 	ir_mode *mode = get_irn_mode(node);
222 	assert(is_Const(node));
224 	if (mode_is_float(mode)) {
230 		if (ia32_cg_config.use_sse2) {
231 			ir_tarval *tv = get_Const_tarval(node);
232 			if (tarval_is_null(tv)) {
/* 0.0: xorps the register with itself, no memory access needed */
233 				load = new_bd_ia32_xZero(dbgi, block);
234 				set_ia32_ls_mode(load, mode);
236 #ifdef CONSTRUCT_SSE_CONST
237 			} else if (tarval_is_one(tv)) {
/* construct 1.0 via all-ones followed by pslld/psrld shifts; the shift
 * amounts differ for single (26) and double (55) precision */
238 				int cnst = mode == mode_F ? 26 : 55;
239 				ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
240 				ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
241 				ir_node *pslld, *psrld;
243 				load = new_bd_ia32_xAllOnes(dbgi, block);
244 				set_ia32_ls_mode(load, mode);
245 				pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
246 				set_ia32_ls_mode(pslld, mode);
247 				psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
248 				set_ia32_ls_mode(psrld, mode);
250 #endif /* CONSTRUCT_SSE_CONST */
251 			} else if (mode == mode_F) {
252 				/* we can place any 32bit constant by using a movd gp, sse */
/* assemble the 32bit pattern byte-wise, little endian */
253 				unsigned val = get_tarval_sub_bits(tv, 0) |
254 					(get_tarval_sub_bits(tv, 1) << 8) |
255 					(get_tarval_sub_bits(tv, 2) << 16) |
256 					(get_tarval_sub_bits(tv, 3) << 24);
257 				ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
258 				load = new_bd_ia32_xMovd(dbgi, block, cnst);
259 				set_ia32_ls_mode(load, mode);
262 #ifdef CONSTRUCT_SSE_CONST
263 				if (mode == mode_D) {
/* low half of the double; presumably checked against 0 on an elided
 * line before the psllq construction below — confirm in full source */
264 					unsigned val = get_tarval_sub_bits(tv, 0) |
265 						(get_tarval_sub_bits(tv, 1) << 8) |
266 						(get_tarval_sub_bits(tv, 2) << 16) |
267 						(get_tarval_sub_bits(tv, 3) << 24);
269 					ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
270 					ir_node *cnst, *psllq;
272 					/* fine, lower 32bit are zero, produce 32bit value */
273 					val = get_tarval_sub_bits(tv, 4) |
274 						(get_tarval_sub_bits(tv, 5) << 8) |
275 						(get_tarval_sub_bits(tv, 6) << 16) |
276 						(get_tarval_sub_bits(tv, 7) << 24);
277 					cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
278 					load = new_bd_ia32_xMovd(dbgi, block, cnst);
279 					set_ia32_ls_mode(load, mode);
/* shift the pattern into the upper 32 bits of the xmm register */
280 					psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
281 					set_ia32_ls_mode(psllq, mode);
286 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the constant from a global float entity */
287 			floatent = ia32_create_float_const_entity(node);
289 			base = get_symconst_base();
290 			load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
292 			set_ia32_op_type(load, ia32_AddrModeS);
293 			set_ia32_am_sc(load, floatent);
/* constant loads can be rematerialized instead of spilled */
294 			arch_irn_add_flags(load, arch_irn_flags_rematerializable);
295 			res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: special-case 0.0 and 1.0, otherwise load from entity */
298 			if (is_Const_null(node)) {
299 				load = new_bd_ia32_vfldz(dbgi, block);
301 				set_ia32_ls_mode(load, mode);
302 			} else if (is_Const_one(node)) {
303 				load = new_bd_ia32_vfld1(dbgi, block);
305 				set_ia32_ls_mode(load, mode);
310 				floatent = ia32_create_float_const_entity(node);
311 				/* create_float_const_ent is smart and sometimes creates
/* the entity may use a smaller mode than the node, so take the mode
 * from the entity's type */
313 				ls_mode = get_type_mode(get_entity_type(floatent));
314 				base = get_symconst_base();
315 				load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
317 				set_ia32_op_type(load, ia32_AddrModeS);
318 				set_ia32_am_sc(load, floatent);
319 				arch_irn_add_flags(load, arch_irn_flags_rematerializable);
320 				res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
323 #ifdef CONSTRUCT_SSE_CONST
325 #endif /* CONSTRUCT_SSE_CONST */
326 		SET_IA32_ORIG_NODE(load, node);
328 		be_dep_on_frame(load);
330 	} else { /* non-float mode */
332 		ir_tarval *tv = get_Const_tarval(node);
/* normalize to a 32bit unsigned tarval to obtain the immediate bits */
335 		tv = tarval_convert_to(tv, mode_Iu);
337 		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
339 			panic("couldn't convert constant tarval (%+F)", node);
341 		val = get_tarval_long(tv);
343 		cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
344 		SET_IA32_ORIG_NODE(cnst, node);
346 		be_dep_on_frame(cnst);
352  * Transforms a SymConst.
/* Transforms a firm SymConst (address of an entity) into either a float
 * load (SSE xLoad / x87 vfld) or an ia32 Const carrying the entity.
 * NOTE(review): declarations, else-branch and return lines are elided in
 * this excerpt. */
354 static ir_node *gen_SymConst(ir_node *node)
356 	ir_node *old_block = get_nodes_block(node);
357 	ir_node *block = be_transform_node(old_block);
358 	dbg_info *dbgi = get_irn_dbg_info(node);
359 	ir_mode *mode = get_irn_mode(node);
362 	if (mode_is_float(mode)) {
/* the two lines below are if/else alternatives (SSE2 vs. x87) */
363 		if (ia32_cg_config.use_sse2)
364 			cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
366 			cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
367 		set_ia32_am_sc(cnst, get_SymConst_entity(node));
368 		set_ia32_use_frame(cnst);
/* only entity addresses are supported by this backend */
372 		if (get_SymConst_kind(node) != symconst_addr_ent) {
373 			panic("backend only support symconst_addr_ent (at %+F)", node);
375 		entity = get_SymConst_entity(node);
376 		cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
379 	SET_IA32_ORIG_NODE(cnst, node);
381 	be_dep_on_frame(cnst);
386  * Create a float type for the given mode and cache it.
388  * @param mode the mode for the float type (might be integer mode for SSE2 types)
389  * @param align alignment
/* One static cache array per supported mode, indexed by alignment.
 * NOTE(review): the caches are sized [16] and indexed by align without a
 * visible bounds check here — presumably callers guarantee align < 16;
 * confirm in the full source. */
391 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
397 	if (mode == mode_Iu) {
398 		static ir_type *int_Iu[16] = {NULL, };
400 		if (int_Iu[align] == NULL) {
401 			int_Iu[align] = tp = new_type_primitive(mode);
402 			/* set the specified alignment */
403 			set_type_alignment_bytes(tp, align);
405 		return int_Iu[align];
406 	} else if (mode == mode_Lu) {
407 		static ir_type *int_Lu[16] = {NULL, };
409 		if (int_Lu[align] == NULL) {
410 			int_Lu[align] = tp = new_type_primitive(mode);
411 			/* set the specified alignment */
412 			set_type_alignment_bytes(tp, align);
414 		return int_Lu[align];
415 	} else if (mode == mode_F) {
416 		static ir_type *float_F[16] = {NULL, };
418 		if (float_F[align] == NULL) {
419 			float_F[align] = tp = new_type_primitive(mode);
420 			/* set the specified alignment */
421 			set_type_alignment_bytes(tp, align);
423 		return float_F[align];
424 	} else if (mode == mode_D) {
425 		static ir_type *float_D[16] = {NULL, };
427 		if (float_D[align] == NULL) {
428 			float_D[align] = tp = new_type_primitive(mode);
429 			/* set the specified alignment */
430 			set_type_alignment_bytes(tp, align);
432 		return float_D[align];
/* fallback: extended precision (mode_E) */
434 		static ir_type *float_E[16] = {NULL, };
436 		if (float_E[align] == NULL) {
437 			float_E[align] = tp = new_type_primitive(mode);
438 			/* set the specified alignment */
439 			set_type_alignment_bytes(tp, align);
441 		return float_E[align];
446  * Create a float[2] array type for the given atomic type.
448  * @param tp the atomic type
/* Caches one array type per (mode, alignment) pair; the array has two
 * elements (used e.g. for the ULL bias constant pair).
 * NOTE(review): the declaration of the local array variable and the final
 * return are elided in this excerpt. */
450 static ir_type *ia32_create_float_array(ir_type *tp)
452 	ir_mode *mode = get_type_mode(tp);
453 	unsigned align = get_type_alignment_bytes(tp);
458 	if (mode == mode_F) {
459 		static ir_type *float_F[16] = {NULL, };
461 		if (float_F[align] != NULL)
462 			return float_F[align];
463 		arr = float_F[align] = new_type_array(1, tp);
464 	} else if (mode == mode_D) {
465 		static ir_type *float_D[16] = {NULL, };
467 		if (float_D[align] != NULL)
468 			return float_D[align];
469 		arr = float_D[align] = new_type_array(1, tp);
/* fallback: extended precision (mode_E) */
471 		static ir_type *float_E[16] = {NULL, };
473 		if (float_E[align] != NULL)
474 			return float_E[align];
475 		arr = float_E[align] = new_type_array(1, tp);
/* two elements of tp, fixed layout, same alignment as the element */
477 	set_type_alignment_bytes(arr, align);
478 	set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
479 	set_type_state(arr, layout_fixed);
483 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Creates (and caches) the global constant entities used for float sign
 * manipulation (sign/abs masks) and the unsigned-long-long conversion bias.
 * The `mode` table column selects Iu/Lu/F; ULL_BIAS gets a 2-element array
 * initializer {0, bias}. */
484 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
486 	static const struct {
487 		const char *ent_name;
488 		const char *cnst_str;
/* two more fields (mode selector and alignment) are elided here */
491 	} names [ia32_known_const_max] = {
492 		{ ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
493 		{ ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
494 		{ ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
495 		{ ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
496 		{ ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
498 	static ir_entity *ent_cache[ia32_known_const_max];
500 	const char *ent_name, *cnst_str;
506 	ent_name = names[kct].ent_name;
507 	if (! ent_cache[kct]) {
508 		cnst_str = names[kct].cnst_str;
/* decode the mode selector column of the table */
510 		switch (names[kct].mode) {
511 		case 0: mode = mode_Iu; break;
512 		case 1: mode = mode_Lu; break;
513 		default: mode = mode_F; break;
515 		tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
516 		tp = ia32_create_float_type(mode, names[kct].align);
518 		if (kct == ia32_ULLBIAS)
519 			tp = ia32_create_float_array(tp);
520 		ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
522 		set_entity_ld_ident(ent, get_entity_ident(ent));
523 		add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
524 		set_entity_visibility(ent, ir_visibility_private);
526 		if (kct == ia32_ULLBIAS) {
/* {0, bias}: element 0 is zero so a 64bit load yields the bias in the
 * upper half */
527 			ir_initializer_t *initializer = create_initializer_compound(2);
529 			set_initializer_compound_value(initializer, 0,
530 				create_initializer_tarval(get_mode_null(mode)));
531 			set_initializer_compound_value(initializer, 1,
532 				create_initializer_tarval(tv));
534 			set_entity_initializer(ent, initializer);
536 			set_entity_initializer(ent, create_initializer_tarval(tv));
539 		/* cache the entry */
540 		ent_cache[kct] = ent;
543 	return ent_cache[kct];
547  * return true if the node is a Proj(Load) and could be used in source address
548  * mode for another node. Will return only true if the @p other node is not
549  * dependent on the memory of the Load (for binary operations use the other
550  * input here, for unary operations use NULL).
/* NOTE(review): several return lines between the visible checks are elided
 * in this excerpt. */
552 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
553 		ir_node *other, ir_node *other2, match_flags_t flags)
558 	/* float constants are always available */
559 	if (is_Const(node)) {
560 		ir_mode *mode = get_irn_mode(node);
561 		if (mode_is_float(mode)) {
562 			if (ia32_cg_config.use_sse2) {
563 				if (is_simple_sse_Const(node))
566 				if (is_simple_x87_Const(node))
/* multi-user float constants are better kept in a register */
569 				if (get_irn_n_edges(node) > 1)
577 	load = get_Proj_pred(node);
578 	pn = get_Proj_proj(node);
/* only the result Proj of a Load qualifies for source AM */
579 	if (!is_Load(load) || pn != pn_Load_res)
581 	if (get_nodes_block(load) != block)
583 	/* we only use address mode if we're the only user of the load */
584 	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
586 	/* in some edge cases with address mode we might reach the load normally
587 	 * and through some AM sequence, if it is already materialized then we
588 	 * can't create an AM node from it */
589 	if (be_is_transformed(node))
592 	/* don't do AM if other node inputs depend on the load (via mem-proj) */
593 	if (other != NULL && ia32_prevents_AM(block, load, other))
596 	if (other2 != NULL && ia32_prevents_AM(block, load, other2))
602 typedef struct ia32_address_mode_t ia32_address_mode_t;
/* Result record of match_arguments(): the computed address, operand kind and
 * the matched operands. NOTE(review): several fields (addr, ls_mode, pinned,
 * mem_proj, new_op1/new_op2, …) are elided in this excerpt. */
603 struct ia32_address_mode_t {
/* whether the matched operation uses normal, source-AM or dest-AM form */
608 	ia32_op_type_t op_type;
/* operation is commutative, operands may be swapped */
612 	unsigned commutative : 1;
/* set when the operands were swapped during matching */
613 	unsigned ins_permuted : 1;
616 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
618 /* construct load address */
619 memset(addr, 0, sizeof(addr[0]));
620 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
622 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
623 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
624 addr->mem = be_transform_node(mem);
/* Fill am->addr (and ls_mode/pinned/mem_proj) for a node that will be used
 * in source address mode: either a float Const (loaded from its constant
 * entity) or a Proj(Load). NOTE(review): local declarations, early returns
 * and the final base/index assignment context are elided in this excerpt. */
627 static void build_address(ia32_address_mode_t *am, ir_node *node,
628 		ia32_create_am_flags_t flags)
630 	ia32_address_t *addr = &am->addr;
636 	/* floating point immediates */
637 	if (is_Const(node)) {
/* address the constant through its global entity */
638 		ir_entity *entity = ia32_create_float_const_entity(node);
639 		addr->base = get_symconst_base();
640 		addr->index = noreg_GP;
642 		addr->symconst_ent = entity;
644 		am->ls_mode = get_type_mode(get_entity_type(entity));
/* constant loads may float freely, they have no side effects */
645 		am->pinned = op_pin_state_floats;
/* otherwise node is a Proj(Load): reuse the load's address and memory */
649 	load = get_Proj_pred(node);
650 	ptr = get_Load_ptr(load);
651 	mem = get_Load_mem(load);
652 	new_mem = be_transform_node(mem);
653 	am->pinned = get_irn_pinned(load);
654 	am->ls_mode = get_Load_mode(load);
655 	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
658 	/* construct load address */
659 	ia32_create_address_mode(addr, ptr, flags);
661 	addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
662 	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy the address components of @p addr (scale, symconst, offset, frame
 * entity) onto the ia32 node's attributes. NOTE(review): the condition
 * guarding set_ia32_use_frame() is elided in this excerpt — presumably
 * `if (addr->use_frame)`; confirm in the full source. */
666 static void set_address(ir_node *node, const ia32_address_t *addr)
668 	set_ia32_am_scale(node, addr->scale);
669 	set_ia32_am_sc(node, addr->symconst_ent);
670 	set_ia32_am_offs_int(node, addr->offset);
671 	if (addr->symconst_sign)
672 		set_ia32_am_sc_sign(node);
674 		set_ia32_use_frame(node);
675 	set_ia32_frame_ent(node, addr->frame_entity);
679  * Apply attributes of a given address mode to a node.
/* Transfers address, op_type, ls_mode, pinned state and commutativity from
 * the matched address mode onto the created ia32 node. NOTE(review): the
 * condition guarding set_ia32_commutative() is elided in this excerpt —
 * presumably `if (am->commutative)`; confirm in the full source. */
681 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
683 	set_address(node, &am->addr);
685 	set_ia32_op_type(node, am->op_type);
686 	set_ia32_ls_mode(node, am->ls_mode);
687 	if (am->pinned == op_pin_state_pinned) {
688 		/* beware: some nodes are already pinned and did not allow to change the state */
689 		if (get_irn_pinned(node) != op_pin_state_pinned)
690 			set_irn_pinned(node, op_pin_state_pinned);
693 		set_ia32_commutative(node);
697  * Check, if a given node is a Down-Conv, ie. a integer Conv
698  * from a mode with a mode with more bits to a mode with lesser bits.
699  * Moreover, we return only true if the node has not more than 1 user.
701  * @param node the node
702  * @return non-zero if node is a Down-Conv
/* NOTE(review): the is_Conv() guard, local declarations and the return of
 * the final boolean expression are elided in this excerpt. */
704 static int is_downconv(const ir_node *node)
712 	/* we only want to skip the conv when we're the only user
713 	 * (because this test is used in the context of address-mode selection
714 	 * and we don't want to use address mode for multiple users) */
715 	if (get_irn_n_edges(node) > 1)
718 	src_mode = get_irn_mode(get_Conv_op(node));
719 	dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the destination is narrower
 * or equal in size */
721 		ia32_mode_needs_gp_reg(src_mode) &&
722 		ia32_mode_needs_gp_reg(dest_mode) &&
723 		get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
726 /** Skip all Down-Conv's on a given node and return the resulting node. */
727 ir_node *ia32_skip_downconv(ir_node *node)
729 while (is_downconv(node))
730 node = get_Conv_op(node);
/* True if node is an integer Conv between two GP modes of equal bit size
 * (i.e. a pure signedness change) with at most one user. NOTE(review): the
 * is_Conv() guard, local declarations and the return of the final boolean
 * expression are elided in this excerpt. */
735 static bool is_sameconv(ir_node *node)
743 	/* we only want to skip the conv when we're the only user
744 	 * (because this test is used in the context of address-mode selection
745 	 * and we don't want to use address mode for multiple users) */
746 	if (get_irn_n_edges(node) > 1)
749 	src_mode = get_irn_mode(get_Conv_op(node));
750 	dest_mode = get_irn_mode(node);
/* both GP modes and exactly the same size: only signedness differs */
752 		ia32_mode_needs_gp_reg(src_mode) &&
753 		ia32_mode_needs_gp_reg(dest_mode) &&
754 		get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
757 /** Skip all signedness convs */
758 static ir_node *ia32_skip_sameconv(ir_node *node)
760 while (is_sameconv(node))
761 node = get_Conv_op(node);
/* Widen @p node to a 32bit GP value (signed → mode_Is, unsigned → mode_Iu,
 * judging by the visible signed check) via an I2I Conv. NOTE(review): the
 * target-mode selection lines and local declarations are elided in this
 * excerpt. */
766 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
768 	ir_mode *mode = get_irn_mode(node);
773 	if (mode_is_signed(mode)) {
778 	block = get_nodes_block(node);
779 	dbgi = get_irn_dbg_info(node);
781 	return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
785  * matches operands of a node into ia32 addressing/operand modes. This covers
786  * usage of source address mode, immediates, operations with non 32-bit modes,
788  * The resulting data is filled into the @p am struct. block is the block
789  * of the node whose arguments are matched. op1, op2 are the first and second
790  * input that are matched (op1 may be NULL). other_op is another unrelated
791  * input that is not matched! but which is needed sometimes to check if AM
792  * for op1/op2 is legal.
793  * @p flags describes the supported modes of the operation in detail.
/* NOTE(review): this excerpt elides several lines of this function (flag
 * clearing for small modes, branch bodies, the else-chains); comments below
 * only describe the visible statements. */
795 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
796 		ir_node *op1, ir_node *op2, ir_node *other_op,
799 	ia32_address_t *addr = &am->addr;
800 	ir_mode *mode = get_irn_mode(op2);
801 	int mode_bits = get_mode_size_bits(mode);
802 	ir_node *new_op1, *new_op2;
804 	unsigned commutative;
805 	int use_am_and_immediates;
808 	memset(am, 0, sizeof(am[0]));
/* decode the individual capabilities from the flags bitset */
810 	commutative = (flags & match_commutative) != 0;
811 	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
812 	use_am = (flags & match_am) != 0;
813 	use_immediate = (flags & match_immediate) != 0;
814 	assert(!use_am_and_immediates || use_immediate);
817 	assert(!commutative || op1 != NULL);
818 	assert(use_am || !(flags & match_8bit_am));
819 	assert(use_am || !(flags & match_16bit_am));
/* 8/16 bit operations need explicit support to use source AM */
821 	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
822 	    (mode_bits == 16 && !(flags & match_16bit_am))) {
826 	/* we can simply skip downconvs for mode neutral nodes: the upper bits
827 	 * can be random for these operations */
828 	if (flags & match_mode_neutral) {
829 		op2 = ia32_skip_downconv(op2);
831 			op1 = ia32_skip_downconv(op1);
/* non-mode-neutral: only signedness-changing convs may be skipped */
834 		op2 = ia32_skip_sameconv(op2);
836 			op1 = ia32_skip_sameconv(op1);
840 	/* match immediates. firm nodes are normalized: constants are always on the
843 	if (!(flags & match_try_am) && use_immediate) {
844 		new_op2 = ia32_try_create_Immediate(op2, 0);
/* try source AM for op2 first … */
847 	if (new_op2 == NULL &&
848 	    use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
849 		build_address(am, op2, ia32_create_am_normal);
850 		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
851 		if (mode_is_float(mode)) {
852 			new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
856 		am->op_type = ia32_AddrModeS;
/* … otherwise, for commutative ops, try source AM for op1 (operands
 * are swapped, recorded via ins_permuted) */
857 	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
859 	           ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
861 		build_address(am, op1, ia32_create_am_normal);
863 		if (mode_is_float(mode)) {
864 			noreg = ia32_new_NoReg_vfp(current_ir_graph);
869 		if (new_op2 != NULL) {
872 			new_op1 = be_transform_node(op2);
874 			am->ins_permuted = true;
876 		am->op_type = ia32_AddrModeS;
/* no source AM possible: normal two-register form */
879 		am->op_type = ia32_Normal;
881 		if (flags & match_try_am) {
887 		mode = get_irn_mode(op2);
/* widen sub-32bit operands when the operation requires 32bit inputs */
888 		if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
889 			new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
891 			new_op2 = create_upconv(op2, NULL);
892 			am->ls_mode = mode_Iu;
894 			new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
896 			new_op2 = be_transform_node(op2);
897 			am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill unset address parts with the no-register defaults */
900 	if (addr->base == NULL)
901 		addr->base = noreg_GP;
902 	if (addr->index == NULL)
903 		addr->index = noreg_GP;
904 	if (addr->mem == NULL)
907 	am->new_op1 = new_op1;
908 	am->new_op2 = new_op2;
909 	am->commutative = commutative;
913  * "Fixes" a node that uses address mode by turning it into mode_T
914  * and returning a pn_ia32_res Proj.
916  * @param node the node
917  * @param am its address mode
919  * @return a Proj(pn_ia32_res) if a memory address mode is used,
/* NOTE(review): the early `return node;` paths and local declarations are
 * elided in this excerpt. */
922 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do when no Load memory Proj was consumed */
927 	if (am->mem_proj == NULL)
930 	/* we have to create a mode_T so the old MemProj can attach to us */
931 	mode = get_irn_mode(node);
932 	load = get_Proj_pred(am->mem_proj);
/* redirect the old Load's users to the new AM node */
934 	be_set_transformed_node(load, node);
936 	if (mode != mode_T) {
937 		set_irn_mode(node, mode_T);
938 		return new_rd_Proj(NULL, node, mode, pn_ia32_res);
945  * Construct a standard binary operation, set AM and immediate if required.
947  * @param node The original node for which the binop is created
948  * @param op1 The first operand
949  * @param op2 The second operand
950  * @param func The node constructor function
951  * @return The constructed ia32 node.
/* NOTE(review): declarations and the final return are elided in this
 * excerpt. */
953 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
954 		construct_binop_func *func, match_flags_t flags)
957 	ir_node *block, *new_block, *new_node;
958 	ia32_address_mode_t am;
959 	ia32_address_t *addr = &am.addr;
961 	block = get_nodes_block(node);
/* match immediates / source address mode for the two operands */
962 	match_arguments(&am, block, op1, op2, NULL, flags);
964 	dbgi = get_irn_dbg_info(node);
965 	new_block = be_transform_node(block);
966 	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
967 		am.new_op1, am.new_op2);
968 	set_am_attributes(new_node, &am);
969 	/* we can't use source address mode anymore when using immediates */
970 	if (!(flags & match_am_and_immediates) &&
971 	    (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 		set_ia32_am_support(new_node, ia32_am_none);
973 	SET_IA32_ORIG_NODE(new_node, node);
/* reattach a consumed Load memory Proj, if any */
975 	new_node = fix_mem_proj(new_node, &am);
981  * Generic names for the inputs of an ia32 binary op.
/* NOTE(review): the `enum {` opener and closing `};` are elided in this
 * excerpt. The asserts below pin these generic indices to the generated
 * Adc/Sbb node input indices. */
984 	n_ia32_l_binop_left, /**< ia32 left input */
985 	n_ia32_l_binop_right, /**< ia32 right input */
986 	n_ia32_l_binop_eflags /**< ia32 eflags input */
988 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
989 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
990 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
991 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
992 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
993 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
996  * Construct a binary operation which also consumes the eflags.
998  * @param node The node to transform
999  * @param func The node constructor function
1000  * @param flags The match flags
1001  * @return The constructor ia32 node
/* Like gen_binop(), but for lowered nodes (Adc/Sbb) that additionally take
 * an eflags input. NOTE(review): declarations and the final return are
 * elided in this excerpt. */
1003 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1004 		match_flags_t flags)
1006 	ir_node *src_block = get_nodes_block(node);
1007 	ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1008 	ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1009 	ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1011 	ir_node *block, *new_node, *new_eflags;
1012 	ia32_address_mode_t am;
1013 	ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: AM must not depend on its memory */
1015 	match_arguments(&am, src_block, op1, op2, eflags, flags);
1017 	dbgi = get_irn_dbg_info(node);
1018 	block = be_transform_node(src_block);
1019 	new_eflags = be_transform_node(eflags);
1020 	new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1021 		am.new_op1, am.new_op2, new_eflags);
1022 	set_am_attributes(new_node, &am);
1023 	/* we can't use source address mode anymore when using immediates */
1024 	if (!(flags & match_am_and_immediates) &&
1025 	    (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1026 		set_ia32_am_support(new_node, ia32_am_none);
1027 	SET_IA32_ORIG_NODE(new_node, node);
1029 	new_node = fix_mem_proj(new_node, &am);
/* Return the (cached) transformed x87 floating point control word node;
 * lazily initializes the file-static initial_fpcw from the ABI's ignore
 * register. NOTE(review): the local fpcw declaration line is elided in this
 * excerpt. */
1034 static ir_node *get_fpcw(void)
1037 	if (initial_fpcw != NULL)
1038 		return initial_fpcw;
1040 	fpcw = be_abi_get_ignore_irn(be_get_irg_abi(current_ir_graph),
1041 		&ia32_registers[REG_FPCW]);
1042 	initial_fpcw = be_transform_node(fpcw);
1044 	return initial_fpcw;
1048  * Construct a standard binary operation, set AM and immediate if required.
1050  * @param op1 The first operand
1051  * @param op2 The second operand
1052  * @param func The node constructor function
1053  * @return The constructed ia32 node.
/* x87 variant of gen_binop(): passes the fpcw, records operand permutation
 * in the x87 attributes. NOTE(review): declarations, else-branches and the
 * final return are elided in this excerpt. */
1055 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1056 		construct_binop_float_func *func)
1058 	ir_mode *mode = get_irn_mode(node);
1060 	ir_node *block, *new_block, *new_node;
1061 	ia32_address_mode_t am;
1062 	ia32_address_t *addr = &am.addr;
1063 	ia32_x87_attr_t *attr;
1064 	/* All operations are considered commutative, because there are reverse
1066 	match_flags_t flags = match_commutative;
1068 	/* happens for div nodes... */
1069 	if (mode == mode_T) {
/* mode_T nodes carry the result mode on the Div attribute */
1071 		mode = get_Div_resmode(node);
1073 		panic("can't determine mode");
1076 	/* cannot use address mode with long double on x87 */
1077 	if (get_mode_size_bits(mode) <= 64)
1080 	block = get_nodes_block(node);
1081 	match_arguments(&am, block, op1, op2, NULL, flags);
1083 	dbgi = get_irn_dbg_info(node);
1084 	new_block = be_transform_node(block);
/* fpcw is an extra input: rounding mode control for x87 ops */
1085 	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1086 		am.new_op1, am.new_op2, get_fpcw());
1087 	set_am_attributes(new_node, &am);
/* remember whether match_arguments swapped the operands */
1089 	attr = get_ia32_x87_attr(new_node);
1090 	attr->attr.data.ins_permuted = am.ins_permuted;
1092 	SET_IA32_ORIG_NODE(new_node, node);
1094 	new_node = fix_mem_proj(new_node, &am);
1100  * Construct a shift/rotate binary operation, sets AM and immediate if required.
1102  * @param op1 The first operand
1103  * @param op2 The second operand
1104  * @param func The node constructor function
1105  * @return The constructed ia32 node.
/* NOTE(review): declarations, the loop's op2-advance statement and the
 * final return are elided in this excerpt. */
1107 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1108 		construct_shift_func *func,
1109 		match_flags_t flags)
1112 	ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1114 	assert(! mode_is_float(get_irn_mode(node)));
1115 	assert(flags & match_immediate);
1116 	assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1118 	if (flags & match_mode_neutral) {
/* upper bits don't matter: narrowing convs on op1 can be skipped */
1119 		op1 = ia32_skip_downconv(op1);
1120 		new_op1 = be_transform_node(op1);
1121 	} else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1122 		new_op1 = create_upconv(op1, node);
1124 		new_op1 = be_transform_node(op1);
1127 	/* the shift amount can be any mode that is bigger than 5 bits, since all
1128 	 * other bits are ignored anyway */
1129 	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1130 		ir_node *const op = get_Conv_op(op2);
/* never skip a float->int conversion of the shift amount */
1131 		if (mode_is_float(get_irn_mode(op)))
1134 	assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1136 	new_op2 = create_immediate_or_transform(op2, 0);
1138 	dbgi = get_irn_dbg_info(node);
1139 	block = get_nodes_block(node);
1140 	new_block = be_transform_node(block);
1141 	new_node = func(dbgi, new_block, new_op1, new_op2);
1142 	SET_IA32_ORIG_NODE(new_node, node);
1144 	/* lowered shift instruction may have a dependency operand, handle it here */
1145 	if (get_irn_arity(node) == 3) {
1146 		/* we have a dependency */
1147 		ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1148 		add_irn_dep(new_node, new_dep);
1156  * Construct a standard unary operation, set AM and immediate if required.
1158  * @param op The operand
1159  * @param func The node constructor function
1160  * @return The constructed ia32 node.
/* NOTE(review): declarations and the final return are elided in this
 * excerpt. */
1162 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1163 		match_flags_t flags)
1166 	ir_node *block, *new_block, *new_op, *new_node;
/* only mode-neutral is meaningful for unary ops */
1168 	assert(flags == 0 || flags == match_mode_neutral);
1169 	if (flags & match_mode_neutral) {
1170 		op = ia32_skip_downconv(op);
1173 	new_op = be_transform_node(op);
1174 	dbgi = get_irn_dbg_info(node);
1175 	block = get_nodes_block(node);
1176 	new_block = be_transform_node(block);
1177 	new_node = func(dbgi, new_block, new_op);
1179 	SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from the given address (base + index*scale +
 * offset/symconst). NOTE(review): the base extraction, the NULL fallbacks
 * (presumably noreg_GP) and the final return are elided in this excerpt. */
1184 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1185 		ia32_address_t *addr)
1187 	ir_node *base, *index, *res;
1193 	base = be_transform_node(base);
1196 	index = addr->index;
1197 	if (index == NULL) {
1200 		index = be_transform_node(index);
1203 	res = new_bd_ia32_Lea(dbgi, block, base, index);
/* copy scale/offset/symconst/frame info onto the Lea */
1204 	set_address(res, addr);
1210 * Returns non-zero if a given address mode has a symbolic or
1211 * numerical offset != 0.
1213 static int am_has_immediates(const ia32_address_t *addr)
1215 return addr->offset != 0 || addr->symconst_ent != NULL
1216 || addr->frame_entity || addr->use_frame;
1220 * Creates an ia32 Add.
1222 * @return the created ia32 Add node
1224 static ir_node *gen_Add(ir_node *node)
1226 ir_mode *mode = get_irn_mode(node);
1227 ir_node *op1 = get_Add_left(node);
1228 ir_node *op2 = get_Add_right(node);
1230 ir_node *block, *new_block, *new_node, *add_immediate_op;
1231 ia32_address_t addr;
1232 ia32_address_mode_t am;
/* floating point adds go to SSE (xAdd) or x87 (vfadd) */
1234 if (mode_is_float(mode)) {
1235 if (ia32_cg_config.use_sse2)
1236 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1237 match_commutative | match_am);
1239 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1242 ia32_mark_non_am(node);
1244 op2 = ia32_skip_downconv(op2);
1245 op1 = ia32_skip_downconv(op1);
1249 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1250 * 1. Add with immediate -> Lea
1251 * 2. Add with possible source address mode -> Add
1252 * 3. Otherwise -> Lea
/* let the address-mode builder fold the whole Add tree into an address */
1254 memset(&addr, 0, sizeof(addr));
1255 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1256 add_immediate_op = NULL;
1258 dbgi = get_irn_dbg_info(node);
1259 block = get_nodes_block(node);
1260 new_block = be_transform_node(block);
/* case 0: everything folded into offset/symconst -> plain Const */
1263 if (addr.base == NULL && addr.index == NULL) {
1264 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1265 addr.symconst_sign, 0, addr.offset);
1266 be_dep_on_frame(new_node);
1267 SET_IA32_ORIG_NODE(new_node, node);
1270 /* add with immediate? */
1271 if (addr.index == NULL) {
1272 add_immediate_op = addr.base;
1273 } else if (addr.base == NULL && addr.scale == 0) {
1274 add_immediate_op = addr.index;
1277 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself -- warn, this should have been
 * optimized away earlier */
1278 if (!am_has_immediates(&addr)) {
1279 #ifdef DEBUG_libfirm
1280 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1283 return be_transform_node(add_immediate_op);
/* case 1: single operand + immediate -> Lea */
1286 new_node = create_lea_from_address(dbgi, new_block, &addr);
1287 SET_IA32_ORIG_NODE(new_node, node);
1291 /* test if we can use source address mode */
1292 match_arguments(&am, block, op1, op2, NULL, match_commutative
1293 | match_mode_neutral | match_am | match_immediate | match_try_am);
1295 /* construct an Add with source address mode */
1296 if (am.op_type == ia32_AddrModeS) {
1297 ia32_address_t *am_addr = &am.addr;
1298 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1299 am_addr->index, am_addr->mem, am.new_op1,
1301 set_am_attributes(new_node, &am);
1302 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's mem-Proj to the new node */
1304 new_node = fix_mem_proj(new_node, &am);
1309 /* otherwise construct a lea */
1310 new_node = create_lea_from_address(dbgi, new_block, &addr);
1311 SET_IA32_ORIG_NODE(new_node, node);
1316 * Creates an ia32 Mul.
1318 * @return the created ia32 Mul node
1320 static ir_node *gen_Mul(ir_node *node)
1322 ir_node *op1 = get_Mul_left(node);
1323 ir_node *op2 = get_Mul_right(node);
1324 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE xMul or x87 vfmul */
1326 if (mode_is_float(mode)) {
1327 if (ia32_cg_config.use_sse2)
1328 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1329 match_commutative | match_am);
1331 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul; lower 32 bits are the same for signed and
 * unsigned, hence match_mode_neutral is fine here */
1333 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1334 match_commutative | match_am | match_mode_neutral |
1335 match_immediate | match_am_and_immediates);
1339 * Creates an ia32 Mulh.
1340 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1341 * this result while Mul returns the lower 32 bit.
1343 * @return the created ia32 Mulh node
1345 static ir_node *gen_Mulh(ir_node *node)
1347 dbg_info *dbgi = get_irn_dbg_info(node);
1348 ir_node *op1 = get_Mulh_left(node);
1349 ir_node *op2 = get_Mulh_right(node);
1350 ir_mode *mode = get_irn_mode(node);
1352 ir_node *proj_res_high;
/* only 32x32->64 widening multiply is supported */
1354 if (get_mode_size_bits(mode) != 32) {
1355 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed uses one-operand IMul, unsigned uses Mul; in both cases the
 * high half of the 64bit product is projected out */
1358 if (mode_is_signed(mode)) {
1359 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1360 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1362 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1363 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1365 return proj_res_high;
1369 * Creates an ia32 And.
1371 * @return The created ia32 And node
1373 static ir_node *gen_And(ir_node *node)
1375 ir_node *op1 = get_And_left(node);
1376 ir_node *op2 = get_And_right(node);
1377 assert(! mode_is_float(get_irn_mode(node)));
1379 /* is it a zero extension? */
1380 if (is_Const(op2)) {
1381 ir_tarval *tv = get_Const_tarval(op2);
1382 long v = get_tarval_long(tv);
/* And with 0xFF / 0xFFFF is a zero-extension from 8/16 bit; emit a
 * Conv (movzx) instead of an And */
1384 if (v == 0xFF || v == 0xFFFF) {
1385 dbg_info *dbgi = get_irn_dbg_info(node);
1386 ir_node *block = get_nodes_block(node);
1393 assert(v == 0xFFFF);
1396 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1401 return gen_binop(node, op1, op2, new_bd_ia32_And,
1402 match_commutative | match_mode_neutral | match_am | match_immediate);
1408 * Creates an ia32 Or.
1410 * @return The created ia32 Or node
1412 static ir_node *gen_Or(ir_node *node)
1414 ir_node *op1 = get_Or_left(node);
1415 ir_node *op2 = get_Or_right(node);
/* bitwise ops are integer-only at this point */
1417 assert (! mode_is_float(get_irn_mode(node)));
1418 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1419 | match_mode_neutral | match_am | match_immediate);
1425 * Creates an ia32 Eor.
1427 * @return The created ia32 Eor node
1429 static ir_node *gen_Eor(ir_node *node)
1431 ir_node *op1 = get_Eor_left(node);
1432 ir_node *op2 = get_Eor_right(node);
/* firm Eor (xor) maps directly to ia32 Xor */
1434 assert(! mode_is_float(get_irn_mode(node)));
1435 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1436 | match_mode_neutral | match_am | match_immediate);
1441 * Creates an ia32 Sub.
1443 * @return The created ia32 Sub node
1445 static ir_node *gen_Sub(ir_node *node)
1447 ir_node *op1 = get_Sub_left(node);
1448 ir_node *op2 = get_Sub_right(node);
1449 ir_mode *mode = get_irn_mode(node);
/* float subtract: SSE xSub or x87 vfsub (not commutative, no
 * match_commutative here) */
1451 if (mode_is_float(mode)) {
1452 if (ia32_cg_config.use_sse2)
1453 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1455 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,Const should have been normalized to Add x,-Const earlier */
1458 if (is_Const(op2)) {
1459 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1463 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1464 | match_am | match_immediate);
/* Combine the memory input of the original node (src_mem) with the memory
 * consumed by a folded address-mode load (am_mem), avoiding self-loops
 * where the value Proj and the memory come from the same load. */
1467 static ir_node *transform_AM_mem(ir_node *const block,
1468 ir_node *const src_val,
1469 ir_node *const src_mem,
1470 ir_node *const am_mem)
1472 if (is_NoMem(am_mem)) {
/* no AM memory involved: just use the transformed source memory */
1473 return be_transform_node(src_mem);
1474 } else if (is_Proj(src_val) &&
1476 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1477 /* avoid memory loop */
1479 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* src_mem is a Sync: rebuild it without the pred that belongs to the
 * consumed load, then append am_mem */
1480 ir_node *const ptr_pred = get_Proj_pred(src_val);
1481 int const arity = get_Sync_n_preds(src_mem);
1486 NEW_ARR_A(ir_node*, ins, arity + 1);
1488 /* NOTE: This sometimes produces dead-code because the old sync in
1489 * src_mem might not be used anymore, we should detect this case
1490 * and kill the sync... */
1491 for (i = arity - 1; i >= 0; --i) {
1492 ir_node *const pred = get_Sync_pred(src_mem, i);
1494 /* avoid memory loop */
1495 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1498 ins[n++] = be_transform_node(pred);
1501 if (n==1 && ins[0] == am_mem) {
1503 /* creating a new Sync and relying on CSE may fail,
1504 * if am_mem is a ProjM, which does not yet verify. */
1508 return new_r_Sync(block, n, ins);
/* generic case: Sync of the transformed src_mem and am_mem */
1512 ins[0] = be_transform_node(src_mem);
1514 return new_r_Sync(block, 2, ins);
1519 * Create a 32bit to 64bit signed extension.
1521 * @param dbgi debug info
1522 * @param block the block where node nodes should be placed
1523 * @param val the value to extend
1524 * @param orig the original node
1526 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1527 ir_node *val, const ir_node *orig)
/* two strategies: the short cltd/cdq instruction (needs val in eax,
 * ProduceVal supplies the fake edx input), or a Sar by 31 which works
 * with any register */
1532 if (ia32_cg_config.use_short_sex_eax) {
1533 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1534 be_dep_on_frame(pval);
1535 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1537 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
/* arithmetic shift right by 31 replicates the sign bit */
1538 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1540 SET_IA32_ORIG_NODE(res, orig);
1545 * Generates an ia32 Div with additional infrastructure for the
1546 * register allocator if needed.
1548 static ir_node *create_Div(ir_node *node)
1550 dbg_info *dbgi = get_irn_dbg_info(node);
1551 ir_node *block = get_nodes_block(node);
1552 ir_node *new_block = be_transform_node(block);
1559 ir_node *sign_extension;
1560 ia32_address_mode_t am;
1561 ia32_address_t *addr = &am.addr;
1563 /* the upper bits have random contents for smaller modes */
/* shared handler for both Div and Mod nodes; pick accessors by opcode */
1564 switch (get_irn_opcode(node)) {
1566 op1 = get_Div_left(node);
1567 op2 = get_Div_right(node);
1568 mem = get_Div_mem(node);
1569 mode = get_Div_resmode(node);
1572 op1 = get_Mod_left(node);
1573 op2 = get_Mod_right(node);
1574 mem = get_Mod_mem(node);
1575 mode = get_Mod_resmode(node);
1578 panic("invalid divmod node %+F", node);
1581 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1583 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1584 is the memory of the consumed address. We can have only the second op as address
1585 in Div nodes, so check only op2. */
1586 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed: idiv needs edx = sign extension of the dividend;
 * unsigned: div needs edx = 0 */
1588 if (mode_is_signed(mode)) {
1589 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1590 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1591 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1593 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1594 be_dep_on_frame(sign_extension);
1596 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1597 addr->index, new_mem, am.new_op2,
1598 am.new_op1, sign_extension);
/* keep the pinned state of the original (division may trap) */
1601 set_irn_pinned(new_node, get_irn_pinned(node));
1603 set_am_attributes(new_node, &am);
1604 SET_IA32_ORIG_NODE(new_node, node);
1606 new_node = fix_mem_proj(new_node, &am);
1612 * Generates an ia32 Mod.
1614 static ir_node *gen_Mod(ir_node *node)
/* Mod shares the Div/IDiv construction; the remainder is a Proj later */
1616 return create_Div(node);
1620 * Generates an ia32 Div.
1622 static ir_node *gen_Div(ir_node *node)
1624 ir_mode *mode = get_Div_resmode(node);
/* float division: SSE xDiv or x87 vfdiv */
1625 if (mode_is_float(mode)) {
1626 ir_node *op1 = get_Div_left(node);
1627 ir_node *op2 = get_Div_right(node);
1629 if (ia32_cg_config.use_sse2) {
1630 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1632 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
/* integer division shares the Div/IDiv construction with Mod */
1636 return create_Div(node);
1640 * Creates an ia32 Shl.
1642 * @return The created ia32 Shl node
1644 static ir_node *gen_Shl(ir_node *node)
1646 ir_node *left = get_Shl_left(node);
1647 ir_node *right = get_Shl_right(node);
/* left shift: upper bits of the input never reach the result, so
 * mode_neutral matching is allowed */
1649 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1650 match_mode_neutral | match_immediate);
1654 * Creates an ia32 Shr.
1656 * @return The created ia32 Shr node
1658 static ir_node *gen_Shr(ir_node *node)
1660 ir_node *left = get_Shr_left(node);
1661 ir_node *right = get_Shr_right(node);
/* logical right shift: upper bits matter, so no match_mode_neutral */
1663 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1669 * Creates an ia32 Sar.
1671 * @return The created ia32 Shrs node
1673 static ir_node *gen_Shrs(ir_node *node)
1675 ir_node *left = get_Shrs_left(node);
1676 ir_node *right = get_Shrs_right(node);
/* special case: Shrs by a constant (presumably 31, elided check) is a
 * full sign extension -- TODO confirm against complete source */
1678 if (is_Const(right)) {
1679 ir_tarval *tv = get_Const_tarval(right);
1680 long val = get_tarval_long(tv);
1682 /* this is a sign extension */
1683 dbg_info *dbgi = get_irn_dbg_info(node);
1684 ir_node *block = be_transform_node(get_nodes_block(node));
1685 ir_node *new_op = be_transform_node(left);
1687 return create_sex_32_64(dbgi, block, new_op, node);
1691 /* 8 or 16 bit sign extension? */
1692 if (is_Const(right) && is_Shl(left)) {
/* Shrs(Shl(x, c), c) with c == 16 or 24 is a sign extension from
 * 16 or 8 bit: emit a Conv (movsx) instead of two shifts */
1693 ir_node *shl_left = get_Shl_left(left);
1694 ir_node *shl_right = get_Shl_right(left);
1695 if (is_Const(shl_right)) {
1696 ir_tarval *tv1 = get_Const_tarval(right);
1697 ir_tarval *tv2 = get_Const_tarval(shl_right);
1698 if (tv1 == tv2 && tarval_is_long(tv1)) {
1699 long val = get_tarval_long(tv1);
1700 if (val == 16 || val == 24) {
1701 dbg_info *dbgi = get_irn_dbg_info(node);
1702 ir_node *block = get_nodes_block(node);
1712 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain Sar */
1721 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1727 * Creates an ia32 Rol.
1729 * @param op1 The first operator
1730 * @param op2 The second operator
1731 * @return The created ia32 RotL node
1733 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1735 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1741 * Creates an ia32 Ror.
1742 * NOTE: There is no RotR with immediate because this would always be a RotL
1743 * "imm-mode_size_bits" which can be pre-calculated.
1745 * @param op1 The first operator
1746 * @param op2 The second operator
1747 * @return The created ia32 RotR node
1749 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
/* match_immediate is still passed; per the NOTE above immediate rotations
 * are normalized to RotL before this point */
1751 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1757 * Creates an ia32 RotR or RotL (depending on the found pattern).
1759 * @return The created ia32 RotL or RotR node
1761 static ir_node *gen_Rotl(ir_node *node)
1763 ir_node *op1 = get_Rotl_left(node);
1764 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -n) is the same as Ror(x, n): strip the Minus and emit Ror */
1766 if (is_Minus(op2)) {
1767 return gen_Ror(node, op1, get_Minus_op(op2));
1770 return gen_Rol(node, op1, op2);
1776 * Transforms a Minus node.
1778 * @return The created ia32 Minus node
1780 static ir_node *gen_Minus(ir_node *node)
1782 ir_node *op = get_Minus_op(node);
1783 ir_node *block = be_transform_node(get_nodes_block(node));
1784 dbg_info *dbgi = get_irn_dbg_info(node);
1785 ir_mode *mode = get_irn_mode(node);
1790 if (mode_is_float(mode)) {
1791 ir_node *new_op = be_transform_node(op);
1792 if (ia32_cg_config.use_sse2) {
1793 /* TODO: non-optimal... if we have many xXors, then we should
1794 * rather create a load for the const and use that instead of
1795 * several AM nodes... */
/* SSE has no negate: xor with the sign-bit constant (SSIGN/DSIGN)
 * loaded via address mode instead */
1796 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1798 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1799 noreg_GP, nomem, new_op, noreg_xmm);
1801 size = get_mode_size_bits(mode);
1802 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1804 set_ia32_am_sc(new_node, ent);
1805 set_ia32_op_type(new_node, ia32_AddrModeS);
1806 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated change-sign instruction (fchs) */
1808 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1811 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1814 SET_IA32_ORIG_NODE(new_node, node);
1820 * Transforms a Not node.
1822 * @return The created ia32 Not node
1824 static ir_node *gen_Not(ir_node *node)
1826 ir_node *op = get_Not_op(node);
/* boolean and float Not must have been lowered before the backend */
1828 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1829 assert (! mode_is_float(get_irn_mode(node)));
1831 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Create an absolute-value (or negated absolute value if negate is set)
 * computation for op; node is the original node for debug attribution. */
1834 static ir_node *create_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1835 bool negate, ir_node *node)
1837 ir_node *new_block = be_transform_node(block);
1838 ir_mode *mode = get_irn_mode(op);
1844 if (mode_is_float(mode)) {
1845 new_op = be_transform_node(op);
1847 if (ia32_cg_config.use_sse2) {
/* SSE: clear the sign bit by And-ing with the SABS/DABS mask
 * constant loaded via address mode */
1848 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1849 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1850 noreg_GP, nomem, new_op, noreg_fp);
1852 size = get_mode_size_bits(mode);
1853 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1855 set_ia32_am_sc(new_node, ent);
1857 SET_IA32_ORIG_NODE(new_node, node);
1859 set_ia32_op_type(new_node, ia32_AddrModeS);
1860 set_ia32_ls_mode(new_node, mode);
1862 /* TODO, implement -Abs case */
/* x87: fabs, then fchs if the negated result was requested */
1865 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1866 SET_IA32_ORIG_NODE(new_node, node);
1868 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1869 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs via the branchless idiom: s = x >> 31 (sign mask),
 * abs(x) = (x ^ s) - s; the negate variant swaps the Sub operands */
1874 ir_node *sign_extension;
1876 if (get_mode_size_bits(mode) == 32) {
1877 new_op = be_transform_node(op);
1879 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1882 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1884 xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1885 nomem, new_op, sign_extension);
1886 SET_IA32_ORIG_NODE(xorn, node);
1889 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1890 nomem, sign_extension, xorn);
1892 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1893 nomem, xorn, sign_extension);
1895 SET_IA32_ORIG_NODE(new_node, node);
1902 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1904 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1906 dbg_info *dbgi = get_irn_dbg_info(cmp);
1907 ir_node *block = get_nodes_block(cmp);
1908 ir_node *new_block = be_transform_node(block);
1909 ir_node *op1 = be_transform_node(x);
1910 ir_node *op2 = be_transform_node(n);
/* bt copies bit op2 of op1 into the carry flag */
1912 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm comparison relation (pn_Cmp) to the ia32 condition code used
 * by Setcc/Jcc/CMov, taking the compared mode into account: float compares
 * use the unsigned-style flags (below/above) plus parity for unordered,
 * signed integers use less/greater, unsigned integers use below/above. */
1915 static ia32_condition_code_t pnc_to_condition_code(pn_Cmp pnc, ir_mode *mode)
1917 if (mode_is_float(mode)) {
1919 case pn_Cmp_Eq: return ia32_cc_float_equal;
1920 case pn_Cmp_Lt: return ia32_cc_float_below;
1921 case pn_Cmp_Le: return ia32_cc_float_below_equal;
1922 case pn_Cmp_Gt: return ia32_cc_float_above;
1923 case pn_Cmp_Ge: return ia32_cc_float_above_equal;
1924 case pn_Cmp_Lg: return ia32_cc_not_equal;
1925 case pn_Cmp_Leg: return ia32_cc_not_parity;
1926 case pn_Cmp_Uo: return ia32_cc_parity;
1927 case pn_Cmp_Ue: return ia32_cc_equal;
1928 case pn_Cmp_Ul: return ia32_cc_float_unordered_below;
1929 case pn_Cmp_Ule: return ia32_cc_float_unordered_below_equal;
1930 case pn_Cmp_Ug: return ia32_cc_float_unordered_above;
1931 case pn_Cmp_Uge: return ia32_cc_float_unordered_above_equal;
1932 case pn_Cmp_Ne: return ia32_cc_float_not_equal;
1936 /* should we introduce a jump always/jump never? */
1939 panic("Unexpected float pnc");
1940 } else if (mode_is_signed(mode)) {
1943 case pn_Cmp_Eq: return ia32_cc_equal;
1945 case pn_Cmp_Lt: return ia32_cc_less;
1947 case pn_Cmp_Le: return ia32_cc_less_equal;
1949 case pn_Cmp_Gt: return ia32_cc_greater;
1951 case pn_Cmp_Ge: return ia32_cc_greater_equal;
1953 case pn_Cmp_Ne: return ia32_cc_not_equal;
1959 /* introduce jump always/jump never? */
1962 panic("Unexpected pnc");
/* unsigned integer comparison */
1966 case pn_Cmp_Eq: return ia32_cc_equal;
1968 case pn_Cmp_Lt: return ia32_cc_below;
1970 case pn_Cmp_Le: return ia32_cc_below_equal;
1972 case pn_Cmp_Gt: return ia32_cc_above;
1974 case pn_Cmp_Ge: return ia32_cc_above_equal;
1976 case pn_Cmp_Ne: return ia32_cc_not_equal;
1982 /* introduce jump always/jump never? */
1985 panic("Unexpected pnc");
/* Produce a flags value for a plain mode_b node by testing it against 0
 * (Test node sets ZF); the matching condition code is "not equal". */
1989 static ir_node *get_flags_mode_b(ir_node *node, ia32_condition_code_t *cc_out)
1991 /* a mode_b value, we have to compare it against 0 */
1992 dbg_info *dbgi = get_irn_dbg_info(node);
1993 ir_node *new_block = be_transform_node(get_nodes_block(node));
1994 ir_node *new_op = be_transform_node(node);
1995 ir_node *flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op, new_op, false);
1996 *cc_out = ia32_cc_not_equal;
/* Produce a flags value and condition code for a Proj(Cmp). Tries to match
 * the bit-test pattern (x & (1 << n)) ==/!= 0 first, which can use the bt
 * instruction; otherwise transforms the Cmp normally. */
2000 static ir_node *get_flags_node_cmp(ir_node *node, ia32_condition_code_t *cc_out)
2002 /* must have a Proj(Cmp) as input */
2003 ir_node *cmp = get_Proj_pred(node);
2004 int pnc = get_Proj_pn_cmp(node);
2005 ir_node *l = get_Cmp_left(cmp);
2006 ir_mode *mode = get_irn_mode(l);
2009 /* check for bit-test */
2010 if (ia32_cg_config.use_bt
2011 && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq || pnc == pn_Cmp_Ne
2012 || pnc == pn_Cmp_Ue)) {
2013 ir_node *l = get_Cmp_left(cmp);
2014 ir_node *r = get_Cmp_right(cmp);
2016 ir_node *la = get_And_left(l);
2017 ir_node *ra = get_And_right(l);
2024 ir_node *c = get_Shl_left(la);
2025 if (is_Const_1(c) && is_Const_0(r)) {
2026 /* (1 << n) & ra) */
2027 ir_node *n = get_Shl_right(la);
2028 flags = gen_bt(cmp, ra, n);
2029 /* the bit is copied into the CF flag */
2030 if (pnc & pn_Cmp_Eq)
2031 *cc_out = ia32_cc_below; /* ==0, so we test for CF=1 */
2033 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2040 /* just do a normal transformation of the Cmp */
2041 *cc_out = pnc_to_condition_code(pnc, mode);
2042 flags = be_transform_node(cmp);
2047 * Transform a node returning a "flag" result.
2049 * @param node the node to transform
2050 * @param cc_out the compare mode to use
2052 static ir_node *get_flags_node(ir_node *node, ia32_condition_code_t *cc_out)
/* dispatch: Proj(Cmp) gets the cmp/bt path, anything else must be a
 * mode_b value and is tested against zero */
2054 if (is_Proj(node) && is_Cmp(get_Proj_pred(node)))
2055 return get_flags_node_cmp(node, cc_out);
2056 assert(get_irn_mode(node) == mode_b);
2057 return get_flags_mode_b(node, cc_out);
2061 * Transforms a Load.
2063 * @return the created ia32 Load node
2065 static ir_node *gen_Load(ir_node *node)
2067 ir_node *old_block = get_nodes_block(node);
2068 ir_node *block = be_transform_node(old_block);
2069 ir_node *ptr = get_Load_ptr(node);
2070 ir_node *mem = get_Load_mem(node);
2071 ir_node *new_mem = be_transform_node(mem);
2074 dbg_info *dbgi = get_irn_dbg_info(node);
2075 ir_mode *mode = get_Load_mode(node);
2077 ia32_address_t addr;
2079 /* construct load address */
2080 memset(&addr, 0, sizeof(addr));
2081 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2088 base = be_transform_node(base);
2091 if (index == NULL) {
2094 index = be_transform_node(index);
/* pick the load flavor: SSE xLoad / x87 vfld for floats, Conv_I2I
 * (sign/zero-extending load) for sub-32bit GP modes, plain Load else */
2097 if (mode_is_float(mode)) {
2098 if (ia32_cg_config.use_sse2) {
2099 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2102 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2106 assert(mode != mode_b);
2108 /* create a conv node with address mode for smaller modes */
2109 if (get_mode_size_bits(mode) < 32) {
2110 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2111 new_mem, noreg_GP, mode);
2113 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2117 set_irn_pinned(new_node, get_irn_pinned(node));
2118 set_ia32_op_type(new_node, ia32_AddrModeS);
2119 set_ia32_ls_mode(new_node, mode);
2120 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the spiller; the
 * asserts ensure all load variants use the same result Proj number */
2122 if (get_irn_pinned(node) == op_pin_state_floats) {
2123 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2124 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2125 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2126 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2129 SET_IA32_ORIG_NODE(new_node, node);
2131 be_dep_on_frame(new_node);
/* Decide whether a load (node is its value Proj) can be folded into a
 * destination-address-mode operation writing through ptr/mem. */
2135 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2136 ir_node *ptr, ir_node *other)
2143 /* we only use address mode if we're the only user of the load */
2144 if (get_irn_n_edges(node) > 1)
2147 load = get_Proj_pred(node);
/* load and store must be in the same block for dest-AM folding */
2150 if (get_nodes_block(load) != block)
2153 /* store should have the same pointer as the load */
2154 if (get_Load_ptr(load) != ptr)
2157 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2158 if (other != NULL &&
2159 get_nodes_block(other) == block &&
2160 heights_reachable_in_block(ia32_heights, other, load)) {
/* intermediate memory operations between load and store forbid folding */
2164 if (ia32_prevents_AM(block, load, mem))
2166 /* Store should be attached to the load via mem */
2167 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Build a destination-address-mode binary op (op [mem], reg/imm) for a
 * Store(binop(Load)) pattern; returns NULL if no operand qualifies.
 * func8bit is used for 8-bit modes, func otherwise. */
2172 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2173 ir_node *mem, ir_node *ptr, ir_mode *mode,
2174 construct_binop_dest_func *func,
2175 construct_binop_dest_func *func8bit,
2176 match_flags_t flags)
2178 ir_node *src_block = get_nodes_block(node);
2186 ia32_address_mode_t am;
2187 ia32_address_t *addr = &am.addr;
2188 memset(&am, 0, sizeof(am));
2190 assert(flags & match_immediate); /* there is no destam node without... */
2191 commutative = (flags & match_commutative) != 0;
/* try folding the load on either side (second only if commutative) */
2193 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2194 build_address(&am, op1, ia32_create_am_double_use);
2195 new_op = create_immediate_or_transform(op2, 0);
2196 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2197 build_address(&am, op2, ia32_create_am_double_use);
2198 new_op = create_immediate_or_transform(op1, 0);
/* fill unset address parts with the NoReg placeholder */
2203 if (addr->base == NULL)
2204 addr->base = noreg_GP;
2205 if (addr->index == NULL)
2206 addr->index = noreg_GP;
2207 if (addr->mem == NULL)
2210 dbgi = get_irn_dbg_info(node);
2211 block = be_transform_node(src_block);
2212 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2214 if (get_mode_size_bits(mode) == 8) {
2215 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2217 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2219 set_address(new_node, addr);
2220 set_ia32_op_type(new_node, ia32_AddrModeD);
2221 set_ia32_ls_mode(new_node, mode);
2222 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the folded load's mem Proj onto the new node; the second call
 * with (mem_proj ? ...) guards against a NULL transform result --
 * NOTE(review): elided lines may explain this, confirm in full source */
2224 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2225 mem_proj = be_transform_node(am.mem_proj);
2226 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a destination-address-mode unary op (op [mem]) for a
 * Store(unop(Load)) pattern; returns NULL when folding is not possible. */
2231 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2232 ir_node *ptr, ir_mode *mode,
2233 construct_unop_dest_func *func)
2235 ir_node *src_block = get_nodes_block(node);
2241 ia32_address_mode_t am;
2242 ia32_address_t *addr = &am.addr;
2244 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2247 memset(&am, 0, sizeof(am));
2248 build_address(&am, op, ia32_create_am_double_use);
2250 dbgi = get_irn_dbg_info(node);
2251 block = be_transform_node(src_block);
2252 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2253 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2254 set_address(new_node, addr);
2255 set_ia32_op_type(new_node, ia32_AddrModeD);
2256 set_ia32_ls_mode(new_node, mode);
2257 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the folded load's mem Proj onto the new node (same pattern as
 * dest_am_binop) */
2259 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2260 mem_proj = be_transform_node(am.mem_proj);
2261 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to turn Store(Mux(cond, 1, 0)) (8-bit only) into a SetccMem that
 * writes the condition directly to memory; returns NULL if not matched. */
2266 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2268 ir_mode *mode = get_irn_mode(node);
2269 ir_node *mux_true = get_Mux_true(node);
2270 ir_node *mux_false = get_Mux_false(node);
2278 ia32_condition_code_t cc;
2279 ia32_address_t addr;
/* setcc only writes a byte */
2281 if (get_mode_size_bits(mode) != 8)
2284 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2286 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2292 cond = get_Mux_sel(node);
2293 flags = get_flags_node(cond, &cc);
2294 /* we can't handle the float special cases with SetM */
2295 if (cc & ia32_cc_additional_float_cases)
/* invert the condition -- NOTE(review): elided control flow above likely
 * restricts this to the swapped (0,1) Mux branch; confirm in full source */
2298 cc = ia32_negate_condition_code(cc);
2300 build_address_ptr(&addr, ptr, mem);
2302 dbgi = get_irn_dbg_info(node);
2303 block = get_nodes_block(node);
2304 new_block = be_transform_node(block);
2305 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2306 addr.index, addr.mem, flags, cc);
2307 set_address(new_node, &addr);
2308 set_ia32_op_type(new_node, ia32_AddrModeD);
2309 set_ia32_ls_mode(new_node, mode);
2310 SET_IA32_ORIG_NODE(new_node, node);
/* Try to turn a Store whose value is a single-use arithmetic node fed by a
 * load of the same address into a destination-address-mode instruction
 * (add [mem], x etc.); returns NULL if the pattern does not match. */
2315 static ir_node *try_create_dest_am(ir_node *node)
2317 ir_node *val = get_Store_value(node);
2318 ir_node *mem = get_Store_mem(node);
2319 ir_node *ptr = get_Store_ptr(node);
2320 ir_mode *mode = get_irn_mode(val);
2321 unsigned bits = get_mode_size_bits(mode);
2326 /* handle only GP modes for now... */
2327 if (!ia32_mode_needs_gp_reg(mode))
2331 /* store must be the only user of the val node */
2332 if (get_irn_n_edges(val) > 1)
2334 /* skip pointless convs */
2336 ir_node *conv_op = get_Conv_op(val);
2337 ir_mode *pred_mode = get_irn_mode(conv_op);
2338 if (!ia32_mode_needs_gp_reg(pred_mode))
2340 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2348 /* value must be in the same block */
2349 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the arithmetic opcode feeding the store */
2352 switch (get_irn_opcode(val)) {
2354 op1 = get_Add_left(val);
2355 op2 = get_Add_right(val);
/* Add +-1 becomes inc/dec when the target prefers them */
2356 if (ia32_cg_config.use_incdec) {
2357 if (is_Const_1(op2)) {
2358 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2360 } else if (is_Const_Minus_1(op2)) {
2361 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2365 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2366 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2367 match_commutative | match_immediate);
2370 op1 = get_Sub_left(val);
2371 op2 = get_Sub_right(val);
2372 if (is_Const(op2)) {
2373 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2375 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2376 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2380 op1 = get_And_left(val);
2381 op2 = get_And_right(val);
2382 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2383 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2384 match_commutative | match_immediate);
2387 op1 = get_Or_left(val);
2388 op2 = get_Or_right(val);
2389 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2390 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2391 match_commutative | match_immediate);
2394 op1 = get_Eor_left(val);
2395 op2 = get_Eor_right(val);
2396 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2397 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2398 match_commutative | match_immediate);
2401 op1 = get_Shl_left(val);
2402 op2 = get_Shl_right(val);
/* shifts have no separate 8-bit constructor: same func twice */
2403 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2404 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2408 op1 = get_Shr_left(val);
2409 op2 = get_Shr_right(val);
2410 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2411 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2415 op1 = get_Shrs_left(val);
2416 op2 = get_Shrs_right(val);
2417 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2418 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2422 op1 = get_Rotl_left(val);
2423 op2 = get_Rotl_right(val);
2424 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2425 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2428 /* TODO: match ROR patterns... */
2430 new_node = try_create_SetMem(val, ptr, mem);
2434 op1 = get_Minus_op(val);
2435 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2438 /* should be lowered already */
2439 assert(mode != mode_b);
2440 op1 = get_Not_op(val);
2441 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned store must stay pinned even after folding */
2447 if (new_node != NULL) {
2448 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2449 get_irn_pinned(node) == op_pin_state_pinned) {
2450 set_irn_pinned(new_node, op_pin_state_pinned);
/* Check whether an integer mode can be the target of an x87 fist store:
 * it must be signed and 16 or 32 bits wide. */
2457 static bool possible_int_mode_for_fp(ir_mode *mode)
2461 if (!mode_is_signed(mode))
2463 size = get_mode_size_bits(mode);
2464 if (size != 16 && size != 32)
/* Return non-zero if node is a Conv from a float mode to an integer mode
 * that an x87 fist store can produce directly. */
2469 static int is_float_to_int_conv(const ir_node *node)
2471 ir_mode *mode = get_irn_mode(node);
2475 if (!possible_int_mode_for_fp(mode))
2480 conv_op = get_Conv_op(node);
2481 conv_mode = get_irn_mode(conv_op);
2483 if (!mode_is_float(conv_mode))
2490 * Transform a Store(floatConst) into a sequence of
2493 * @return the created ia32 Store node
2495 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2497 ir_mode *mode = get_irn_mode(cns);
2498 unsigned size = get_mode_size_bytes(mode);
2499 ir_tarval *tv = get_Const_tarval(cns);
2500 ir_node *block = get_nodes_block(node);
2501 ir_node *new_block = be_transform_node(block);
2502 ir_node *ptr = get_Store_ptr(node);
2503 ir_node *mem = get_Store_mem(node);
2504 dbg_info *dbgi = get_irn_dbg_info(node);
2508 ia32_address_t addr;
/* float constants are stored as one or more 32bit integer stores of the
 * raw bit pattern; hence the size must be a multiple of 4 */
2510 assert(size % 4 == 0);
2513 build_address_ptr(&addr, ptr, mem);
/* assemble a 32bit little-endian word from the tarval's bytes */
2517 get_tarval_sub_bits(tv, ofs) |
2518 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2519 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2520 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2521 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2523 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2524 addr.index, addr.mem, imm);
2526 set_irn_pinned(new_node, get_irn_pinned(node));
2527 set_ia32_op_type(new_node, ia32_AddrModeD);
2528 set_ia32_ls_mode(new_node, mode_Iu);
2529 set_address(new_node, &addr);
2530 SET_IA32_ORIG_NODE(new_node, node);
/* collect the partial stores; loop until all bytes are written */
2533 ins[i++] = new_node;
2538 } while (size != 0);
/* join the memory outputs of all partial stores */
2541 return new_rd_Sync(dbgi, new_block, i, ins);
2548 * Generate a vfist or vfisttp instruction.
2550 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2551 ir_node *mem, ir_node *val, ir_node **fist)
2555 if (ia32_cg_config.use_fisttp) {
2556 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2557 if other users exists */
2558 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2559 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* Keep the result Proj alive so the popped value gets a copy */
2560 be_new_Keep(block, 1, &value);
2562 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* plain fist needs the FPU control word set to truncation mode */
2565 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2568 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
/*
 * Transforms a Store with no special-case handling.  Tries destination
 * address mode first; otherwise emits xStore (SSE2 float), vfst (x87
 * float), vfist (float→int conv folded into the store), or a plain
 * integer Store/Store8Bit.
 */
2574 * Transforms a general (no special case) Store.
2576 * @return the created ia32 Store node
2578 static ir_node *gen_general_Store(ir_node *node)
2580 ir_node *val = get_Store_value(node);
2581 ir_mode *mode = get_irn_mode(val);
2582 ir_node *block = get_nodes_block(node);
2583 ir_node *new_block = be_transform_node(block);
2584 ir_node *ptr = get_Store_ptr(node);
2585 ir_node *mem = get_Store_mem(node);
2586 dbg_info *dbgi = get_irn_dbg_info(node);
2587 ir_node *new_val, *new_node, *store;
2588 ia32_address_t addr;
2590 /* check for destination address mode */
2591 new_node = try_create_dest_am(node);
2592 if (new_node != NULL)
2595 /* construct store address */
2596 memset(&addr, 0, sizeof(addr));
2597 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2599 if (addr.base == NULL) {
2600 addr.base = noreg_GP;
2602 addr.base = be_transform_node(addr.base);
2605 if (addr.index == NULL) {
2606 addr.index = noreg_GP;
2608 addr.index = be_transform_node(addr.index);
2610 addr.mem = be_transform_node(mem);
2612 if (mode_is_float(mode)) {
2613 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2615 while (is_Conv(val) && mode == get_irn_mode(val)) {
2616 ir_node *op = get_Conv_op(val);
2617 if (!mode_is_float(get_irn_mode(op)))
2621 new_val = be_transform_node(val);
2622 if (ia32_cg_config.use_sse2) {
2623 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2624 addr.index, addr.mem, new_val);
2626 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2627 addr.index, addr.mem, new_val, mode);
/* x87 only: fold a preceding float→int Conv into a fist store */
2630 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2631 val = get_Conv_op(val);
2633 /* TODO: is this optimisation still necessary at all (middleend)? */
2634 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2635 while (is_Conv(val)) {
2636 ir_node *op = get_Conv_op(val);
2637 if (!mode_is_float(get_irn_mode(op)))
2639 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2643 new_val = be_transform_node(val);
2644 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store; value may become an immediate operand */
2646 new_val = create_immediate_or_transform(val, 0);
2647 assert(mode != mode_b);
2649 if (get_mode_size_bits(mode) == 8) {
2650 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2651 addr.index, addr.mem, new_val);
2653 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2654 addr.index, addr.mem, new_val);
/* common attribute setup for whichever store was built */
2659 set_irn_pinned(store, get_irn_pinned(node));
2660 set_ia32_op_type(store, ia32_AddrModeD);
2661 set_ia32_ls_mode(store, mode);
2663 set_address(store, &addr);
2664 SET_IA32_ORIG_NODE(store, node);
/*
 * Store dispatcher: float constants get the integer-store expansion,
 * everything else goes through gen_general_Store().
 */
2670 * Transforms a Store.
2672 * @return the created ia32 Store node
2674 static ir_node *gen_Store(ir_node *node)
2676 ir_node *val = get_Store_value(node);
2677 ir_mode *mode = get_irn_mode(val);
2679 if (mode_is_float(mode) && is_Const(val)) {
2680 /* We can transform every floating const store
2681 into a sequence of integer stores.
2682 If the constant is already in a register,
2683 it would be better to use it, but we don't
2684 have this information here. */
2685 return gen_float_const_Store(node, val);
2687 return gen_general_Store(node);
/*
 * Transforms a switch-style Cond (selector is a 32-bit integer) into an
 * ia32 SwitchJmp.  Scans the Proj edges for the case-value range, rejects
 * jump tables larger than 128000 entries, and rebases the selector with a
 * Lea when the smallest case value is non-zero.
 */
2691 * Transforms a Switch.
2693 * @return the created ia32 SwitchJmp node
2695 static ir_node *create_Switch(ir_node *node)
2697 dbg_info *dbgi = get_irn_dbg_info(node);
2698 ir_node *block = be_transform_node(get_nodes_block(node));
2699 ir_node *sel = get_Cond_selector(node);
2700 ir_node *new_sel = be_transform_node(sel);
2701 long switch_min = LONG_MAX;
2702 long switch_max = LONG_MIN;
2703 long default_pn = get_Cond_default_proj(node);
2705 const ir_edge_t *edge;
2707 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2709 /* determine the smallest switch case value */
2710 foreach_out_edge(node, edge) {
2711 ir_node *proj = get_edge_src_irn(edge);
2712 long pn = get_Proj_proj(proj);
/* the default Proj does not participate in the range */
2713 if (pn == default_pn)
2716 if (pn < switch_min)
2718 if (pn > switch_max)
/* guard against absurdly large jump tables */
2722 if ((unsigned long) (switch_max - switch_min) > 128000) {
2723 panic("Size of switch %+F bigger than 128000", node);
2726 if (switch_min != 0) {
2727 /* if smallest switch case is not 0 we need an additional sub */
2728 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2729 add_ia32_am_offs_int(new_sel, -switch_min);
2730 set_ia32_op_type(new_sel, ia32_AddrModeS);
2732 SET_IA32_ORIG_NODE(new_sel, node);
2735 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2736 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Transforms a Cond: boolean selectors become a flags computation plus a
 * conditional jump (Jcc); integer selectors are switches.
 */
2742 * Transform a Cond node.
2744 static ir_node *gen_Cond(ir_node *node)
2746 ir_node *block = get_nodes_block(node);
2747 ir_node *new_block = be_transform_node(block);
2748 dbg_info *dbgi = get_irn_dbg_info(node);
2749 ir_node *sel = get_Cond_selector(node);
2750 ir_mode *sel_mode = get_irn_mode(sel);
2751 ir_node *flags = NULL;
2753 ia32_condition_code_t cc;
2755 if (sel_mode != mode_b) {
2756 return create_Switch(node);
2759 /* we get flags from a Cmp */
2760 flags = get_flags_node(sel, &cc);
2762 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2763 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Transforms a be_Copy by duplicating it; GP-register values are
 * normalized to mode_Iu so all integer copies share one mode.
 */
2769 * Transform a be_Copy.
2771 static ir_node *gen_be_Copy(ir_node *node)
2773 ir_node *new_node = be_duplicate_node(node);
2774 ir_mode *mode = get_irn_mode(new_node);
2776 if (ia32_mode_needs_gp_reg(mode)) {
2777 set_irn_mode(new_node, mode_Iu);
/*
 * Creates an x87 float compare.  With fucomi support the flags are set
 * directly; otherwise ftst (compare against 0) or fucom is used and the
 * FPU status word is moved into EFLAGS via fnstsw + sahf.
 */
2783 static ir_node *create_Fucom(ir_node *node)
2785 dbg_info *dbgi = get_irn_dbg_info(node);
2786 ir_node *block = get_nodes_block(node);
2787 ir_node *new_block = be_transform_node(block);
2788 ir_node *left = get_Cmp_left(node);
2789 ir_node *new_left = be_transform_node(left);
2790 ir_node *right = get_Cmp_right(node);
2794 if (ia32_cg_config.use_fucomi) {
2795 new_right = be_transform_node(right);
2796 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2798 set_ia32_commutative(new_node);
2799 SET_IA32_ORIG_NODE(new_node, node);
/* ftst only compares the top of stack against zero */
2801 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2802 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2804 new_right = be_transform_node(right);
2805 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2808 set_ia32_commutative(new_node);
2810 SET_IA32_ORIG_NODE(new_node, node);
/* transfer the status word (AH) into EFLAGS */
2812 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2813 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Creates an SSE2 Ucomi compare for a float Cmp; operands may be matched
 * as a (commutative) memory address-mode operand.
 */
2819 static ir_node *create_Ucomi(ir_node *node)
2821 dbg_info *dbgi = get_irn_dbg_info(node);
2822 ir_node *src_block = get_nodes_block(node);
2823 ir_node *new_block = be_transform_node(src_block);
2824 ir_node *left = get_Cmp_left(node);
2825 ir_node *right = get_Cmp_right(node);
2827 ia32_address_mode_t am;
2828 ia32_address_t *addr = &am.addr;
2830 match_arguments(&am, src_block, left, right, NULL,
2831 match_commutative | match_am);
2833 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2834 addr->mem, am.new_op1, am.new_op2,
2836 set_am_attributes(new_node, &am);
2838 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possible memory Proj when a load was folded in */
2840 new_node = fix_mem_proj(new_node, &am);
/*
 * Returns true when every Proj of the Cmp tests only equality (Eq/Lg);
 * only then can an And feeding the Cmp be folded into a Test instruction,
 * because Test does not produce meaningful ordered flags.
 */
2846 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2847 * to fold an and into a test node
2849 static bool can_fold_test_and(ir_node *node)
2851 const ir_edge_t *edge;
2853 /** we can only have eq and lg projs */
2854 foreach_out_edge(node, edge) {
2855 ir_node *proj = get_edge_src_irn(edge);
2856 pn_Cmp pnc = get_Proj_pn_cmp(proj);
2857 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/*
 * Conservative analysis over already-transformed ia32 nodes: true when the
 * bits above @p mode's width are known to be zero-extended (unsigned) or
 * sign-extended (signed), so a full 32-bit compare may be used instead of
 * an 8/16-bit one.  Several case labels are elided in this excerpt.
 */
2865 * returns true if it is assured, that the upper bits of a node are "clean"
2866 * which means for a 16 or 8 bit value, that the upper bits in the register
2867 * are 0 for unsigned and a copy of the last significant bit for signed
2870 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2872 assert(ia32_mode_needs_gp_reg(mode));
/* for >= 32 bit there are no "upper bits" in a 32-bit register */
2873 if (get_mode_size_bits(mode) >= 32)
2876 if (is_Proj(transformed_node))
2877 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2879 switch (get_ia32_irn_opcode(transformed_node)) {
2880 case iro_ia32_Conv_I2I:
2881 case iro_ia32_Conv_I2I8Bit: {
2882 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conv with different signedness or a larger source says nothing
 * about the upper bits */
2883 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2885 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* (Shr case, opcode label elided) */
2892 if (mode_is_signed(mode)) {
2893 return false; /* TODO handle signed modes */
2895 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2896 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2897 const ia32_immediate_attr_t *attr
2898 = get_ia32_immediate_attr_const(right);
/* shifting right by at least (32 - width) zeroes the upper bits */
2899 if (attr->symconst == 0 &&
2900 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2904 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2908 /* TODO too conservative if shift amount is constant */
2909 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* (And case, opcode label elided) */
2912 if (!mode_is_signed(mode)) {
2914 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2915 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2917 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary case: both operands must be clean */
2922 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2923 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2925 case iro_ia32_Const:
2926 case iro_ia32_Immediate: {
2927 const ia32_immediate_attr_t *attr =
2928 get_ia32_immediate_attr_const(transformed_node);
2929 if (mode_is_signed(mode)) {
/* signed: everything above the sign bit must be all 0s or all 1s */
2930 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2931 return shifted == 0 || shifted == -1;
2933 unsigned long shifted = (unsigned long)attr->offset;
2934 shifted >>= get_mode_size_bits(mode);
2935 return shifted == 0;
/*
 * Transforms a Cmp.  Floats dispatch to Ucomi (SSE2) / Fucom (x87).
 * Integer compares against 0 of a single-user And become a Test when all
 * Projs are Eq/Lg; otherwise an ia32 Cmp (8-bit variant for byte modes)
 * is emitted, widened to 32 bit when the upper bits are known clean.
 */
2945 * Generate code for a Cmp.
2947 static ir_node *gen_Cmp(ir_node *node)
2949 dbg_info *dbgi = get_irn_dbg_info(node);
2950 ir_node *block = get_nodes_block(node);
2951 ir_node *new_block = be_transform_node(block);
2952 ir_node *left = get_Cmp_left(node);
2953 ir_node *right = get_Cmp_right(node);
2954 ir_mode *cmp_mode = get_irn_mode(left);
2956 ia32_address_mode_t am;
2957 ia32_address_t *addr = &am.addr;
2959 if (mode_is_float(cmp_mode)) {
2960 if (ia32_cg_config.use_sse2) {
2961 return create_Ucomi(node);
2963 return create_Fucom(node);
2967 assert(ia32_mode_needs_gp_reg(cmp_mode));
2969 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2970 if (is_Const_0(right) &&
2972 get_irn_n_edges(left) == 1 &&
2973 can_fold_test_and(node)) {
2974 /* Test(and_left, and_right) */
2975 ir_node *and_left = get_And_left(left);
2976 ir_node *and_right = get_And_right(left);
2978 /* matze: code here used mode instead of cmd_mode, I think it is always
2979 * the same as cmp_mode, but I leave this here to see if this is really
2982 assert(get_irn_mode(and_left) == cmp_mode);
2984 match_arguments(&am, block, and_left, and_right, NULL,
2986 match_am | match_8bit_am | match_16bit_am |
2987 match_am_and_immediates | match_immediate);
2989 /* use 32bit compare mode if possible since the opcode is smaller */
2990 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2991 upper_bits_clean(am.new_op2, cmp_mode)) {
2992 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2995 if (get_mode_size_bits(cmp_mode) == 8) {
2996 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2997 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
2999 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
3000 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3003 /* Cmp(left, right) */
3004 match_arguments(&am, block, left, right, NULL,
3005 match_commutative | match_am | match_8bit_am |
3006 match_16bit_am | match_am_and_immediates |
3008 /* use 32bit compare mode if possible since the opcode is smaller */
3009 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3010 upper_bits_clean(am.new_op2, cmp_mode)) {
3011 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3014 if (get_mode_size_bits(cmp_mode) == 8) {
3015 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3016 addr->index, addr->mem, am.new_op1,
3017 am.new_op2, am.ins_permuted);
3019 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3020 addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
/* common attribute setup + memory Proj fixup */
3023 set_am_attributes(new_node, &am);
3024 set_ia32_ls_mode(new_node, cmp_mode);
3026 SET_IA32_ORIG_NODE(new_node, node);
3028 new_node = fix_mem_proj(new_node, &am);
/*
 * Creates a CMovcc for a Mux whose values live in GP registers.  One
 * operand may come from memory (address mode); if matching permuted the
 * operands, the condition code is inverted to compensate.
 */
3033 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3034 ia32_condition_code_t cc)
3036 dbg_info *dbgi = get_irn_dbg_info(node);
3037 ir_node *block = get_nodes_block(node);
3038 ir_node *new_block = be_transform_node(block);
3039 ir_node *val_true = get_Mux_true(node);
3040 ir_node *val_false = get_Mux_false(node);
3042 ia32_address_mode_t am;
3043 ia32_address_t *addr;
3045 assert(ia32_cg_config.use_cmov);
3046 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3050 match_arguments(&am, block, val_false, val_true, flags,
3051 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* operand order swapped by the matcher -> invert the condition */
3053 if (am.ins_permuted)
3054 cc = ia32_invert_condition_code(cc);
3056 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3057 addr->mem, am.new_op1, am.new_op2, new_flags,
3059 set_am_attributes(new_node, &am);
3061 SET_IA32_ORIG_NODE(new_node, node);
3063 new_node = fix_mem_proj(new_node, &am);
/*
 * Creates a Setcc producing a 0/1 byte from @p flags; the result is
 * zero-extended with a Conv when the requested mode is wider than 8 bit
 * (setcc only writes the low byte).
 */
3069 * Creates a ia32 Setcc instruction.
3071 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3072 ir_node *flags, ia32_condition_code_t cc,
3075 ir_mode *mode = get_irn_mode(orig_node);
3078 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3079 SET_IA32_ORIG_NODE(new_node, orig_node);
3081 /* we might need to conv the result up */
3082 if (get_mode_size_bits(mode) > 8) {
3083 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3084 nomem, new_node, mode_Bu);
3085 SET_IA32_ORIG_NODE(new_node, orig_node);
3092 * Create instruction for an unsigned Difference or Zero.
3094 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3096 ir_mode *mode = get_irn_mode(psi);
3106 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3107 match_mode_neutral | match_am | match_immediate | match_two_users);
3109 block = get_nodes_block(new_node);
3111 if (is_Proj(new_node)) {
3112 sub = get_Proj_pred(new_node);
3113 assert(is_ia32_Sub(sub));
3116 set_irn_mode(sub, mode_T);
3117 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3119 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3121 dbgi = get_irn_dbg_info(psi);
3122 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3123 notn = new_bd_ia32_Not(dbgi, block, sbb);
3125 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3126 set_ia32_commutative(new_node);
/*
 * Emits a private, constant global entity holding the two float constants
 * (used as a 2-entry lookup table indexed by a Setcc result in gen_Mux).
 * The constants are narrowed to the smallest lossless common mode when
 * *new_mode is NULL on entry.
 */
3131 * Create an const array of two float consts.
3133 * @param c0 the first constant
3134 * @param c1 the second constant
3135 * @param new_mode IN/OUT for the mode of the constants, if NULL
3136 * smallest possible mode will be used
3138 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3141 ir_mode *mode = *new_mode;
3143 ir_initializer_t *initializer;
3144 ir_tarval *tv0 = get_Const_tarval(c0);
3145 ir_tarval *tv1 = get_Const_tarval(c1);
3148 /* detect the best mode for the constants */
3149 mode = get_tarval_mode(tv0);
3151 if (mode != mode_F) {
/* try single precision first: only valid if both convert losslessly */
3152 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3153 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3155 tv0 = tarval_convert_to(tv0, mode);
3156 tv1 = tarval_convert_to(tv1, mode);
3157 } else if (mode != mode_D) {
/* fall back to double precision */
3158 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3159 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3161 tv0 = tarval_convert_to(tv0, mode);
3162 tv1 = tarval_convert_to(tv1, mode);
3169 tp = ia32_create_float_type(mode, 4);
3170 tp = ia32_create_float_array(tp);
3172 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3174 set_entity_ld_ident(ent, get_entity_ident(ent));
3175 set_entity_visibility(ent, ir_visibility_private);
3176 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3178 initializer = create_initializer_compound(2);
3180 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3181 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3183 set_entity_initializer(ent, initializer);
/*
 * Plan produced by find_const_transform(): a condition code plus a small
 * sequence of post-processing instructions (SET/ADD/LEA/SHL/NEG/NOT/AND/
 * SBB, enumerator list elided here) that turn the 0/1 Setcc result into
 * the two requested Mux constants.  The step array and num_steps fields
 * are elided in this excerpt.
 */
3190 * Possible transformations for creating a Setcc.
3192 enum setcc_transform_insn {
3205 typedef struct setcc_transform {
3207 ia32_condition_code_t cc;
3209 enum setcc_transform_insn transform;
3213 } setcc_transform_t;
/*
 * Computes an instruction plan to turn a Setcc's 0/1 result into the
 * constants f (condition false) and t (condition true).  Normalizes so
 * that t > f and f == 0 (negating cc / scheduling an ADD of f as needed),
 * then matches t against 1, -1, small multiplier patterns (LEA/SHL) and
 * single-bit values; panics on non-long tarvals.
 */
3216 * Setcc can only handle 0 and 1 result.
3217 * Find a transformation that creates 0 and 1 from
3220 static void find_const_transform(ia32_condition_code_t cc,
3221 ir_tarval *t, ir_tarval *f,
3222 setcc_transform_t *res)
/* swap t/f so the "true" value is the interesting (bigger) one */
3228 if (tarval_is_null(t)) {
3232 cc = ia32_negate_condition_code(cc);
3233 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3234 // now, t is the bigger one
3238 cc = ia32_negate_condition_code(cc);
/* normalize f to 0 by planning a final ADD of f and working on t - f */
3242 if (! tarval_is_null(f)) {
3243 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3246 res->steps[step].transform = SETCC_TR_ADD;
3248 if (t == tarval_bad)
3249 panic("constant subtract failed");
3250 if (! tarval_is_long(f))
3251 panic("tarval is not long");
3253 res->steps[step].val = get_tarval_long(f);
3255 f = tarval_sub(f, f, NULL);
3256 assert(tarval_is_null(f));
/* t == 1: the raw Setcc result already is the wanted value */
3259 if (tarval_is_one(t)) {
3260 res->steps[step].transform = SETCC_TR_SET;
3261 res->num_steps = ++step;
/* t == -1: negate the 0/1 result */
3265 if (tarval_is_minus_one(t)) {
3266 res->steps[step].transform = SETCC_TR_NEG;
3268 res->steps[step].transform = SETCC_TR_SET;
3269 res->num_steps = ++step;
3272 if (tarval_is_long(t)) {
3273 long v = get_tarval_long(t);
3275 res->steps[step].val = 0;
/* (case labels for the small multipliers are elided below;
 * scale n encodes (a << n) [+ a] via LEA/SHL) */
3278 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3280 res->steps[step].transform = SETCC_TR_LEAxx;
3281 res->steps[step].scale = 3; /* (a << 3) + a */
3284 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3286 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3287 res->steps[step].scale = 3; /* (a << 3) */
3290 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3292 res->steps[step].transform = SETCC_TR_LEAxx;
3293 res->steps[step].scale = 2; /* (a << 2) + a */
3296 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3298 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3299 res->steps[step].scale = 2; /* (a << 2) */
3302 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3304 res->steps[step].transform = SETCC_TR_LEAxx;
3305 res->steps[step].scale = 1; /* (a << 1) + a */
3308 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3310 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3311 res->steps[step].scale = 1; /* (a << 1) */
3314 res->num_steps = step;
/* generic case: mask to a single bit if necessary, then shift into place */
3317 if (! tarval_is_single_bit(t)) {
3318 res->steps[step].transform = SETCC_TR_AND;
3319 res->steps[step].val = v;
3321 res->steps[step].transform = SETCC_TR_NEG;
3323 int v = get_tarval_lowest_bit(t);
3326 res->steps[step].transform = SETCC_TR_SHL;
3327 res->steps[step].scale = v;
3331 res->steps[step].transform = SETCC_TR_SET;
3332 res->num_steps = ++step;
3335 panic("tarval is not long");
/*
 * Transforms a Mux.  Handled cases:
 *  - abs patterns (via be_mux_is_abs),
 *  - float Mux on SSE2 with min/max compare patterns -> xMin/xMax,
 *  - float Mux of two constants -> Setcc-indexed load from a 2-entry
 *    constant array (ia32_create_const_array),
 *  - integer "difference or zero" patterns -> create_doz,
 *  - integer Mux of two constants -> Setcc + find_const_transform plan,
 *  - everything else -> CMovcc.
 */
3339 * Transforms a Mux node into some code sequence.
3341 * @return The transformed node.
3343 static ir_node *gen_Mux(ir_node *node)
3345 dbg_info *dbgi = get_irn_dbg_info(node);
3346 ir_node *block = get_nodes_block(node);
3347 ir_node *new_block = be_transform_node(block);
3348 ir_node *mux_true = get_Mux_true(node);
3349 ir_node *mux_false = get_Mux_false(node);
3350 ir_node *cond = get_Mux_sel(node);
3351 ir_mode *mode = get_irn_mode(node);
3355 ia32_condition_code_t cc;
3357 assert(get_irn_mode(cond) == mode_b);
3359 is_abs = be_mux_is_abs(cond, mux_true, mux_false);
3361 return create_abs(dbgi, block, be_get_abs_op(cond), is_abs < 0, node);
3364 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3365 if (mode_is_float(mode)) {
3366 ir_node *cmp = get_Proj_pred(cond);
3367 ir_node *cmp_left = get_Cmp_left(cmp);
3368 ir_node *cmp_right = get_Cmp_right(cmp);
3369 int pnc = get_Proj_proj(cond);
3371 if (ia32_cg_config.use_sse2) {
3372 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3373 if (cmp_left == mux_true && cmp_right == mux_false) {
3374 /* Mux(a <= b, a, b) => MIN */
3375 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3376 match_commutative | match_am | match_two_users);
3377 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3378 /* Mux(a <= b, b, a) => MAX */
3379 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3380 match_commutative | match_am | match_two_users);
3382 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3383 if (cmp_left == mux_true && cmp_right == mux_false) {
3384 /* Mux(a >= b, a, b) => MAX */
3385 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3386 match_commutative | match_am | match_two_users);
3387 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3388 /* Mux(a >= b, b, a) => MIN */
3389 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3390 match_commutative | match_am | match_two_users);
/* float Mux of two constants: index a 2-entry constant table with the
 * Setcc result and load the selected value */
3395 if (is_Const(mux_true) && is_Const(mux_false)) {
3396 ia32_address_mode_t am;
3401 flags = get_flags_node(cond, &cc);
3402 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3404 if (ia32_cg_config.use_sse2) {
3405 /* cannot load from different mode on SSE */
3408 /* x87 can load any mode */
3412 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (cases partly elided) */
3414 switch (get_mode_size_bytes(new_mode)) {
3424 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3425 set_ia32_am_scale(new_node, 2);
3430 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3431 set_ia32_am_scale(new_node, 1);
3434 /* arg, shift 16 NOT supported */
3436 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3439 panic("Unsupported constant size");
3442 am.ls_mode = new_mode;
3443 am.addr.base = get_symconst_base();
3444 am.addr.index = new_node;
3445 am.addr.mem = nomem;
3447 am.addr.scale = scale;
3448 am.addr.use_frame = 0;
3449 am.addr.frame_entity = NULL;
3450 am.addr.symconst_sign = 0;
3451 am.mem_proj = am.addr.mem;
3452 am.op_type = ia32_AddrModeS;
3455 am.pinned = op_pin_state_floats;
3457 am.ins_permuted = false;
3459 if (ia32_cg_config.use_sse2)
3460 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3462 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3463 set_am_attributes(load, &am);
3465 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3467 panic("cannot transform floating point Mux");
/* ---- integer Mux from here on ---- */
3470 assert(ia32_mode_needs_gp_reg(mode));
3472 if (is_Proj(cond)) {
3473 ir_node *cmp = get_Proj_pred(cond);
3475 ir_node *cmp_left = get_Cmp_left(cmp);
3476 ir_node *cmp_right = get_Cmp_right(cmp);
3477 ir_node *val_true = mux_true;
3478 ir_node *val_false = mux_false;
3479 int pnc = get_Proj_proj(cond);
/* normalize so the zero constant sits in val_false */
3481 if (is_Const(val_true) && is_Const_null(val_true)) {
3482 ir_node *tmp = val_false;
3483 val_false = val_true;
3485 pnc = get_negated_pnc(pnc, get_irn_mode(cmp_left));
/* Mux(a >= b, a - b, 0) and the mirrored form -> branch-free doz */
3487 if (is_Const_0(val_false) && is_Sub(val_true)) {
3488 if ((pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge)
3489 && get_Sub_left(val_true) == cmp_left
3490 && get_Sub_right(val_true) == cmp_right) {
3491 return create_doz(node, cmp_left, cmp_right);
3493 if ((pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le)
3494 && get_Sub_left(val_true) == cmp_right
3495 && get_Sub_right(val_true) == cmp_left) {
3496 return create_doz(node, cmp_right, cmp_left);
3502 flags = get_flags_node(cond, &cc);
3504 if (is_Const(mux_true) && is_Const(mux_false)) {
3505 /* both are const, good */
3506 ir_tarval *tv_true = get_Const_tarval(mux_true);
3507 ir_tarval *tv_false = get_Const_tarval(mux_false);
3508 setcc_transform_t res;
3511 find_const_transform(cc, tv_true, tv_false, &res);
/* apply the planned post-processing steps in reverse order */
3513 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3516 switch (res.steps[step].transform) {
3518 imm = ia32_immediate_from_long(res.steps[step].val);
3519 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3521 case SETCC_TR_ADDxx:
3522 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3525 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3526 set_ia32_am_scale(new_node, res.steps[step].scale);
3527 set_ia32_am_offs_int(new_node, res.steps[step].val);
3529 case SETCC_TR_LEAxx:
3530 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3531 set_ia32_am_scale(new_node, res.steps[step].scale);
3532 set_ia32_am_offs_int(new_node, res.steps[step].val);
3535 imm = ia32_immediate_from_long(res.steps[step].scale);
3536 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3539 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3542 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3545 imm = ia32_immediate_from_long(res.steps[step].val);
3546 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3549 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3552 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3555 panic("unknown setcc transform");
/* general case: conditional move */
3559 new_node = create_CMov(node, cond, flags, cc);
/*
 * Converts an x87 value to an integer by spilling with fist(tp) to the
 * frame and reloading as a 32-bit integer.  Unsigned 32-bit targets are
 * stored as a signed 64-bit integer (fist cannot store unsigned) and only
 * the low 32 bits are loaded back.
 */
3567 * Create a conversion from x87 state register to general purpose.
3569 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3571 ir_node *block = be_transform_node(get_nodes_block(node));
3572 ir_node *op = get_Conv_op(node);
3573 ir_node *new_op = be_transform_node(op);
3574 ir_graph *irg = current_ir_graph;
3575 dbg_info *dbgi = get_irn_dbg_info(node);
3576 ir_mode *mode = get_irn_mode(node);
3577 ir_node *fist, *load, *mem;
3579 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3580 set_irn_pinned(fist, op_pin_state_floats);
3581 set_ia32_use_frame(fist);
3582 set_ia32_op_type(fist, ia32_AddrModeD);
3584 assert(get_mode_size_bits(mode) <= 32);
3585 /* exception we can only store signed 32 bit integers, so for unsigned
3586 we store a 64bit (signed) integer and load the lower bits */
3587 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3588 set_ia32_ls_mode(fist, mode_Ls);
3590 set_ia32_ls_mode(fist, mode_Is);
3592 SET_IA32_ORIG_NODE(fist, node);
/* reload the (low 32 bits of the) stored integer from the frame */
3595 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3597 set_irn_pinned(load, op_pin_state_floats);
3598 set_ia32_use_frame(load);
3599 set_ia32_op_type(load, ia32_AddrModeS);
3600 set_ia32_ls_mode(load, mode_Is);
/* frame-entity allocation must know whether an 8-byte slot is needed */
3601 if (get_ia32_ls_mode(fist) == mode_Ls) {
3602 ia32_attr_t *attr = get_ia32_attr(load);
3603 attr->data.need_64bit_stackent = 1;
3605 ia32_attr_t *attr = get_ia32_attr(load);
3606 attr->data.need_32bit_stackent = 1;
3608 SET_IA32_ORIG_NODE(load, node);
3610 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/*
 * Implements a strict float Conv on x87 by a round-trip through memory:
 * store in the target precision, then reload.  This forces the rounding
 * that the 80-bit x87 registers would otherwise skip.
 */
3614 * Creates a x87 strict Conv by placing a Store and a Load
3616 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3618 ir_node *block = get_nodes_block(node);
3619 ir_graph *irg = get_Block_irg(block);
3620 dbg_info *dbgi = get_irn_dbg_info(node);
3621 ir_node *frame = get_irg_frame(irg);
3622 ir_node *store, *load;
3625 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3626 set_ia32_use_frame(store);
3627 set_ia32_op_type(store, ia32_AddrModeD);
3628 SET_IA32_ORIG_NODE(store, node);
3630 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3631 set_ia32_use_frame(load);
3632 set_ia32_op_type(load, ia32_AddrModeS);
3633 SET_IA32_ORIG_NODE(load, node);
3635 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/*
 * Small dispatcher: builds an integer-to-integer Conv node, picking the
 * 8-bit constructor when the (smaller) mode is a byte mode.
 */
3639 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3640 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3642 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3644 func = get_mode_size_bits(mode) == 8 ?
3645 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3646 return func(dbgi, block, base, index, mem, val, mode);
/*
 * Converts an integer to an x87 value with fild.  Signed 16/32-bit
 * operands may be loaded straight from memory (source AM); otherwise the
 * value is sign-normalized to 32 bit, spilled to the frame and filded.
 * Unsigned 32-bit values get a zero upper half and a 64-bit fild so the
 * sign bit is never misinterpreted.
 */
3652 * Create a conversion from general purpose to x87 register
3654 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3656 ir_node *src_block = get_nodes_block(node);
3655 ir_node *block = be_transform_node(src_block);
3656 ir_graph *irg = get_Block_irg(block);
3657 dbg_info *dbgi = get_irn_dbg_info(node);
3658 ir_node *op = get_Conv_op(node);
3659 ir_node *new_op = NULL;
3661 ir_mode *store_mode;
3666 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3667 if (possible_int_mode_for_fp(src_mode)) {
3668 ia32_address_mode_t am;
3670 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3671 if (am.op_type == ia32_AddrModeS) {
3672 ia32_address_t *addr = &am.addr;
3674 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3675 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3677 set_am_attributes(fild, &am);
3678 SET_IA32_ORIG_NODE(fild, node);
3680 fix_mem_proj(fild, &am);
3685 if (new_op == NULL) {
3686 new_op = be_transform_node(op);
3689 mode = get_irn_mode(op);
3691 /* first convert to 32 bit signed if necessary */
3692 if (get_mode_size_bits(src_mode) < 32) {
3693 if (!upper_bits_clean(new_op, src_mode)) {
3694 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3695 SET_IA32_ORIG_NODE(new_op, node);
3700 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can load it */
3703 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3705 set_ia32_use_frame(store);
3706 set_ia32_op_type(store, ia32_AddrModeD);
3707 set_ia32_ls_mode(store, mode_Iu);
3709 /* exception for 32bit unsigned, do a 64bit spill+load */
3710 if (!mode_is_signed(mode)) {
/* write a zero high word at offset 4, then fild the full 64 bits */
3713 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3715 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3716 noreg_GP, nomem, zero_const);
3718 set_ia32_use_frame(zero_store);
3719 set_ia32_op_type(zero_store, ia32_AddrModeD);
3720 add_ia32_am_offs_int(zero_store, 4);
3721 set_ia32_ls_mode(zero_store, mode_Iu);
3726 store = new_rd_Sync(dbgi, block, 2, in);
3727 store_mode = mode_Ls;
3729 store_mode = mode_Is;
3733 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3735 set_ia32_use_frame(fild);
3736 set_ia32_op_type(fild, ia32_AddrModeS);
3737 set_ia32_ls_mode(fild, store_mode);
3739 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/*
 * Creates an integer-to-integer Conv: the smaller of the two modes
 * decides the extension width.  If the operand's upper bits are already
 * clean, the Conv is dropped entirely (the matched operand must then have
 * had no address-mode parts, which the asserts verify).
 */
3745 * Create a conversion from one integer mode into another one
3747 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3748 dbg_info *dbgi, ir_node *block, ir_node *op,
3751 ir_node *new_block = be_transform_node(block);
3753 ir_mode *smaller_mode;
3754 ia32_address_mode_t am;
3755 ia32_address_t *addr = &am.addr;
3758 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3759 smaller_mode = src_mode;
3761 smaller_mode = tgt_mode;
/* a Conv directly after a Const should have been folded by the
 * middleend -- warn in debug builds */
3764 #ifdef DEBUG_libfirm
3766 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3771 match_arguments(&am, block, NULL, op, NULL,
3772 match_am | match_8bit_am | match_16bit_am);
3774 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3775 /* unnecessary conv. in theory it shouldn't have been AM */
3776 assert(is_ia32_NoReg_GP(addr->base));
3777 assert(is_ia32_NoReg_GP(addr->index));
3778 assert(is_NoMem(addr->mem));
3779 assert(am.addr.offset == 0);
3780 assert(am.addr.symconst_ent == NULL);
3784 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3785 addr->mem, am.new_op2, smaller_mode);
3786 set_am_attributes(new_node, &am);
3787 /* match_arguments assume that out-mode = in-mode, this isn't true here
3789 set_ia32_ls_mode(new_node, smaller_mode);
3790 SET_IA32_ORIG_NODE(new_node, node);
3791 new_node = fix_mem_proj(new_node, &am);
/**
3796 * Transforms a Conv node.
 *
 * Dispatches on the (src_mode, tgt_mode) combination: no-op convs are
 * eliminated, float<->float / float<->int go through SSE or x87 depending
 * on ia32_cg_config.use_sse2, and int->int is handled by create_I2I_Conv.
 *
3798 * @return The created ia32 Conv node
 */
3800 static ir_node *gen_Conv(ir_node *node)
3802 ir_node *block = get_nodes_block(node);
3803 ir_node *new_block = be_transform_node(block);
3804 ir_node *op = get_Conv_op(node);
3805 ir_node *new_op = NULL;
3806 dbg_info *dbgi = get_irn_dbg_info(node);
3807 ir_mode *src_mode = get_irn_mode(op);
3808 ir_mode *tgt_mode = get_irn_mode(node);
3809 int src_bits = get_mode_size_bits(src_mode);
3810 int tgt_bits = get_mode_size_bits(tgt_mode);
3811 ir_node *res = NULL;
/* 64bit integers must have been split by the 64bit lowering before */
3813 assert(!mode_is_int(src_mode) || src_bits <= 32);
3814 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3816 /* modeB -> X should already be lowered by the lower_mode_b pass */
3817 if (src_mode == mode_b) {
3818 panic("ConvB not lowered %+F", node);
/* same-mode conv: only strict x87 convs may have an effect */
3821 if (src_mode == tgt_mode) {
3822 if (get_Conv_strict(node)) {
3823 if (ia32_cg_config.use_sse2) {
3824 /* when we are in SSE mode, we can kill all strict no-op conversion */
3825 return be_transform_node(op);
3828 /* this should be optimized already, but who knows... */
3829 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3830 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3831 return be_transform_node(op);
3835 if (mode_is_float(src_mode)) {
3836 new_op = be_transform_node(op);
3837 /* we convert from float ... */
3838 if (mode_is_float(tgt_mode)) {
3840 if (ia32_cg_config.use_sse2) {
3841 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3842 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3844 set_ia32_ls_mode(res, tgt_mode);
/* x87: precision is only forced by an explicit strict conv (store/load) */
3846 if (get_Conv_strict(node)) {
3847 /* if fp_no_float_fold is not set then we assume that we
3848 * don't have any float operations in a non
3849 * mode_float_arithmetic mode and can skip strict upconvs */
3850 if (src_bits < tgt_bits) {
3851 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3854 res = gen_x87_strict_conv(tgt_mode, new_op);
3855 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3859 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* ... from float to int */
3864 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3865 if (ia32_cg_config.use_sse2) {
3866 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3868 set_ia32_ls_mode(res, src_mode);
3870 return gen_x87_fp_to_gp(node);
3874 /* we convert from int ... */
3875 if (mode_is_float(tgt_mode)) {
3877 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3878 if (ia32_cg_config.use_sse2) {
3879 new_op = be_transform_node(op);
3880 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3882 set_ia32_ls_mode(res, tgt_mode);
3884 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3885 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3886 res = gen_x87_gp_to_fp(node, src_mode);
3888 /* we need a strict-Conv, if the int mode has more bits than the
3890 if (float_mantissa < int_mantissa) {
3891 res = gen_x87_strict_conv(tgt_mode, res);
3892 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3896 } else if (tgt_mode == mode_b) {
3897 /* mode_b lowering already took care that we only have 0/1 values */
3898 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3899 src_mode, tgt_mode));
3900 return be_transform_node(op);
/* int -> int: same width needs no code at all */
3903 if (src_bits == tgt_bits) {
3904 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3905 src_mode, tgt_mode));
3906 return be_transform_node(op);
3909 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3917 static ir_node *create_immediate_or_transform(ir_node *node,
3918 char immediate_constraint_type)
3920 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3921 if (new_node == NULL) {
3922 new_node = be_transform_node(node);
3928 * Transforms a FrameAddr into an ia32 Add.
3930 static ir_node *gen_be_FrameAddr(ir_node *node)
3932 ir_node *block = be_transform_node(get_nodes_block(node));
3933 ir_node *op = be_get_FrameAddr_frame(node);
3934 ir_node *new_op = be_transform_node(op);
3935 dbg_info *dbgi = get_irn_dbg_info(node);
3938 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3939 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3940 set_ia32_use_frame(new_node);
3942 SET_IA32_ORIG_NODE(new_node, node);
/**
3948 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
 *
 * The calling convention returns floats on the x87 stack, but with SSE2 the
 * value lives in an XMM register; spill it to the frame and re-load it with
 * vfld, then rebuild the return Barrier with the new value/memory inputs.
 */
3950 static ir_node *gen_be_Return(ir_node *node)
3952 ir_graph *irg = current_ir_graph;
3953 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3954 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3955 ir_entity *ent = get_irg_entity(irg);
3956 ir_type *tp = get_entity_type(ent);
3961 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3962 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3964 int pn_ret_val, pn_ret_mem, arity, i;
3966 assert(ret_val != NULL);
/* the special handling is only needed for SSE float returns */
3967 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3968 return be_duplicate_node(node);
3971 res_type = get_method_res_type(tp, 0);
3973 if (! is_Primitive_type(res_type)) {
3974 return be_duplicate_node(node);
3977 mode = get_type_mode(res_type);
3978 if (! mode_is_float(mode)) {
3979 return be_duplicate_node(node);
3982 assert(get_method_n_ress(tp) == 1);
3984 pn_ret_val = get_Proj_proj(ret_val);
3985 pn_ret_mem = get_Proj_proj(ret_mem);
3987 /* get the Barrier */
3988 barrier = get_Proj_pred(ret_val);
3990 /* get result input of the Barrier */
3991 ret_val = get_irn_n(barrier, pn_ret_val);
3992 new_ret_val = be_transform_node(ret_val);
3994 /* get memory input of the Barrier */
3995 ret_mem = get_irn_n(barrier, pn_ret_mem);
3996 new_ret_mem = be_transform_node(ret_mem);
3998 frame = get_irg_frame(irg);
4000 dbgi = get_irn_dbg_info(barrier);
4001 block = be_transform_node(get_nodes_block(barrier));
4003 /* store xmm0 onto stack */
4004 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4005 new_ret_mem, new_ret_val);
4006 set_ia32_ls_mode(sse_store, mode);
4007 set_ia32_op_type(sse_store, ia32_AddrModeD);
4008 set_ia32_use_frame(sse_store);
4010 /* load into x87 register */
4011 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
4012 set_ia32_op_type(fld, ia32_AddrModeS);
4013 set_ia32_use_frame(fld);
4015 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4016 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4018 /* create a new barrier */
4019 arity = get_irn_arity(barrier);
4020 in = ALLOCAN(ir_node*, arity);
4021 for (i = 0; i < arity; ++i) {
/* substitute the x87 value / the vfld memory for the original inputs */
4024 if (i == pn_ret_val) {
4026 } else if (i == pn_ret_mem) {
4029 ir_node *in = get_irn_n(barrier, i);
4030 new_in = be_transform_node(in);
4035 new_barrier = new_ir_node(dbgi, irg, block,
4036 get_irn_op(barrier), get_irn_mode(barrier),
4038 copy_node_attr(irg, barrier, new_barrier);
4039 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the Return's Proj inputs pick it up */
4040 be_set_transformed_node(barrier, new_barrier);
4042 /* transform normally */
4043 return be_duplicate_node(node);
4047 * Transform a be_AddSP into an ia32_SubSP.
4049 static ir_node *gen_be_AddSP(ir_node *node)
4051 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4052 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4054 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4055 match_am | match_immediate);
4059 * Transform a be_SubSP into an ia32_AddSP
4061 static ir_node *gen_be_SubSP(ir_node *node)
4063 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4064 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4066 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4067 match_am | match_immediate);
/**
4071 * Change some phi modes
 *
 * Keeps the Phi opcode but attaches the register requirement matching the
 * mode (gp / xmm / vfp).  Operands are NOT transformed here — Phis may close
 * loops, so the old inputs are kept and fixed up later (preds are enqueued).
 */
4073 static ir_node *gen_Phi(ir_node *node)
4075 const arch_register_req_t *req;
4076 ir_node *block = be_transform_node(get_nodes_block(node));
4077 ir_graph *irg = current_ir_graph;
4078 dbg_info *dbgi = get_irn_dbg_info(node);
4079 ir_mode *mode = get_irn_mode(node);
4082 if (ia32_mode_needs_gp_reg(mode)) {
4083 /* we shouldn't have any 64bit stuff around anymore */
4084 assert(get_mode_size_bits(mode) <= 32);
4085 /* all integer operations are on 32bit registers now */
4087 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4088 } else if (mode_is_float(mode)) {
4089 if (ia32_cg_config.use_sse2) {
4091 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4094 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* e.g. memory Phis get no register requirement */
4097 req = arch_no_register_req;
4100 /* phi nodes allow loops, so we use the old arguments for now
4101 * and fix this later */
4102 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4103 get_irn_in(node) + 1);
4104 copy_node_attr(irg, node, phi);
4105 be_duplicate_deps(node, phi);
4107 arch_set_out_register_req(phi, 0, req);
/* make sure the predecessors get transformed eventually */
4109 be_enqueue_preds(node);
4114 static ir_node *gen_Jmp(ir_node *node)
4116 ir_node *block = get_nodes_block(node);
4117 ir_node *new_block = be_transform_node(block);
4118 dbg_info *dbgi = get_irn_dbg_info(node);
4121 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4122 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform an indirect jump (IJmp) into an ia32 IJmp.
 * The jump target may be folded as an address-mode operand or immediate.
 */
4130 static ir_node *gen_IJmp(ir_node *node)
4132 ir_node *block = get_nodes_block(node);
4133 ir_node *new_block = be_transform_node(block);
4134 dbg_info *dbgi = get_irn_dbg_info(node);
4135 ir_node *op = get_IJmp_target(node);
4137 ia32_address_mode_t am;
4138 ia32_address_t *addr = &am.addr;
/* the jump target must be a pointer */
4140 assert(get_irn_mode(op) == mode_P);
4142 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4144 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4145 addr->mem, am.new_op2);
4146 set_am_attributes(new_node, &am);
4147 SET_IA32_ORIG_NODE(new_node, node);
/* re-route a possible memory Proj if a load was folded in */
4149 new_node = fix_mem_proj(new_node, &am);
4154 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4156 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4157 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4159 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4160 match_immediate | match_mode_neutral);
4163 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4165 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4166 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4167 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
4171 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4173 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4174 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4175 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transform the 64bit-lowering helper node l_Add into a real ia32 Add.
 * The result is normalized to the mode_T Add node itself (not a Proj), so
 * its other outputs stay reachable — presumably the flags/carry output
 * consumed by the matching l_Adc; see gen_ia32_l_Adc.
 */
4179 static ir_node *gen_ia32_l_Add(ir_node *node)
4181 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4182 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4183 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4184 match_commutative | match_am | match_immediate |
4185 match_mode_neutral);
/* gen_binop may return a Proj on the Add — unwrap to the Add itself */
4187 if (is_Proj(lowered)) {
4188 lowered = get_Proj_pred(lowered);
4190 assert(is_ia32_Add(lowered));
4191 set_irn_mode(lowered, mode_T);
4197 static ir_node *gen_ia32_l_Adc(ir_node *node)
4199 return gen_binop_flags(node, new_bd_ia32_Adc,
4200 match_commutative | match_am | match_immediate |
4201 match_mode_neutral);
4205 * Transforms a l_MulS into a "real" MulS node.
4207 * @return the created ia32 Mul node
4209 static ir_node *gen_ia32_l_Mul(ir_node *node)
4211 ir_node *left = get_binop_left(node);
4212 ir_node *right = get_binop_right(node);
4214 return gen_binop(node, left, right, new_bd_ia32_Mul,
4215 match_commutative | match_am | match_mode_neutral);
4219 * Transforms a l_IMulS into a "real" IMul1OPS node.
4221 * @return the created ia32 IMul1OP node
4223 static ir_node *gen_ia32_l_IMul(ir_node *node)
4225 ir_node *left = get_binop_left(node);
4226 ir_node *right = get_binop_right(node);
4228 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4229 match_commutative | match_am | match_mode_neutral);
/**
 * Transform the 64bit-lowering helper node l_Sub into a real ia32 Sub.
 * As with l_Add, the result is normalized to the mode_T Sub node itself so
 * its other outputs stay reachable — presumably the flags/borrow output
 * consumed by the matching l_Sbb; see gen_ia32_l_Sbb.
 */
4232 static ir_node *gen_ia32_l_Sub(ir_node *node)
4234 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4235 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4236 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4237 match_am | match_immediate | match_mode_neutral);
/* gen_binop may return a Proj on the Sub — unwrap to the Sub itself */
4239 if (is_Proj(lowered)) {
4240 lowered = get_Proj_pred(lowered);
4242 assert(is_ia32_Sub(lowered));
4243 set_irn_mode(lowered, mode_T);
4249 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4251 return gen_binop_flags(node, new_bd_ia32_Sbb,
4252 match_am | match_immediate | match_mode_neutral);
/**
4256 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4257 * op1 - target to be shifted
4258 * op2 - contains bits to be shifted into target
 * op3 - shift count
 *
4260 * Only op3 can be an immediate.
 */
4262 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4263 ir_node *low, ir_node *count)
4265 ir_node *block = get_nodes_block(node);
4266 ir_node *new_block = be_transform_node(block);
4267 dbg_info *dbgi = get_irn_dbg_info(node);
4268 ir_node *new_high = be_transform_node(high);
4269 ir_node *new_low = be_transform_node(low);
4273 /* the shift amount can be any mode that is bigger than 5 bits, since all
4274 * other bits are ignored anyway */
4275 while (is_Conv(count) &&
4276 get_irn_n_edges(count) == 1 &&
4277 mode_is_int(get_irn_mode(count))) {
4278 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4279 count = get_Conv_op(count);
/* the count may be encodable as an immediate */
4281 new_count = create_immediate_or_transform(count, 0);
/* pick double-shift direction according to the lowered node's opcode */
4283 if (is_ia32_l_ShlD(node)) {
4284 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4287 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4290 SET_IA32_ORIG_NODE(new_node, node);
4295 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4297 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4298 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4299 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4300 return gen_lowered_64bit_shifts(node, high, low, count);
4303 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4305 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4306 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4307 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4308 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform the 64bit-lowering helper node l_LLtoFloat (long long -> float).
 *
 * Spills the two 32bit halves to the frame and reloads them with a single
 * 64bit fild.  fild interprets the value as signed, so for an unsigned
 * source with the sign bit set an ULLBIAS correction constant is added.
 */
4311 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4313 ir_node *src_block = get_nodes_block(node);
4314 ir_node *block = be_transform_node(src_block);
4315 ir_graph *irg = current_ir_graph;
4316 dbg_info *dbgi = get_irn_dbg_info(node);
4317 ir_node *frame = get_irg_frame(irg);
4318 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4319 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4320 ir_node *new_val_low = be_transform_node(val_low);
4321 ir_node *new_val_high = be_transform_node(val_high);
4323 ir_node *sync, *fild, *res;
4324 ir_node *store_low, *store_high;
4326 if (ia32_cg_config.use_sse2) {
4327 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* write the two halves into one 64bit frame slot (little endian:
 * low word at offset 0, high word at offset 4) */
4331 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4333 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4335 SET_IA32_ORIG_NODE(store_low, node);
4336 SET_IA32_ORIG_NODE(store_high, node);
4338 set_ia32_use_frame(store_low);
4339 set_ia32_use_frame(store_high);
4340 set_ia32_op_type(store_low, ia32_AddrModeD);
4341 set_ia32_op_type(store_high, ia32_AddrModeD);
4342 set_ia32_ls_mode(store_low, mode_Iu);
4343 set_ia32_ls_mode(store_high, mode_Is);
4344 add_ia32_am_offs_int(store_high, 4);
/* the fild must see both stores */
4348 sync = new_rd_Sync(dbgi, block, 2, in);
4351 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4353 set_ia32_use_frame(fild);
4354 set_ia32_op_type(fild, ia32_AddrModeS);
4355 set_ia32_ls_mode(fild, mode_Ls);
4357 SET_IA32_ORIG_NODE(fild, node);
4359 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned source: if the sign bit was set, fild produced a value that is
 * 2^64 too small — add the ULLBIAS constant selected by sign-bit>>31 */
4361 if (! mode_is_signed(get_irn_mode(val_high))) {
4362 ia32_address_mode_t am;
4364 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4367 am.addr.base = get_symconst_base();
4368 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4369 am.addr.mem = nomem;
4372 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4373 am.addr.use_frame = 0;
4374 am.addr.frame_entity = NULL;
4375 am.addr.symconst_sign = 0;
4376 am.ls_mode = mode_F;
4377 am.mem_proj = nomem;
4378 am.op_type = ia32_AddrModeS;
4380 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4381 am.pinned = op_pin_state_floats;
4383 am.ins_permuted = false;
4385 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4386 am.new_op1, am.new_op2, get_fpcw());
4387 set_am_attributes(fadd, &am);
4389 set_irn_mode(fadd, mode_T);
4390 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform the 64bit-lowering helper node l_FloattoLL (float -> long long).
 * Stores the x87 value into a 64bit frame slot via fist; the two 32bit
 * halves are read back by gen_Proj_l_FloattoLL.
 */
4395 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4397 ir_node *src_block = get_nodes_block(node);
4398 ir_node *block = be_transform_node(src_block);
4399 ir_graph *irg = get_Block_irg(block);
4400 dbg_info *dbgi = get_irn_dbg_info(node);
4401 ir_node *frame = get_irg_frame(irg);
4402 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4403 ir_node *new_val = be_transform_node(val);
4404 ir_node *fist, *mem;
/* fist writes the 64bit integer result into the frame slot */
4406 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4407 SET_IA32_ORIG_NODE(fist, node);
4408 set_ia32_use_frame(fist);
4409 set_ia32_op_type(fist, ia32_AddrModeD);
4410 set_ia32_ls_mode(fist, mode_Ls);
/**
 * Transform a Proj on a l_FloattoLL: load the requested 32bit half of the
 * 64bit fist result back from the frame slot (offset 4 selects the high
 * word, little endian).
 */
4415 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4417 ir_node *block = be_transform_node(get_nodes_block(node));
4418 ir_graph *irg = get_Block_irg(block);
4419 ir_node *pred = get_Proj_pred(node);
4420 ir_node *new_pred = be_transform_node(pred);
4421 ir_node *frame = get_irg_frame(irg);
4422 dbg_info *dbgi = get_irn_dbg_info(node);
4423 long pn = get_Proj_proj(node);
4428 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4429 SET_IA32_ORIG_NODE(load, node);
4430 set_ia32_use_frame(load);
4431 set_ia32_op_type(load, ia32_AddrModeS);
4432 set_ia32_ls_mode(load, mode_Iu);
4433 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4434 * 32 bit from it with this particular load */
4435 attr = get_ia32_attr(load);
4436 attr->data.need_64bit_stackent = 1;
4438 if (pn == pn_ia32_l_FloattoLL_res_high) {
4439 add_ia32_am_offs_int(load, 4);
4441 assert(pn == pn_ia32_l_FloattoLL_res_low);
4444 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
4450 * Transform the Projs of an AddSP.
 *
 * be_AddSP was transformed into ia32_SubSP (stack grows downwards), so the
 * Proj numbers are mapped onto the SubSP output numbers here.
 */
4452 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4454 ir_node *pred = get_Proj_pred(node);
4455 ir_node *new_pred = be_transform_node(pred);
4456 dbg_info *dbgi = get_irn_dbg_info(node);
4457 long proj = get_Proj_proj(node);
4459 if (proj == pn_be_AddSP_sp) {
4460 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4461 pn_ia32_SubSP_stack);
/* the stack pointer output is pinned to ESP */
4462 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4464 } else if (proj == pn_be_AddSP_res) {
4465 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4466 pn_ia32_SubSP_addr);
4467 } else if (proj == pn_be_AddSP_M) {
4468 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4471 panic("No idea how to transform proj->AddSP");
/**
4475 * Transform the Projs of a SubSP.
 *
 * be_SubSP was transformed into ia32_AddSP (stack grows downwards), so the
 * Proj numbers are mapped onto the AddSP output numbers here.
 */
4477 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4479 ir_node *pred = get_Proj_pred(node);
4480 ir_node *new_pred = be_transform_node(pred);
4481 dbg_info *dbgi = get_irn_dbg_info(node);
4482 long proj = get_Proj_proj(node);
4484 if (proj == pn_be_SubSP_sp) {
4485 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4486 pn_ia32_AddSP_stack);
/* the stack pointer output is pinned to ESP */
4487 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4489 } else if (proj == pn_be_SubSP_M) {
4490 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4493 panic("No idea how to transform proj->SubSP");
/**
4497 * Transform and renumber the Projs from a Load.
 *
 * Dispatches on what the Load was transformed into (ia32_Load, Conv_I2I,
 * xLoad, vfld) and maps the firm Proj numbers onto the corresponding ia32
 * output numbers.
 */
4499 static ir_node *gen_Proj_Load(ir_node *node)
4502 ir_node *block = be_transform_node(get_nodes_block(node));
4503 ir_node *pred = get_Proj_pred(node);
4504 dbg_info *dbgi = get_irn_dbg_info(node);
4505 long proj = get_Proj_proj(node);
4507 /* loads might be part of source address mode matches, so we don't
4508 * transform the ProjMs yet (with the exception of loads whose result is
4511 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4514 /* this is needed, because sometimes we have loops that are only
4515 reachable through the ProjM */
4516 be_enqueue_preds(node);
4517 /* do it in 2 steps, to silence firm verifier */
4518 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4519 set_Proj_proj(res, pn_ia32_mem);
4523 /* renumber the proj */
4524 new_pred = be_transform_node(pred);
4525 if (is_ia32_Load(new_pred)) {
4528 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4530 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4531 case pn_Load_X_regular:
4532 return new_rd_Jmp(dbgi, block);
4533 case pn_Load_X_except:
4534 /* This Load might raise an exception. Mark it. */
4535 set_ia32_exc_label(new_pred, 1);
4536 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the Load was folded into a sign/zero-extending conversion */
4540 } else if (is_ia32_Conv_I2I(new_pred) ||
4541 is_ia32_Conv_I2I8Bit(new_pred)) {
4542 set_irn_mode(new_pred, mode_T);
4543 if (proj == pn_Load_res) {
4544 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4545 } else if (proj == pn_Load_M) {
4546 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
/* SSE float load */
4548 } else if (is_ia32_xLoad(new_pred)) {
4551 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4553 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4554 case pn_Load_X_regular:
4555 return new_rd_Jmp(dbgi, block);
4556 case pn_Load_X_except:
4557 /* This Load might raise an exception. Mark it. */
4558 set_ia32_exc_label(new_pred, 1);
4559 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
/* x87 float load */
4563 } else if (is_ia32_vfld(new_pred)) {
4566 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4568 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4569 case pn_Load_X_regular:
4570 return new_rd_Jmp(dbgi, block);
4571 case pn_Load_X_except:
4572 /* This Load might raise an exception. Mark it. */
4573 set_ia32_exc_label(new_pred, 1);
4574 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4579 /* can happen for ProJMs when source address mode happened for the
4582 /* however it should not be the result proj, as that would mean the
4583 load had multiple users and should not have been used for
4585 if (proj != pn_Load_M) {
4586 panic("internal error: transformed node not a Load");
4588 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4591 panic("No idea how to transform proj");
/**
4595 * Transform and renumber the Projs from a Div or Mod instruction.
 *
 * The Div may have been transformed into ia32 Div/IDiv (integer),
 * xDiv (SSE float) or vfdiv (x87 float); map the Proj accordingly.
 */
4597 static ir_node *gen_Proj_Div(ir_node *node)
4599 ir_node *block = be_transform_node(get_nodes_block(node));
4600 ir_node *pred = get_Proj_pred(node);
4601 ir_node *new_pred = be_transform_node(pred);
4602 dbg_info *dbgi = get_irn_dbg_info(node);
4603 long proj = get_Proj_proj(node);
/* signed and unsigned integer division share their output numbering */
4605 assert(pn_ia32_Div_M == pn_ia32_IDiv_M);
4606 assert(pn_ia32_Div_div_res == pn_ia32_IDiv_div_res);
4610 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4611 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4612 } else if (is_ia32_xDiv(new_pred)) {
4613 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4614 } else if (is_ia32_vfdiv(new_pred)) {
4615 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4617 panic("Div transformed to unexpected thing %+F", new_pred);
4620 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4621 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4622 } else if (is_ia32_xDiv(new_pred)) {
4623 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4624 } else if (is_ia32_vfdiv(new_pred)) {
4625 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4627 panic("Div transformed to unexpected thing %+F", new_pred);
4629 case pn_Div_X_regular:
4630 return new_rd_Jmp(dbgi, block);
4631 case pn_Div_X_except:
/* division may trap (e.g. divide by zero) — mark the exception label */
4632 set_ia32_exc_label(new_pred, 1);
4633 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4638 panic("No idea how to transform proj->Div");
/**
4642 * Transform and renumber the Projs from a Div or Mod instruction.
 *
 * Mod is always implemented via the integer Div/IDiv node; the remainder
 * is its mod_res output.
 */
4644 static ir_node *gen_Proj_Mod(ir_node *node)
4646 ir_node *pred = get_Proj_pred(node);
4647 ir_node *new_pred = be_transform_node(pred);
4648 dbg_info *dbgi = get_irn_dbg_info(node);
4649 long proj = get_Proj_proj(node);
4651 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* signed and unsigned integer division share their output numbering */
4652 assert(pn_ia32_Div_M == pn_ia32_IDiv_M);
4653 assert(pn_ia32_Div_mod_res == pn_ia32_IDiv_mod_res);
4657 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4659 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4660 case pn_Mod_X_except:
/* division may trap (e.g. divide by zero) — mark the exception label */
4661 set_ia32_exc_label(new_pred, 1);
4662 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4666 panic("No idea how to transform proj->Mod");
/**
4670 * Transform and renumber the Projs from a CopyB.
 *
 * The CopyB was transformed into either ia32_CopyB_i (known small size)
 * or ia32_CopyB (generic); map the memory Proj accordingly.
 */
4672 static ir_node *gen_Proj_CopyB(ir_node *node)
4674 ir_node *pred = get_Proj_pred(node);
4675 ir_node *new_pred = be_transform_node(pred);
4676 dbg_info *dbgi = get_irn_dbg_info(node);
4677 long proj = get_Proj_proj(node);
4681 if (is_ia32_CopyB_i(new_pred)) {
4682 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4683 } else if (is_ia32_CopyB(new_pred)) {
4684 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4691 panic("No idea how to transform proj->CopyB");
/**
 * Transform a be_Call (indirect call) into an ia32 Call.
 *
 * The call target may be folded as an address-mode operand or immediate;
 * register parameters are routed to their fixed EAX/ECX/EDX inputs; a call
 * returning a float triggers the x87 simulator for this irg.
 */
4694 static ir_node *gen_be_Call(ir_node *node)
4696 dbg_info *const dbgi = get_irn_dbg_info(node);
4697 ir_node *const src_block = get_nodes_block(node);
4698 ir_node *const block = be_transform_node(src_block);
4699 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4700 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4701 ir_node *const sp = be_transform_node(src_sp);
4702 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4703 ia32_address_mode_t am;
4704 ia32_address_t *const addr = &am.addr;
4709 ir_node * eax = noreg_GP;
4710 ir_node * ecx = noreg_GP;
4711 ir_node * edx = noreg_GP;
4712 unsigned const pop = be_Call_get_pop(node);
4713 ir_type *const call_tp = be_Call_get_type(node);
4714 int old_no_pic_adjust;
4716 /* Run the x87 simulator if the call returns a float value */
4717 if (get_method_n_ress(call_tp) > 0) {
4718 ir_type *const res_type = get_method_res_type(call_tp, 0);
4719 ir_mode *const res_mode = get_type_mode(res_type);
4721 if (res_mode != NULL && mode_is_float(res_mode)) {
4722 ir_graph *irg = current_ir_graph;
4723 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4724 irg_data->do_x87_sim = 1;
4728 /* We do not want be_Call direct calls */
4729 assert(be_Call_get_entity(node) == NULL);
4731 /* special case for PIC trampoline calls */
4732 old_no_pic_adjust = ia32_no_pic_adjust;
4733 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4735 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4736 match_am | match_immediate);
/* restore the global flag after matching the call target */
4738 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input is the fp control word; walk the remaining register
 * parameters backwards and sort them into their fixed registers */
4740 i = get_irn_arity(node) - 1;
4741 fpcw = be_transform_node(get_irn_n(node, i--));
4742 for (; i >= be_pos_Call_first_arg; --i) {
4743 arch_register_req_t const *const req = arch_get_register_req(node, i);
4744 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4746 assert(req->type == arch_register_req_type_limited);
4747 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4749 switch (*req->limited) {
4750 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4751 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4752 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4753 default: panic("Invalid GP register for register parameter");
4757 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4758 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4759 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4760 set_am_attributes(call, &am);
4761 call = fix_mem_proj(call, &am);
4763 if (get_irn_pinned(node) == op_pin_state_pinned)
4764 set_irn_pinned(call, op_pin_state_pinned);
4766 SET_IA32_ORIG_NODE(call, node);
4768 if (ia32_cg_config.use_sse2) {
4769 /* remember this call for post-processing */
4770 ARR_APP1(ir_node *, call_list, call);
4771 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4778 * Transform Builtin trap
4780 static ir_node *gen_trap(ir_node *node)
4782 dbg_info *dbgi = get_irn_dbg_info(node);
4783 ir_node *block = be_transform_node(get_nodes_block(node));
4784 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4786 return new_bd_ia32_UD2(dbgi, block, mem);
4790 * Transform Builtin debugbreak
4792 static ir_node *gen_debugbreak(ir_node *node)
4794 dbg_info *dbgi = get_irn_dbg_info(node);
4795 ir_node *block = be_transform_node(get_nodes_block(node));
4796 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4798 return new_bd_ia32_Breakpoint(dbgi, block, mem);
/**
4802 * Transform Builtin return_address
 *
 * For a non-zero level parameter, first climbs the frame-pointer chain with
 * ClimbFrame, then loads the return address slot of the resulting frame.
 */
4804 static ir_node *gen_return_address(ir_node *node)
4806 ir_node *param = get_Builtin_param(node, 0);
4807 ir_node *frame = get_Builtin_param(node, 1);
4808 dbg_info *dbgi = get_irn_dbg_info(node);
/* level must be a constant */
4809 ir_tarval *tv = get_Const_tarval(param);
4810 unsigned long value = get_tarval_long(tv);
4812 ir_node *block = be_transform_node(get_nodes_block(node));
4813 ir_node *ptr = be_transform_node(frame);
/* walk 'value' frames up via the saved frame pointers */
4817 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4818 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4819 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4822 /* load the return address from this frame */
4823 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4825 set_irn_pinned(load, get_irn_pinned(node));
4826 set_ia32_op_type(load, ia32_AddrModeS);
4827 set_ia32_ls_mode(load, mode_Iu);
4829 set_ia32_am_offs_int(load, 0);
4830 set_ia32_use_frame(load);
4831 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4833 if (get_irn_pinned(node) == op_pin_state_floats) {
/* rematerialization relies on all load variants sharing the result pn */
4834 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4835 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4836 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4837 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4840 SET_IA32_ORIG_NODE(load, node);
4841 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
4845 * Transform Builtin frame_address
 *
 * Analogous to gen_return_address, but loads the frame-address slot of the
 * selected stack frame.
 */
4847 static ir_node *gen_frame_address(ir_node *node)
4849 ir_node *param = get_Builtin_param(node, 0);
4850 ir_node *frame = get_Builtin_param(node, 1);
4851 dbg_info *dbgi = get_irn_dbg_info(node);
/* level must be a constant */
4852 ir_tarval *tv = get_Const_tarval(param);
4853 unsigned long value = get_tarval_long(tv);
4855 ir_node *block = be_transform_node(get_nodes_block(node));
4856 ir_node *ptr = be_transform_node(frame);
/* walk 'value' frames up via the saved frame pointers */
4861 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4862 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4863 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4866 /* load the frame address from this frame */
4867 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4869 set_irn_pinned(load, get_irn_pinned(node));
4870 set_ia32_op_type(load, ia32_AddrModeS);
4871 set_ia32_ls_mode(load, mode_Iu);
4873 ent = ia32_get_frame_address_entity();
4875 set_ia32_am_offs_int(load, 0);
4876 set_ia32_use_frame(load);
4877 set_ia32_frame_ent(load, ent);
4879 /* will fail anyway, but gcc does this: */
4880 set_ia32_am_offs_int(load, 0);
4883 if (get_irn_pinned(node) == op_pin_state_floats) {
/* rematerialization relies on all load variants sharing the result pn */
4884 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4885 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4886 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4887 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4890 SET_IA32_ORIG_NODE(load, node);
4891 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4895 * Transform Builtin prefetch.
/* Selects between SSE prefetch hints (prefetchnta/t0/t1/t2), 3DNow!
 * prefetch/prefetchw and "no prefetch support at all" (in which case the
 * builtin degenerates to its memory operand). Returns the mode_M result. */
4897 static ir_node *gen_prefetch(ir_node *node)
4900 ir_node *ptr, *block, *mem, *base, *index;
4901 ir_node *param, *new_node;
4904 ia32_address_t addr;
4906 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4907 /* no prefetch at all, route memory */
4908 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write flag (must be a Const) */
4911 param = get_Builtin_param(node, 1);
4912 tv = get_Const_tarval(param);
4913 rw = get_tarval_long(tv);
4915 /* construct load address */
4916 memset(&addr, 0, sizeof(addr));
4917 ptr = get_Builtin_param(node, 0);
4918 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4925 base = be_transform_node(base);
4928 if (index == NULL) {
4931 index = be_transform_node(index);
4934 dbgi = get_irn_dbg_info(node);
4935 block = be_transform_node(get_nodes_block(node));
4936 mem = be_transform_node(get_Builtin_mem(node));
/* write prefetch only exists as 3DNow! prefetchw */
4938 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4939 /* we have 3DNow!, this was already checked above */
4940 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4941 } else if (ia32_cg_config.use_sse_prefetch) {
4942 /* note: rw == 1 is IGNORED in that case */
4943 param = get_Builtin_param(node, 2);
4944 tv = get_Const_tarval(param);
4945 locality = get_tarval_long(tv);
4947 /* SSE style prefetch; the locality hint (0..3) selects the variant */
4950 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4953 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4956 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4959 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4963 assert(ia32_cg_config.use_3dnow_prefetch);
4964 /* 3DNow! style prefetch */
4965 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4968 set_irn_pinned(new_node, get_irn_pinned(node));
4969 set_ia32_op_type(new_node, ia32_AddrModeS);
4970 set_ia32_ls_mode(new_node, mode_Bu);
4971 set_address(new_node, &addr);
4973 SET_IA32_ORIG_NODE(new_node, node);
4975 be_dep_on_frame(new_node);
4976 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4980 * Transform bsf like node
/* Common helper for the bsf/bsr/popcnt builtins: matches the single
 * parameter through the address-mode matcher (so a memory operand can be
 * folded) and constructs the instruction via `func`. */
4982 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4984 ir_node *param = get_Builtin_param(node, 0);
4985 dbg_info *dbgi = get_irn_dbg_info(node);
4987 ir_node *block = get_nodes_block(node);
4988 ir_node *new_block = be_transform_node(block);
4990 ia32_address_mode_t am;
4991 ia32_address_t *addr = &am.addr;
4994 match_arguments(&am, block, NULL, param, NULL, match_am);
4996 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4997 set_am_attributes(cnt, &am);
4998 set_ia32_ls_mode(cnt, get_irn_mode(param));
5000 SET_IA32_ORIG_NODE(cnt, node);
/* if a memory operand was folded, reroute the mem proj correctly */
5001 return fix_mem_proj(cnt, &am);
5005 * Transform builtin ffs.
/* ffs(x) = bsf(x) + 1, and 0 when x == 0. The zero case is handled
 * branchlessly: setcc on the Z flag yields 1 iff x == 0; negating gives
 * -1 (all ones) or 0; or-ing that into the bsf result yields -1 or bsf;
 * finally adding 1 yields 0 or bsf + 1. */
5007 static ir_node *gen_ffs(ir_node *node)
5009 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5010 ir_node *real = skip_Proj(bsf);
5011 dbg_info *dbgi = get_irn_dbg_info(real);
5012 ir_node *block = get_nodes_block(real);
5013 ir_node *flag, *set, *conv, *neg, *orn;
/* make sure we can also pull a flags proj out of the Bsf node */
5016 if (get_irn_mode(real) != mode_T) {
5017 set_irn_mode(real, mode_T);
5018 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5021 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (x == 0) ? 1 : 0 */
5024 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5025 SET_IA32_ORIG_NODE(set, node);
/* widen the 8-bit setcc result to a full register */
5028 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5029 SET_IA32_ORIG_NODE(conv, node);
/* neg = 0 or 0xFFFFFFFF */
5032 neg = new_bd_ia32_Neg(dbgi, block, conv);
/* orn = bsf(x) or -1 */
5035 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5036 set_ia32_commutative(orn);
/* result = orn + 1 (== 0 when x was 0) */
5039 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, orn, ia32_create_Immediate(NULL, 0, 1));
5043 * Transform builtin clz.
5045 static ir_node *gen_clz(ir_node *node)
5047 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5048 ir_node *real = skip_Proj(bsr);
5049 dbg_info *dbgi = get_irn_dbg_info(real);
5050 ir_node *block = get_nodes_block(real);
5051 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5053 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5057 * Transform builtin ctz.
5059 static ir_node *gen_ctz(ir_node *node)
5061 return gen_unop_AM(node, new_bd_ia32_Bsf);
5065 * Transform builtin parity.
/* Computes the parity of a 32-bit value. Since the x86 parity flag only
 * covers the low byte, the value is folded down first: xor the high and
 * low 16-bit halves, then xor the high and low bytes of the low half
 * (XorHighLow), and finally read the not-parity condition via setcc. */
5067 static ir_node *gen_parity(ir_node *node)
5069 dbg_info *dbgi = get_irn_dbg_info(node);
5070 ir_node *block = get_nodes_block(node);
5071 ir_node *new_block = be_transform_node(block);
5072 ir_node *param = get_Builtin_param(node, 0);
5073 ir_node *new_param = be_transform_node(param);
5076 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5077 * so we have to do complicated xoring first.
5078 * (we should also better lower this before the backend so we still have a
5079 * chance for CSE, constant folding and other goodies for some of these
5082 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
/* shr = x >> 16; xor folds the two 16-bit halves together */
5083 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5084 ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5086 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
/* pull the flags result out of the XorHighLow */
5089 set_irn_mode(xor2, mode_T);
5090 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
/* PF is set for EVEN parity, hence not_parity here */
5093 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5094 SET_IA32_ORIG_NODE(new_node, node);
/* widen the 8-bit setcc result to a full register */
5097 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5098 nomem, new_node, mode_Bu);
5099 SET_IA32_ORIG_NODE(new_node, node);
5104 * Transform builtin popcount
/* Uses the hardware popcnt instruction when available (SSE4.2/SSE4a);
 * otherwise emits the classic divide-and-conquer bit-counting sequence
 * (pairs, nibbles, bytes, halfwords), using Lea for the additions. */
5106 static ir_node *gen_popcount(ir_node *node)
5108 ir_node *param = get_Builtin_param(node, 0);
5109 dbg_info *dbgi = get_irn_dbg_info(node);
5111 ir_node *block = get_nodes_block(node);
5112 ir_node *new_block = be_transform_node(block);
5115 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5117 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5118 if (ia32_cg_config.use_popcnt) {
5119 ia32_address_mode_t am;
5120 ia32_address_t *addr = &am.addr;
/* popcnt may take its operand from memory, including 16-bit operands */
5123 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5125 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5126 set_am_attributes(cnt, &am);
5127 set_ia32_ls_mode(cnt, get_irn_mode(param));
5129 SET_IA32_ORIG_NODE(cnt, node);
5130 return fix_mem_proj(cnt, &am);
5133 new_param = be_transform_node(param);
5135 /* do the standard popcount algo */
5136 /* TODO: This is stupid, we should transform this before the backend,
5137 * to get CSE, localopts, etc. for the operations
5138 * TODO: This is also not the optimal algorithm (it is just the starting
5139 * example in hackers delight, they optimize it more on the following page)
5140 * But I'm too lazy to fix this now, as the code should get lowered before
5141 * the backend anyway.
5144 /* m1 = x & 0x55555555 */
5145 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5146 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5149 simm = ia32_create_Immediate(NULL, 0, 1);
5150 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5152 /* m2 = s1 & 0x55555555 */
5153 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (per-pair bit counts) */
5156 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5158 /* m4 = m3 & 0x33333333 */
5159 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5160 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5163 simm = ia32_create_Immediate(NULL, 0, 2);
5164 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5166 /* m5 = s2 & 0x33333333 */
5167 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 (per-nibble bit counts) */
5170 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5172 /* m7 = m6 & 0x0F0F0F0F */
5173 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5174 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5177 simm = ia32_create_Immediate(NULL, 0, 4);
5178 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5180 /* m8 = s3 & 0x0F0F0F0F */
5181 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 (per-byte bit counts) */
5184 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5186 /* m10 = m9 & 0x00FF00FF */
5187 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5188 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5191 simm = ia32_create_Immediate(NULL, 0, 8);
5192 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5194 /* m11 = s4 & 0x00FF00FF */
5195 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5197 /* m12 = m10 + m11 */
5198 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5200 /* m13 = m12 & 0x0000FFFF */
5201 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5202 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5204 /* s5 = m12 >> 16 */
5205 simm = ia32_create_Immediate(NULL, 0, 16);
5206 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5208 /* res = m13 + s5 */
5209 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5213 * Transform builtin byte swap.
/* Emits bswap (32 bit, i486+) or the manual shift/and/lea sequence on
 * pre-i486 targets; 16-bit values use bswap16 (available everywhere).
 * Dispatch is on the operand's bit size; other sizes are a panic. */
5215 static ir_node *gen_bswap(ir_node *node)
5217 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5218 dbg_info *dbgi = get_irn_dbg_info(node);
5220 ir_node *block = get_nodes_block(node);
5221 ir_node *new_block = be_transform_node(block);
5222 ir_mode *mode = get_irn_mode(param);
5223 unsigned size = get_mode_size_bits(mode);
5224 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5228 if (ia32_cg_config.use_i486) {
5229 /* swap available */
5230 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* pre-i486 fallback: assemble the four bytes in swapped order
 * from shifted and masked copies, combined with Lea additions */
5232 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5233 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5235 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5236 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5238 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5240 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5241 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5243 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5244 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5247 /* swap16 always available */
5248 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5251 panic("Invalid bswap size (%d)", size);
5256 * Transform builtin outport.
/* Emits an `out` instruction: writes `value` to the (possibly immediate)
 * I/O port given by param 0, threading the memory dependency through. */
5258 static ir_node *gen_outport(ir_node *node)
5260 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5261 ir_node *oldv = get_Builtin_param(node, 1);
5262 ir_mode *mode = get_irn_mode(oldv);
5263 ir_node *value = be_transform_node(oldv);
5264 ir_node *block = be_transform_node(get_nodes_block(node));
5265 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5266 dbg_info *dbgi = get_irn_dbg_info(node);
5268 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* the access width is taken from the value's mode */
5269 set_ia32_ls_mode(res, mode);
5274 * Transform builtin inport.
/* Emits an `in` instruction: reads from the (possibly immediate) I/O port
 * given by param 0. The access width comes from the builtin's declared
 * result type. */
5276 static ir_node *gen_inport(ir_node *node)
5278 ir_type *tp = get_Builtin_type(node);
5279 ir_type *rstp = get_method_res_type(tp, 0);
5280 ir_mode *mode = get_type_mode(rstp);
5281 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5282 ir_node *block = be_transform_node(get_nodes_block(node));
5283 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5284 dbg_info *dbgi = get_irn_dbg_info(node);
5286 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5287 set_ia32_ls_mode(res, mode);
5289 /* check for missing Result Proj */
5294 * Transform a builtin inner trampoline
/* Writes a small trampoline into memory at `ptr`:
 *     mov ecx, <env>      (B9 <imm32>)
 *     jmp rel <callee>    (E9 <rel32>)
 * Each instruction byte/operand is emitted as an individual Store, with
 * the address advanced between stores. Returns a Tuple of (mem, ptr). */
5296 static ir_node *gen_inner_trampoline(ir_node *node)
5298 ir_node *ptr = get_Builtin_param(node, 0);
5299 ir_node *callee = get_Builtin_param(node, 1);
5300 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5301 ir_node *mem = get_Builtin_mem(node);
5302 ir_node *block = get_nodes_block(node);
5303 ir_node *new_block = be_transform_node(block);
5307 ir_node *trampoline;
5309 dbg_info *dbgi = get_irn_dbg_info(node);
5310 ia32_address_t addr;
5312 /* construct store address */
5313 memset(&addr, 0, sizeof(addr));
5314 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5316 if (addr.base == NULL) {
5317 addr.base = noreg_GP;
5319 addr.base = be_transform_node(addr.base);
5322 if (addr.index == NULL) {
5323 addr.index = noreg_GP;
5325 addr.index = be_transform_node(addr.index);
5327 addr.mem = be_transform_node(mem);
5329 /* mov ecx, <env> */
/* 0xB9 is the opcode byte for "mov ecx, imm32" */
5330 val = ia32_create_Immediate(NULL, 0, 0xB9);
5331 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5332 addr.index, addr.mem, val);
5333 set_irn_pinned(store, get_irn_pinned(node));
5334 set_ia32_op_type(store, ia32_AddrModeD);
5335 set_ia32_ls_mode(store, mode_Bu);
5336 set_address(store, &addr);
/* store the 32-bit env value as the mov's immediate operand */
5340 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5341 addr.index, addr.mem, env);
5342 set_irn_pinned(store, get_irn_pinned(node));
5343 set_ia32_op_type(store, ia32_AddrModeD);
5344 set_ia32_ls_mode(store, mode_Iu);
5345 set_address(store, &addr);
5349 /* jmp rel <callee> */
/* 0xE9 is the opcode byte for "jmp rel32" */
5350 val = ia32_create_Immediate(NULL, 0, 0xE9);
5351 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5352 addr.index, addr.mem, val);
5353 set_irn_pinned(store, get_irn_pinned(node));
5354 set_ia32_op_type(store, ia32_AddrModeD);
5355 set_ia32_ls_mode(store, mode_Bu);
5356 set_address(store, &addr);
5360 trampoline = be_transform_node(ptr);
5362 /* the callee is typically an immediate */
/* rel32 = callee - (trampoline + 10): the trampoline is 10 bytes long,
 * hence the -10 offset folded into the constant/Lea */
5363 if (is_SymConst(callee)) {
5364 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5366 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
5368 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5370 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5371 addr.index, addr.mem, rel);
5372 set_irn_pinned(store, get_irn_pinned(node));
5373 set_ia32_op_type(store, ia32_AddrModeD);
5374 set_ia32_ls_mode(store, mode_Iu);
5375 set_address(store, &addr);
/* result tuple: [0] = final memory, [1] = trampoline address */
5380 return new_r_Tuple(new_block, 2, in);
5384 * Transform Builtin node.
/* Dispatches each supported builtin kind to its dedicated transformer;
 * unsupported kinds are a hard panic. */
5386 static ir_node *gen_Builtin(ir_node *node)
5388 ir_builtin_kind kind = get_Builtin_kind(node);
5392 return gen_trap(node);
5393 case ir_bk_debugbreak:
5394 return gen_debugbreak(node);
5395 case ir_bk_return_address:
5396 return gen_return_address(node);
5397 case ir_bk_frame_address:
5398 return gen_frame_address(node);
5399 case ir_bk_prefetch:
5400 return gen_prefetch(node);
5402 return gen_ffs(node);
5404 return gen_clz(node);
5406 return gen_ctz(node);
5408 return gen_parity(node);
5409 case ir_bk_popcount:
5410 return gen_popcount(node);
5412 return gen_bswap(node);
5414 return gen_outport(node);
5416 return gen_inport(node);
5417 case ir_bk_inner_trampoline:
5418 return gen_inner_trampoline(node);
5420 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5424 * Transform Proj(Builtin) node.
/* Maps Projs of a Builtin onto the projections of the already-transformed
 * ia32 node (or the Tuple produced by gen_inner_trampoline). */
5426 static ir_node *gen_Proj_Builtin(ir_node *proj)
5428 ir_node *node = get_Proj_pred(proj);
5429 ir_node *new_node = be_transform_node(node);
5430 ir_builtin_kind kind = get_Builtin_kind(node);
/* builtins producing exactly one plain value: the transformed node IS the result */
5433 case ir_bk_return_address:
5434 case ir_bk_frame_address:
5439 case ir_bk_popcount:
5441 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* builtins producing only a memory result */
5444 case ir_bk_debugbreak:
5445 case ir_bk_prefetch:
5447 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport has both a value and a memory result */
5450 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5451 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5453 assert(get_Proj_proj(proj) == pn_Builtin_M);
5454 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5456 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned a Tuple: [0] = mem, [1] = address */
5457 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5458 return get_Tuple_pred(new_node, 1);
5460 assert(get_Proj_proj(proj) == pn_Builtin_M);
5461 return get_Tuple_pred(new_node, 0);
5464 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* IncSP is duplicated unchanged, but marked as flag-modifying: the
 * add/sub on esp it expands to clobbers the eflags register. */
5467 static ir_node *gen_be_IncSP(ir_node *node)
5469 ir_node *res = be_duplicate_node(node);
5470 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5476 * Transform the Projs from a be_Call.
/* Renumbers be_Call projections to the matching ia32_Call projection and
 * pins the well-known outputs (stack pointer, fpcw) to their registers.
 * Register results are located by scanning the new call's output
 * requirements for a matching "limited" (single-register) constraint. */
5478 static ir_node *gen_Proj_be_Call(ir_node *node)
5480 ir_node *call = get_Proj_pred(node);
5481 ir_node *new_call = be_transform_node(call);
5482 dbg_info *dbgi = get_irn_dbg_info(node);
5483 long proj = get_Proj_proj(node);
5484 ir_mode *mode = get_irn_mode(node);
5487 if (proj == pn_be_Call_M_regular) {
5488 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5490 /* transform call modes */
5491 if (mode_is_data(mode)) {
5492 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5496 /* Map from be_Call to ia32_Call proj number */
5497 if (proj == pn_be_Call_sp) {
5498 proj = pn_ia32_Call_stack;
5499 } else if (proj == pn_be_Call_M_regular) {
5500 proj = pn_ia32_Call_M;
5502 arch_register_req_t const *const req = arch_get_register_req_out(node);
5503 int const n_outs = arch_irn_get_n_outs(new_call);
/* a call result must be constrained to a single register */
5506 assert(proj >= pn_be_Call_first_res);
5507 assert(req->type & arch_register_req_type_limited);
/* find the new call's output with the same single-register constraint */
5509 for (i = 0; i < n_outs; ++i) {
5510 arch_register_req_t const *const new_req
5511 = arch_get_out_register_req(new_call, i);
5513 if (!(new_req->type & arch_register_req_type_limited) ||
5514 new_req->cls != req->cls ||
5515 *new_req->limited != *req->limited)
5524 res = new_rd_Proj(dbgi, new_call, mode, proj);
5526 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5528 case pn_ia32_Call_stack:
5529 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5532 case pn_ia32_Call_fpcw:
5533 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5541 * Transform the Projs from a Cmp.
/* Cmp results (mode_b) must have been lowered away before the backend
 * runs; reaching this transformer is always a bug in an earlier phase. */
5543 static ir_node *gen_Proj_Cmp(ir_node *node)
5545 /* this probably means not all mode_b nodes were lowered... */
5546 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/* Transform a Proj of an ASM node: the memory proj always maps to the new
 * ASM's last output; integer/reference and float projs keep their position
 * (the exact renumbering lines are elided in this excerpt — verify against
 * upstream before editing). Unexpected modes are a panic. */
5550 static ir_node *gen_Proj_ASM(ir_node *node)
5552 ir_mode *mode = get_irn_mode(node);
5553 ir_node *pred = get_Proj_pred(node);
5554 ir_node *new_pred = be_transform_node(pred);
5555 long pos = get_Proj_proj(node);
5557 if (mode == mode_M) {
/* memory is by convention the last output of the transformed ASM */
5558 pos = arch_irn_get_n_outs(new_pred)-1;
5559 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5561 } else if (mode_is_float(mode)) {
5564 panic("unexpected proj mode at ASM");
5567 return new_r_Proj(new_pred, mode, pos);
5571 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: routes to the per-opcode Proj transformers,
 * handles Start projs (initial exec becomes a Jmp, TLS pointer), and falls
 * back to duplicating/renumbering the Proj for everything else. */
5573 static ir_node *gen_Proj(ir_node *node)
5575 ir_node *pred = get_Proj_pred(node);
5578 switch (get_irn_opcode(pred)) {
/* a Store has only a memory result, which maps to the store itself */
5580 proj = get_Proj_proj(node);
5581 if (proj == pn_Store_M) {
5582 return be_transform_node(pred);
5584 panic("No idea how to transform proj->Store");
5587 return gen_Proj_Load(node);
5589 return gen_Proj_ASM(node);
5591 return gen_Proj_Builtin(node);
5593 return gen_Proj_Div(node);
5595 return gen_Proj_Mod(node);
5597 return gen_Proj_CopyB(node);
5599 return gen_Proj_be_SubSP(node);
5601 return gen_Proj_be_AddSP(node);
5603 return gen_Proj_be_Call(node);
5605 return gen_Proj_Cmp(node);
5607 proj = get_Proj_proj(node);
5609 case pn_Start_X_initial_exec: {
5610 ir_node *block = get_nodes_block(pred);
5611 ir_node *new_block = be_transform_node(block);
5612 dbg_info *dbgi = get_irn_dbg_info(node);
5613 /* we exchange the ProjX with a jump */
5614 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5619 case pn_Start_P_tls:
5620 return ia32_gen_Proj_tls(node);
5625 if (is_ia32_l_FloattoLL(pred)) {
5626 return gen_Proj_l_FloattoLL(node);
5628 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5632 ir_mode *mode = get_irn_mode(node);
/* gp values are renumbered to mode_Iu projs of the transformed pred */
5633 if (ia32_mode_needs_gp_reg(mode)) {
5634 ir_node *new_pred = be_transform_node(pred);
5635 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5636 get_Proj_proj(node));
/* keep the original node number for debugging */
5637 new_proj->node_nr = node->node_nr;
5642 return be_duplicate_node(node);
5646 * Enters all transform functions into the generic pointer
/* Registers one transform callback per opcode with the be_transform
 * framework; opcodes not listed fall back to the framework default. */
5648 static void register_transformers(void)
5650 /* first clear the generic function pointer for all ops */
5651 be_start_transform_setup();
5653 be_set_transform_function(op_Add, gen_Add);
5654 be_set_transform_function(op_And, gen_And);
5655 be_set_transform_function(op_ASM, ia32_gen_ASM);
5656 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5657 be_set_transform_function(op_be_Call, gen_be_Call);
5658 be_set_transform_function(op_be_Copy, gen_be_Copy);
5659 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5660 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5661 be_set_transform_function(op_be_Return, gen_be_Return);
5662 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5663 be_set_transform_function(op_Builtin, gen_Builtin);
5664 be_set_transform_function(op_Cmp, gen_Cmp);
5665 be_set_transform_function(op_Cond, gen_Cond);
5666 be_set_transform_function(op_Const, gen_Const);
5667 be_set_transform_function(op_Conv, gen_Conv);
5668 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5669 be_set_transform_function(op_Div, gen_Div);
5670 be_set_transform_function(op_Eor, gen_Eor);
/* ia32 "l_" lowered nodes created by pre-transform phases */
5671 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5672 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5673 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5674 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5675 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5676 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5677 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5678 be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
5679 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5680 be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
5681 be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
5682 be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
5683 be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
5684 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
/* already-final ia32 nodes are simply duplicated */
5685 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5686 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5687 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5688 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5689 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5690 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5691 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5692 be_set_transform_function(op_IJmp, gen_IJmp);
5693 be_set_transform_function(op_Jmp, gen_Jmp);
5694 be_set_transform_function(op_Load, gen_Load);
5695 be_set_transform_function(op_Minus, gen_Minus);
5696 be_set_transform_function(op_Mod, gen_Mod);
5697 be_set_transform_function(op_Mul, gen_Mul);
5698 be_set_transform_function(op_Mulh, gen_Mulh);
5699 be_set_transform_function(op_Mux, gen_Mux);
5700 be_set_transform_function(op_Not, gen_Not);
5701 be_set_transform_function(op_Or, gen_Or);
5702 be_set_transform_function(op_Phi, gen_Phi);
5703 be_set_transform_function(op_Proj, gen_Proj);
5704 be_set_transform_function(op_Rotl, gen_Rotl);
5705 be_set_transform_function(op_Shl, gen_Shl);
5706 be_set_transform_function(op_Shr, gen_Shr);
5707 be_set_transform_function(op_Shrs, gen_Shrs);
5708 be_set_transform_function(op_Store, gen_Store);
5709 be_set_transform_function(op_Sub, gen_Sub);
5710 be_set_transform_function(op_SymConst, gen_SymConst);
5711 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5715 * Pre-transform all unknown and noreg nodes.
/* Transforms the per-irg NoReg placeholder nodes up front and caches the
 * irg's NoMem and NoReg-gp nodes in the file-level `nomem`/`noreg_GP`
 * variables used throughout this file. */
5717 static void ia32_pretransform_node(void)
5719 ir_graph *irg = current_ir_graph;
5720 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5722 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5723 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5724 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5726 nomem = get_irg_no_mem(irg);
5727 noreg_GP = ia32_new_NoReg_gp(irg);
5733 * Post-process all calls if we are in SSE mode.
5734 * The ABI requires that the results are in st0, copy them
5735 * to a xmm register.
5737 static void postprocess_fp_call_results(void)
/* iterates over all calls recorded during transformation together with
 * their original method types (parallel arrays call_list/call_types) */
5741 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5742 ir_node *call = call_list[i];
5743 ir_type *mtp = call_types[i];
5746 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5747 ir_type *res_tp = get_method_res_type(mtp, j);
5748 ir_node *res, *new_res;
5749 const ir_edge_t *edge, *next;
5752 if (! is_atomic_type(res_tp)) {
5753 /* no floating point return */
5756 mode = get_type_mode(res_tp);
5757 if (! mode_is_float(mode)) {
5758 /* no floating point return */
5762 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5765 /* now patch the users */
5766 foreach_out_edge_safe(res, edge, next) {
5767 ir_node *succ = get_edge_src_irn(edge);
/* Keeps stay attached to the x87 result */
5770 if (be_is_Keep(succ))
5773 if (is_ia32_xStore(succ)) {
5774 /* an xStore can be patched into an vfst */
5775 dbg_info *db = get_irn_dbg_info(succ);
5776 ir_node *block = get_nodes_block(succ);
5777 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5778 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5779 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5780 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5781 ir_mode *mode = get_ia32_ls_mode(succ);
5783 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5784 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5785 if (is_ia32_use_frame(succ))
5786 set_ia32_use_frame(st);
5787 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5788 set_irn_pinned(st, get_irn_pinned(succ));
5789 set_ia32_op_type(st, ia32_AddrModeD);
/* otherwise: spill st(0) to the frame and reload into an xmm register,
 * creating the conversion pair lazily on first non-store user */
5793 if (new_res == NULL) {
5794 dbg_info *db = get_irn_dbg_info(call);
5795 ir_node *block = get_nodes_block(call);
5796 ir_node *frame = get_irg_frame(current_ir_graph);
5797 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5798 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5799 ir_node *vfst, *xld, *new_mem;
5801 /* store st(0) on stack */
5802 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5803 set_ia32_op_type(vfst, ia32_AddrModeD);
5804 set_ia32_use_frame(vfst);
5806 /* load into SSE register */
5807 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5808 set_ia32_op_type(xld, ia32_AddrModeS);
5809 set_ia32_use_frame(xld);
5811 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5812 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* make all former memory users depend on the new store/load pair */
5814 if (old_mem != NULL) {
5815 edges_reroute(old_mem, new_mem, current_ir_graph);
5819 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5826 /* do the transformation */
/* Entry point of the ia32 transformation phase: sets up the transformers,
 * computes heights and non-address-mode info, temporarily disables CSE,
 * runs the generic transform driver and (for SSE2) post-processes the
 * floating-point call results, then tears everything down again. */
5827 void ia32_transform_graph(ir_graph *irg)
5831 register_transformers();
5832 initial_fpcw = NULL;
5833 ia32_no_pic_adjust = 0;
5835 be_timer_push(T_HEIGHTS);
5836 ia32_heights = heights_new(irg);
5837 be_timer_pop(T_HEIGHTS);
5838 ia32_calculate_non_address_mode_nodes(irg);
5840 /* the transform phase is not safe for CSE (yet) because several nodes get
5841 * attributes set after their creation */
5842 cse_last = get_opt_cse();
/* call_list/call_types collect fp-returning calls for postprocessing */
5845 call_list = NEW_ARR_F(ir_node *, 0);
5846 call_types = NEW_ARR_F(ir_type *, 0);
5847 be_transform_graph(irg, ia32_pretransform_node);
5849 if (ia32_cg_config.use_sse2)
5850 postprocess_fp_call_results();
5851 DEL_ARR_F(call_types);
5852 DEL_ARR_F(call_list);
/* restore the caller's CSE setting */
5854 set_opt_cse(cse_last);
5856 ia32_free_non_address_mode_nodes();
5857 heights_free(ia32_heights);
5858 ia32_heights = NULL;
/* One-time module initialization: registers the debug output channel. */
5861 void ia32_init_transform(void)
5863 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");