2 * Copyright (C) 1995-2010 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief emit assembler for a backend graph
23 * @author Hannes Rapp, Matthias Braun
29 #include "bitfiddle.h"
41 #include "raw_bitset.h"
45 #include "execfreq_t.h"
48 #include "beblocksched.h"
54 #include "bepeephole.h"
56 #include "sparc_emitter.h"
57 #include "gen_sparc_emitter.h"
58 #include "sparc_nodes_attr.h"
59 #include "sparc_new_nodes.h"
60 #include "gen_sparc_regalloc_if.h"
62 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
64 static ir_heights_t *heights;
65 static unsigned *delay_slot_fillers;
66 static pmap *delay_slots;
68 static bool emitting_delay_slot;
71 * indent before instruction. (Adds additional indentation when emitting
74 static void sparc_emit_indent(void)
77 if (emitting_delay_slot)
81 static void sparc_emit_immediate(ir_node const *const node)
83 const sparc_attr_t *attr = get_sparc_attr_const(node);
84 ir_entity *entity = attr->immediate_value_entity;
87 int32_t value = attr->immediate_value;
88 assert(sparc_is_value_imm_encodeable(value));
89 be_emit_irprintf("%d", value);
91 if (get_entity_owner(entity) == get_tls_type()) {
92 be_emit_cstring("%tle_lox10(");
94 be_emit_cstring("%lo(");
96 be_gas_emit_entity(entity);
97 if (attr->immediate_value != 0) {
98 be_emit_irprintf("%+d", attr->immediate_value);
104 static void sparc_emit_high_immediate(ir_node const *node)
106 const sparc_attr_t *attr = get_sparc_attr_const(node);
107 ir_entity *entity = attr->immediate_value_entity;
109 if (entity == NULL) {
110 uint32_t value = (uint32_t) attr->immediate_value;
111 be_emit_irprintf("%%hi(0x%X)", value);
113 if (get_entity_owner(entity) == get_tls_type()) {
114 be_emit_cstring("%tle_hix22(");
116 be_emit_cstring("%hi(");
118 be_gas_emit_entity(entity);
119 if (attr->immediate_value != 0) {
120 be_emit_irprintf("%+d", attr->immediate_value);
126 static void sparc_emit_source_register(ir_node const *node, int const pos)
128 const arch_register_t *reg = arch_get_irn_register_in(node, pos);
130 be_emit_string(reg->name);
133 static void sparc_emit_dest_register(ir_node const *const node, int const pos)
135 const arch_register_t *reg = arch_get_irn_register_out(node, pos);
137 be_emit_string(reg->name);
143 static void sparc_emit_offset(const ir_node *node, int offset_node_pos)
145 const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
147 if (attr->is_reg_reg) {
148 assert(!attr->is_frame_entity);
149 assert(attr->base.immediate_value == 0);
150 assert(attr->base.immediate_value_entity == NULL);
152 sparc_emit_source_register(node, offset_node_pos);
153 } else if (attr->is_frame_entity) {
154 int32_t offset = attr->base.immediate_value;
156 assert(sparc_is_value_imm_encodeable(offset));
157 be_emit_irprintf("%+ld", offset);
159 } else if (attr->base.immediate_value != 0
160 || attr->base.immediate_value_entity != NULL) {
162 sparc_emit_immediate(node);
169 static void sparc_emit_load_mode(ir_node const *const node)
171 const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
172 ir_mode *mode = attr->load_store_mode;
173 int bits = get_mode_size_bits(mode);
174 bool is_signed = mode_is_signed(mode);
177 case 8: be_emit_string(is_signed ? "sb" : "ub"); break;
178 case 16: be_emit_string(is_signed ? "sh" : "uh"); break;
180 case 64: be_emit_char('d'); break;
181 case 128: be_emit_char('q'); break;
182 default: panic("invalid load/store mode %+F", mode);
187 * Emit store mode char
189 static void sparc_emit_store_mode(ir_node const *const node)
191 const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
192 ir_mode *mode = attr->load_store_mode;
193 int bits = get_mode_size_bits(mode);
196 case 8: be_emit_char('b'); break;
197 case 16: be_emit_char('h'); break;
199 case 64: be_emit_char('d'); break;
200 case 128: be_emit_char('q'); break;
201 default: panic("invalid load/store mode %+F", mode);
205 static void emit_fp_suffix(const ir_mode *mode)
207 assert(mode_is_float(mode));
208 switch (get_mode_size_bits(mode)) {
209 case 32: be_emit_char('s'); break;
210 case 64: be_emit_char('d'); break;
211 case 128: be_emit_char('q'); break;
212 default: panic("invalid FP mode");
216 static void set_jump_target(ir_node *jump, ir_node *target)
218 set_irn_link(jump, target);
221 static ir_node *get_jump_target(const ir_node *jump)
223 return (ir_node*)get_irn_link(jump);
227 * Returns the target label for a control flow node.
229 static void sparc_emit_cfop_target(const ir_node *node)
231 ir_node *block = get_jump_target(node);
232 be_gas_emit_block_name(block);
236 * returns true if a sparc_call calls a register and not an immediate
238 static bool is_sparc_reg_call(const ir_node *node)
240 const sparc_attr_t *attr = get_sparc_attr_const(node);
241 return attr->immediate_value_entity == NULL;
244 static int get_sparc_Call_dest_addr_pos(const ir_node *node)
246 assert(is_sparc_reg_call(node));
247 return get_irn_arity(node)-1;
250 static bool ba_is_fallthrough(const ir_node *node)
252 ir_node *block = get_nodes_block(node);
253 ir_node *next_block = (ir_node*)get_irn_link(block);
254 return get_jump_target(node) == next_block;
257 static bool is_no_instruction(const ir_node *node)
259 /* copies are nops if src_reg == dest_reg */
260 if (be_is_Copy(node) || be_is_CopyKeep(node)) {
261 const arch_register_t *src_reg = arch_get_irn_register_in(node, 0);
262 const arch_register_t *dest_reg = arch_get_irn_register_out(node, 0);
264 if (src_reg == dest_reg)
267 if (be_is_IncSP(node) && be_get_IncSP_offset(node) == 0)
269 /* Ba is not emitted if it is a simple fallthrough */
270 if (is_sparc_Ba(node) && ba_is_fallthrough(node))
273 return be_is_Keep(node) || be_is_Start(node) || is_Phi(node);
276 static bool has_delay_slot(const ir_node *node)
278 if (is_sparc_Ba(node)) {
279 return !ba_is_fallthrough(node);
282 return arch_get_irn_flags(node) & sparc_arch_irn_flag_has_delay_slot;
285 /** returns true if the emitter for this sparc node can produce more than one
286 * actual sparc instruction.
287 * Usually it is a bad sign if we have to add instructions here. We should
288 * rather try to get them lowered down. So we can actually put them into
289 * delay slots and make them more accessible to the scheduler.
291 static bool emits_multiple_instructions(const ir_node *node)
293 if (has_delay_slot(node))
296 if (is_sparc_Call(node))
297 return arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return;
299 return is_sparc_SMulh(node) || is_sparc_UMulh(node)
300 || is_sparc_SDiv(node) || is_sparc_UDiv(node)
301 || be_is_MemPerm(node) || be_is_Perm(node)
302 || is_sparc_SubSP(node);
305 static bool uses_reg(const ir_node *node, unsigned reg_index, unsigned width)
307 int arity = get_irn_arity(node);
308 for (int i = 0; i < arity; ++i) {
309 const arch_register_t *in_reg = arch_get_irn_register_in(node, i);
310 const arch_register_req_t *in_req = arch_get_irn_register_req_in(node, i);
313 if (reg_index < (unsigned)in_reg->global_index + in_req->width
314 && reg_index + width > in_reg->global_index)
320 static bool writes_reg(const ir_node *node, unsigned reg_index, unsigned width)
322 be_foreach_out(node, o) {
323 const arch_register_t *out_reg = arch_get_irn_register_out(node, o);
326 const arch_register_req_t *out_req = arch_get_irn_register_req_out(node, o);
327 if (reg_index < (unsigned)out_reg->global_index + out_req->width
328 && reg_index + width > out_reg->global_index)
334 static bool is_legal_delay_slot_filler(const ir_node *node)
336 if (is_no_instruction(node))
338 if (emits_multiple_instructions(node))
340 if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node)))
345 static bool can_move_down_into_delayslot(const ir_node *node, const ir_node *to)
347 if (!is_legal_delay_slot_filler(node))
350 if (!be_can_move_down(heights, node, to))
353 if (is_sparc_Call(to)) {
355 /** all inputs are used after the delay slot so, we're fine */
356 if (!is_sparc_reg_call(to))
359 check = get_irn_n(to, get_sparc_Call_dest_addr_pos(to));
360 if (skip_Proj(check) == node)
363 /* the Call also destroys the value of %o7, but since this is
364 * currently marked as ignore register in the backend, it
365 * should never be used by the instruction in the delay slot. */
366 if (uses_reg(node, REG_O7, 1))
369 } else if (is_sparc_Return(to)) {
370 /* return uses the value of %o7, all other values are not
371 * immediately used */
372 if (writes_reg(node, REG_O7, 1))
376 /* the node must not use our computed values */
377 int arity = get_irn_arity(to);
378 for (int i = 0; i < arity; ++i) {
379 ir_node *in = get_irn_n(to, i);
380 if (skip_Proj(in) == node)
387 static bool can_move_up_into_delayslot(const ir_node *node, const ir_node *to)
389 if (!be_can_move_up(heights, node, to))
392 /* node must not use any results of 'to' */
393 int arity = get_irn_arity(node);
394 for (int i = 0; i < arity; ++i) {
395 ir_node *in = get_irn_n(node, i);
396 ir_node *skipped = skip_Proj(in);
401 /* register window cycling effects at Restore aren't correctly represented
402 * in the graph yet so we need this exception here */
403 if (is_sparc_Restore(node) || is_sparc_RestoreZero(node)) {
405 } else if (is_sparc_Call(to)) {
406 /* node must not overwrite any of the inputs of the call,
407 * (except for the dest_addr) */
408 int dest_addr_pos = is_sparc_reg_call(to)
409 ? get_sparc_Call_dest_addr_pos(to) : -1;
411 int call_arity = get_irn_arity(to);
412 for (int i = 0; i < call_arity; ++i) {
413 if (i == dest_addr_pos)
415 const arch_register_t *reg = arch_get_irn_register_in(to, i);
418 const arch_register_req_t *req = arch_get_irn_register_req_in(to, i);
419 if (writes_reg(node, reg->global_index, req->width))
423 /* node must not write to one of the call outputs */
424 be_foreach_out(to, o) {
425 const arch_register_t *reg = arch_get_irn_register_out(to, o);
428 const arch_register_req_t *req = arch_get_irn_register_req_out(to, o);
429 if (writes_reg(node, reg->global_index, req->width))
432 } else if (is_sparc_SDiv(to) || is_sparc_UDiv(to)) {
433 /* node will be inserted between wr and div so it must not overwrite
434 * anything except the wr input */
435 int arity = get_irn_arity(to);
436 for (int i = 0; i < arity; ++i) {
437 assert((long)n_sparc_SDiv_dividend_high == (long)n_sparc_UDiv_dividend_high);
438 if (i == n_sparc_SDiv_dividend_high)
440 const arch_register_t *reg = arch_get_irn_register_in(to, i);
443 const arch_register_req_t *req = arch_get_irn_register_req_in(to, i);
444 if (writes_reg(node, reg->global_index, req->width))
451 static void optimize_fallthrough(ir_node *node)
453 ir_node *proj_true = NULL;
454 ir_node *proj_false = NULL;
456 assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false);
457 assert((long)pn_sparc_Bicc_true == (long)pn_sparc_fbfcc_true);
458 foreach_out_edge(node, edge) {
459 ir_node *proj = get_edge_src_irn(edge);
460 long nr = get_Proj_proj(proj);
461 if (nr == pn_sparc_Bicc_true) {
464 assert(nr == pn_sparc_Bicc_false);
468 assert(proj_true != NULL && proj_false != NULL);
470 /* for now, the code works for scheduled and non-schedules blocks */
471 const ir_node *block = get_nodes_block(node);
473 /* we have a block schedule */
474 const ir_node *next_block = (ir_node*)get_irn_link(block);
476 if (get_jump_target(proj_true) == next_block) {
477 /* exchange both proj destinations so the second one can be omitted */
478 set_Proj_proj(proj_true, pn_sparc_Bicc_false);
479 set_Proj_proj(proj_false, pn_sparc_Bicc_true);
481 sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node);
482 attr->relation = get_negated_relation(attr->relation);
487 * search for an instruction that can fill the delay slot of @p node
489 static ir_node *pick_delay_slot_for(ir_node *node)
491 static const unsigned PICK_DELAY_SLOT_MAX_DISTANCE = 10;
492 assert(has_delay_slot(node));
494 if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) {
495 optimize_fallthrough(node);
499 sched_foreach_reverse_from(sched_prev(node), schedpoint) {
500 if (has_delay_slot(schedpoint))
502 if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
505 if (!can_move_down_into_delayslot(schedpoint, node))
508 /* found something */
512 /* search after the current position */
514 sched_foreach_from(sched_next(node), schedpoint) {
515 if (has_delay_slot(schedpoint))
517 if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
519 if (!is_legal_delay_slot_filler(schedpoint))
521 if (!can_move_up_into_delayslot(schedpoint, node))
524 /* found something */
528 /* look in successor blocks */
529 ir_node *block = get_nodes_block(node);
530 /* TODO: sort succs by execution frequency */
531 foreach_block_succ(block, edge) {
532 ir_node *succ = get_edge_src_irn(edge);
533 /* we can't easily move up stuff from blocks with multiple predecessors
534 * since the instruction is lacking for the other preds then.
535 * (We also don't have to do any phi translation) */
536 if (get_Block_n_cfgpreds(succ) > 1)
540 sched_foreach(succ, schedpoint) {
541 if (has_delay_slot(schedpoint))
543 /* can't move pinned nodes accross blocks */
544 if (get_irn_pinned(schedpoint) == op_pin_state_pinned)
546 /* restore doesn't model register window switching correctly,
547 * so it appears like we could move it, which is not true */
548 if (is_sparc_Restore(schedpoint)
549 || is_sparc_RestoreZero(schedpoint))
551 if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
553 if (!is_legal_delay_slot_filler(schedpoint))
555 if (can_move_up_into_delayslot(schedpoint, node)) {
556 /* it's fine to move the insn accross blocks */
558 } else if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) {
559 ir_node *proj = get_Block_cfgpred(succ, 0);
560 long nr = get_Proj_proj(proj);
561 if ((nr == pn_sparc_Bicc_true || nr == pn_sparc_fbfcc_true)
562 && be_can_move_up(heights, schedpoint, succ)) {
563 /* we can use it with the annul flag */
564 sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node);
565 attr->annul_delay_slot = true;
575 void sparc_emitf(ir_node const *const node, char const *fmt, ...)
581 char const *start = fmt;
583 while (*fmt != '%' && *fmt != '\0')
585 be_emit_string_len(start, fmt - start);
602 const sparc_jmp_cond_attr_t *attr
603 = get_sparc_jmp_cond_attr_const(node);
604 if (attr->annul_delay_slot) {
605 be_emit_cstring(",a");
611 if (*fmt < '0' || '9' <= *fmt)
613 sparc_emit_dest_register(node, *fmt++ - '0');
617 sparc_attr_t const *const attr = get_sparc_attr_const(node);
618 be_gas_emit_entity(attr->immediate_value_entity);
619 if (attr->immediate_value != 0) {
620 be_emit_irprintf(plus ? "%+d" : "%d", attr->immediate_value);
628 case 'D': mode = get_sparc_fp_conv_attr_const(node)->dest_mode; break;
629 case 'M': mode = get_sparc_fp_attr_const(node)->fp_mode; break;
630 case 'S': mode = get_sparc_fp_conv_attr_const(node)->src_mode; break;
631 default: goto unknown;
633 emit_fp_suffix(mode);
638 sparc_emit_high_immediate(node);
642 ir_node *n = va_arg(ap, ir_node*);
643 sparc_emit_cfop_target(n);
649 case 'L': sparc_emit_load_mode(node); break;
650 case 'S': sparc_emit_store_mode(node); break;
651 default: goto unknown;
656 if (*fmt < '0' || '9' <= *fmt)
658 sparc_emit_offset(node, *fmt++ - '0');
662 arch_register_t const *const reg = va_arg(ap, const arch_register_t*);
664 be_emit_string(reg->name);
674 if (*fmt < '0' || '9' <= *fmt)
676 unsigned const pos = *fmt++ - '0';
677 if (imm && arch_get_irn_flags(node) & (arch_irn_flags_t)sparc_arch_irn_flag_immediate_form) {
678 sparc_emit_immediate(node);
680 sparc_emit_source_register(node, pos);
686 int const num = va_arg(ap, int);
687 be_emit_irprintf(plus ? "%+d" : "%d", num);
692 char const *const str = va_arg(ap, char const*);
698 unsigned const num = va_arg(ap, unsigned);
699 be_emit_irprintf(plus ? "%+u" : "%u", num);
705 panic("unknown format conversion in sparc_emitf()");
708 be_emit_finish_line_gas(node);
713 * Emits code for stack space management
715 static void emit_be_IncSP(const ir_node *irn)
717 int offset = be_get_IncSP_offset(irn);
722 /* SPARC stack grows downwards */
723 char const *const insn = offset > 0 ? offset = -offset, "add" : "sub";
724 sparc_emitf(irn, "%s %S0, %d, %D0", insn, offset);
728 * Emits code for stack space management.
730 static void emit_sparc_SubSP(const ir_node *irn)
732 sparc_emitf(irn, "sub %S0, %SI1, %D0");
733 sparc_emitf(irn, "add %S0, %u, %D1", SPARC_MIN_STACKSIZE);
736 static void fill_delay_slot(const ir_node *node)
738 emitting_delay_slot = true;
739 const ir_node *filler = pmap_get(ir_node, delay_slots, node);
740 if (filler != NULL) {
741 assert(!is_no_instruction(filler));
742 assert(!emits_multiple_instructions(filler));
743 be_emit_node(filler);
745 sparc_emitf(NULL, "nop");
747 emitting_delay_slot = false;
750 static void emit_sparc_Div(const ir_node *node, char const *const insn)
752 sparc_emitf(node, "wr %S0, 0, %%y");
754 /* TODO: we should specify number of delayslots in an architecture
756 unsigned wry_delay_count = 3;
757 for (unsigned i = 0; i < wry_delay_count; ++i) {
759 fill_delay_slot(node);
761 emitting_delay_slot = true;
762 sparc_emitf(NULL, "nop");
763 emitting_delay_slot = false;
767 sparc_emitf(node, "%s %S1, %SI2, %D0", insn);
770 static void emit_sparc_SDiv(const ir_node *node)
772 emit_sparc_Div(node, "sdiv");
775 static void emit_sparc_UDiv(const ir_node *node)
777 emit_sparc_Div(node, "udiv");
780 static void emit_sparc_Call(const ir_node *node)
782 if (is_sparc_reg_call(node)) {
783 int dest_addr = get_sparc_Call_dest_addr_pos(node);
784 sparc_emitf(node, "call %R", arch_get_irn_register_in(node, dest_addr));
786 sparc_emitf(node, "call %E, 0");
789 fill_delay_slot(node);
791 if (arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return) {
792 sparc_emitf(NULL, "unimp 8");
796 static void emit_be_Perm(const ir_node *irn)
798 ir_mode *mode = get_irn_mode(get_irn_n(irn, 0));
799 if (mode_is_float(mode)) {
800 const arch_register_t *reg0 = arch_get_irn_register_in(irn, 0);
801 const arch_register_t *reg1 = arch_get_irn_register_in(irn, 1);
802 unsigned reg_idx0 = reg0->global_index;
803 unsigned reg_idx1 = reg1->global_index;
804 unsigned width = arch_get_irn_register_req_in(irn, 0)->width;
805 for (unsigned i = 0; i < width; ++i) {
806 const arch_register_t *r0 = &sparc_registers[reg_idx0+i];
807 const arch_register_t *r1 = &sparc_registers[reg_idx1+i];
808 sparc_emitf(irn, "fmovs %R, %%f31", r0);
809 sparc_emitf(irn, "fmovs %R, %R", r1, r0);
810 sparc_emitf(irn, "fmovs %%f31, %R", r1);
813 sparc_emitf(irn, "xor %S1, %S0, %S0");
814 sparc_emitf(irn, "xor %S1, %S0, %S1");
815 sparc_emitf(irn, "xor %S1, %S0, %S0");
819 /* The stack pointer must always be SPARC_STACK_ALIGNMENT bytes aligned, so get
820 * the next bigger integer that's evenly divisible by it. */
821 static unsigned get_aligned_sp_change(const unsigned num_regs)
823 const unsigned bytes = num_regs * SPARC_REGISTER_SIZE;
824 return round_up2(bytes, SPARC_STACK_ALIGNMENT);
827 /* Spill register l0 or both l0 and l1, depending on n_spilled and n_to_spill.*/
828 static void memperm_emit_spill_registers(const ir_node *node, int n_spilled,
831 assert(n_spilled < n_to_spill);
833 if (n_spilled == 0) {
834 /* We always reserve stack space for two registers because during copy
835 * processing we don't know yet if we also need to handle a cycle which
836 * needs two registers. More complicated code in emit_MemPerm would
837 * prevent wasting SPARC_REGISTER_SIZE bytes of stack space but
838 * it is not worth the worse readability of emit_MemPerm. */
840 /* Keep stack pointer aligned. */
841 unsigned sp_change = get_aligned_sp_change(2);
842 sparc_emitf(node, "sub %%sp, %u, %%sp", sp_change);
844 /* Spill register l0. */
845 sparc_emitf(node, "st %%l0, [%%sp%+d]", SPARC_MIN_STACKSIZE);
848 if (n_to_spill == 2) {
849 /* Spill register l1. */
850 sparc_emitf(node, "st %%l1, [%%sp%+d]", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE);
854 /* Restore register l0 or both l0 and l1, depending on n_spilled. */
855 static void memperm_emit_restore_registers(const ir_node *node, int n_spilled)
857 if (n_spilled == 2) {
858 /* Restore register l1. */
859 sparc_emitf(node, "ld [%%sp%+d], %%l1", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE);
862 /* Restore register l0. */
863 sparc_emitf(node, "ld [%%sp%+d], %%l0", SPARC_MIN_STACKSIZE);
865 /* Restore stack pointer. */
866 unsigned sp_change = get_aligned_sp_change(2);
867 sparc_emitf(node, "add %%sp, %u, %%sp", sp_change);
870 /* Emit code to copy in_ent to out_ent. Only uses l0. */
871 static void memperm_emit_copy(const ir_node *node, ir_entity *in_ent,
874 ir_graph *irg = get_irn_irg(node);
875 be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
876 int off_in = be_get_stack_entity_offset(layout, in_ent, 0);
877 int off_out = be_get_stack_entity_offset(layout, out_ent, 0);
879 /* Load from input entity. */
880 sparc_emitf(node, "ld [%%fp%+d], %%l0", off_in);
881 /* Store to output entity. */
882 sparc_emitf(node, "st %%l0, [%%fp%+d]", off_out);
885 /* Emit code to swap ent1 and ent2. Uses l0 and l1. */
886 static void memperm_emit_swap(const ir_node *node, ir_entity *ent1,
889 ir_graph *irg = get_irn_irg(node);
890 be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
891 int off1 = be_get_stack_entity_offset(layout, ent1, 0);
892 int off2 = be_get_stack_entity_offset(layout, ent2, 0);
894 /* Load from first input entity. */
895 sparc_emitf(node, "ld [%%fp%+d], %%l0", off1);
896 /* Load from second input entity. */
897 sparc_emitf(node, "ld [%%fp%+d], %%l1", off2);
898 /* Store first value to second output entity. */
899 sparc_emitf(node, "st %%l0, [%%fp%+d]", off2);
900 /* Store second value to first output entity. */
901 sparc_emitf(node, "st %%l1, [%%fp%+d]", off1);
904 /* Find the index of ent in ents or return -1 if not found. */
905 static int get_index(ir_entity **ents, int n, ir_entity *ent)
907 for (int i = 0; i < n; ++i) {
916 * Emit code for a MemPerm node.
918 * Analyze MemPerm for copy chains and cyclic swaps and resolve them using
920 * This function is conceptually very similar to permute_values in
923 static void emit_be_MemPerm(const ir_node *node)
925 int memperm_arity = be_get_MemPerm_entity_arity(node);
926 /* Upper limit for the number of participating entities is twice the
927 * arity, e.g., for a simple copying MemPerm node with one input/output. */
928 int max_size = 2 * memperm_arity;
929 ir_entity **entities = ALLOCANZ(ir_entity *, max_size);
930 /* sourceof contains the input entity for each entity. If an entity is
931 * never used as an output, its entry in sourceof is a fix point. */
932 int *sourceof = ALLOCANZ(int, max_size);
933 /* n_users counts how many output entities use this entity as their input.*/
934 int *n_users = ALLOCANZ(int, max_size);
935 /* n_spilled records the number of spilled registers, either 1 or 2. */
938 /* This implementation currently only works with frame pointers. */
939 ir_graph *irg = get_irn_irg(node);
940 be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
941 assert(!layout->sp_relative && "MemPerms currently do not work without frame pointers");
943 for (int i = 0; i < max_size; ++i) {
948 for (int i = 0; i < memperm_arity; ++i) {
949 ir_entity *out = be_get_MemPerm_out_entity(node, i);
950 ir_entity *in = be_get_MemPerm_in_entity(node, i);
952 /* Insert into entities to be able to operate on unique indices. */
953 if (get_index(entities, n, out) == -1)
955 if (get_index(entities, n, in) == -1)
958 int oidx = get_index(entities, n, out);
959 int iidx = get_index(entities, n, in);
961 sourceof[oidx] = iidx; /* Remember the source. */
962 ++n_users[iidx]; /* Increment number of users of this entity. */
965 /* First do all the copies. */
966 for (int oidx = 0; oidx < n; /* empty */) {
967 int iidx = sourceof[oidx];
969 /* Nothing to do for fix points.
970 * Also, if entities[oidx] is used as an input by another copy, we
971 * can't overwrite entities[oidx] yet.*/
972 if (iidx == oidx || n_users[oidx] > 0) {
977 /* We found the end of a 'chain', so do the copy. */
978 if (n_spilled == 0) {
979 memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/1);
982 memperm_emit_copy(node, entities[iidx], entities[oidx]);
985 sourceof[oidx] = oidx;
987 assert(n_users[iidx] > 0);
988 /* Decrementing the number of users might enable us to do another
992 if (iidx < oidx && n_users[iidx] == 0) {
999 /* The rest are cycles. */
1000 for (int oidx = 0; oidx < n; /* empty */) {
1001 int iidx = sourceof[oidx];
1003 /* Nothing to do for fix points. */
1009 assert(n_users[iidx] == 1);
1011 /* Swap the two values to resolve the cycle. */
1012 if (n_spilled < 2) {
1013 memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/2);
1016 memperm_emit_swap(node, entities[iidx], entities[oidx]);
1018 int tidx = sourceof[iidx];
1020 sourceof[iidx] = iidx;
1022 /* The source of oidx is now the old source of iidx, because we swapped
1023 * the two entities. */
1024 sourceof[oidx] = tidx;
1027 #ifdef DEBUG_libfirm
1028 /* Only fix points should remain. */
1029 for (int i = 0; i < max_size; ++i) {
1030 assert(sourceof[i] == i);
1034 assert(n_spilled > 0 && "Useless MemPerm node");
1036 memperm_emit_restore_registers(node, n_spilled);
1039 static void emit_sparc_Return(const ir_node *node)
1041 ir_graph *irg = get_irn_irg(node);
1042 ir_entity *entity = get_irg_entity(irg);
1043 ir_type *type = get_entity_type(entity);
1045 const char *destreg = "%o7";
1047 /* hack: we don't explicitely model register changes because of the
1048 * restore node. So we have to do it manually here */
1049 const ir_node *delay_slot = pmap_get(ir_node, delay_slots, node);
1050 if (delay_slot != NULL &&
1051 (is_sparc_Restore(delay_slot) || is_sparc_RestoreZero(delay_slot))) {
1054 char const *const offset = get_method_calling_convention(type) & cc_compound_ret ? "12" : "8";
1055 sparc_emitf(node, "jmp %s+%s", destreg, offset);
1056 fill_delay_slot(node);
1059 static const arch_register_t *map_i_to_o_reg(const arch_register_t *reg)
1061 unsigned idx = reg->global_index;
1062 if (idx < REG_I0 || idx > REG_I7)
1064 idx += REG_O0 - REG_I0;
1065 assert(REG_O0 <= idx && idx <= REG_O7);
1066 return &sparc_registers[idx];
1069 static void emit_sparc_Restore(const ir_node *node)
1071 const arch_register_t *destreg
1072 = arch_get_irn_register_out(node, pn_sparc_Restore_res);
1073 sparc_emitf(node, "restore %S2, %SI3, %R", map_i_to_o_reg(destreg));
1076 static void emit_sparc_FrameAddr(const ir_node *node)
1078 const sparc_attr_t *attr = get_sparc_attr_const(node);
1079 int32_t offset = attr->immediate_value;
1081 char const *const insn = offset > 0 ? offset = -offset, "sub" : "add";
1082 assert(sparc_is_value_imm_encodeable(offset));
1083 sparc_emitf(node, "%s %S0, %d, %D0", insn, offset);
1086 static const char *get_icc_unsigned(ir_relation relation)
1088 switch (relation & (ir_relation_less_equal_greater)) {
1089 case ir_relation_false: return "bn";
1090 case ir_relation_equal: return "be";
1091 case ir_relation_less: return "blu";
1092 case ir_relation_less_equal: return "bleu";
1093 case ir_relation_greater: return "bgu";
1094 case ir_relation_greater_equal: return "bgeu";
1095 case ir_relation_less_greater: return "bne";
1096 case ir_relation_less_equal_greater: return "ba";
1097 default: panic("Cmp has unsupported relation");
1101 static const char *get_icc_signed(ir_relation relation)
1103 switch (relation & (ir_relation_less_equal_greater)) {
1104 case ir_relation_false: return "bn";
1105 case ir_relation_equal: return "be";
1106 case ir_relation_less: return "bl";
1107 case ir_relation_less_equal: return "ble";
1108 case ir_relation_greater: return "bg";
1109 case ir_relation_greater_equal: return "bge";
1110 case ir_relation_less_greater: return "bne";
1111 case ir_relation_less_equal_greater: return "ba";
1112 default: panic("Cmp has unsupported relation");
1116 static const char *get_fcc(ir_relation relation)
1119 case ir_relation_false: return "fbn";
1120 case ir_relation_equal: return "fbe";
1121 case ir_relation_less: return "fbl";
1122 case ir_relation_less_equal: return "fble";
1123 case ir_relation_greater: return "fbg";
1124 case ir_relation_greater_equal: return "fbge";
1125 case ir_relation_less_greater: return "fblg";
1126 case ir_relation_less_equal_greater: return "fbo";
1127 case ir_relation_unordered: return "fbu";
1128 case ir_relation_unordered_equal: return "fbue";
1129 case ir_relation_unordered_less: return "fbul";
1130 case ir_relation_unordered_less_equal: return "fbule";
1131 case ir_relation_unordered_greater: return "fbug";
1132 case ir_relation_unordered_greater_equal: return "fbuge";
1133 case ir_relation_unordered_less_greater: return "fbne";
1134 case ir_relation_true: return "fba";
1136 panic("invalid relation");
1139 typedef const char* (*get_cc_func)(ir_relation relation);
1141 static void emit_sparc_branch(const ir_node *node, get_cc_func get_cc)
1143 const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node);
1144 ir_relation relation = attr->relation;
1145 const ir_node *proj_true = NULL;
1146 const ir_node *proj_false = NULL;
1148 assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false);
1149 assert((long)pn_sparc_Bicc_true == (long)pn_sparc_fbfcc_true);
1150 foreach_out_edge(node, edge) {
1151 ir_node *proj = get_edge_src_irn(edge);
1152 long nr = get_Proj_proj(proj);
1153 if (nr == pn_sparc_Bicc_true) {
1156 assert(nr == pn_sparc_Bicc_false);
1161 /* emit the true proj */
1162 sparc_emitf(node, "%s%A %L", get_cc(relation), proj_true);
1163 fill_delay_slot(node);
1165 const ir_node *block = get_nodes_block(node);
1166 const ir_node *next_block = (ir_node*)get_irn_link(block);
1168 if (get_jump_target(proj_false) == next_block) {
1169 if (be_options.verbose_asm) {
1170 sparc_emitf(node, "/* fallthrough to %L */", proj_false);
1173 sparc_emitf(node, "ba %L", proj_false);
1174 /* TODO: fill this slot as well */
1175 emitting_delay_slot = true;
1176 sparc_emitf(NULL, "nop");
1177 emitting_delay_slot = false;
1181 static void emit_sparc_Bicc(const ir_node *node)
1183 const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node);
1184 bool is_unsigned = attr->is_unsigned;
1185 emit_sparc_branch(node, is_unsigned ? get_icc_unsigned : get_icc_signed);
1188 static void emit_sparc_fbfcc(const ir_node *node)
1190 /* if the flags producing node was immediately in front of us, emit
1192 ir_node *flags = get_irn_n(node, n_sparc_fbfcc_flags);
1193 ir_node *prev = sched_prev(node);
1194 if (is_Block(prev)) {
1195 /* TODO: when the flags come from another block, then we have to do
1196 * more complicated tests to see wether the flag producing node is
1197 * potentially in front of us (could happen for fallthroughs) */
1198 panic("TODO: fbfcc flags come from other block");
1200 if (skip_Proj(flags) == prev) {
1201 sparc_emitf(NULL, "nop");
1203 emit_sparc_branch(node, get_fcc);
1206 static void emit_sparc_Ba(const ir_node *node)
1208 if (ba_is_fallthrough(node)) {
1209 if (be_options.verbose_asm) {
1210 sparc_emitf(node, "/* fallthrough to %L */", node);
1213 sparc_emitf(node, "ba %L", node);
1214 fill_delay_slot(node);
1218 static void emit_sparc_SwitchJmp(const ir_node *node)
1220 const sparc_switch_jmp_attr_t *attr = get_sparc_switch_jmp_attr_const(node);
1222 sparc_emitf(node, "jmp %S0");
1223 fill_delay_slot(node);
1225 be_emit_jump_table(node, attr->table, attr->table_entity, get_jump_target);
1228 static void emit_fmov(const ir_node *node, const arch_register_t *src_reg,
1229 const arch_register_t *dst_reg)
1231 sparc_emitf(node, "fmovs %R, %R", src_reg, dst_reg);
1234 static const arch_register_t *get_next_fp_reg(const arch_register_t *reg)
1236 unsigned idx = reg->global_index;
1237 assert(reg == &sparc_registers[idx]);
1239 assert(idx - REG_F0 < N_sparc_fp_REGS);
1240 return &sparc_registers[idx];
1243 static void emit_be_Copy(const ir_node *node)
1245 ir_mode *mode = get_irn_mode(node);
1246 const arch_register_t *src_reg = arch_get_irn_register_in(node, 0);
1247 const arch_register_t *dst_reg = arch_get_irn_register_out(node, 0);
1249 if (src_reg == dst_reg)
1252 if (mode_is_float(mode)) {
1253 unsigned bits = get_mode_size_bits(mode);
1254 int n = bits > 32 ? bits > 64 ? 3 : 1 : 0;
1255 emit_fmov(node, src_reg, dst_reg);
1256 for (int i = 0; i < n; ++i) {
1257 src_reg = get_next_fp_reg(src_reg);
1258 dst_reg = get_next_fp_reg(dst_reg);
1259 emit_fmov(node, src_reg, dst_reg);
1261 } else if (mode_is_data(mode)) {
1262 sparc_emitf(node, "mov %S0, %D0");
1264 panic("invalid mode");
1269 * Enters the emitter functions for handled nodes into the generic
1270 * pointer of an opcode.
1272 static void sparc_register_emitters(void)
1274 /* first clear the generic function pointer for all ops */
1275 ir_clear_opcodes_generic_func();
1276 /* register all emitter functions defined in spec */
1277 sparc_register_spec_emitters();
1279 /* custom emitter */
1280 be_set_emitter(op_be_Copy, emit_be_Copy);
1281 be_set_emitter(op_be_CopyKeep, emit_be_Copy);
1282 be_set_emitter(op_be_IncSP, emit_be_IncSP);
1283 be_set_emitter(op_be_MemPerm, emit_be_MemPerm);
1284 be_set_emitter(op_be_Perm, emit_be_Perm);
1285 be_set_emitter(op_sparc_Ba, emit_sparc_Ba);
1286 be_set_emitter(op_sparc_Bicc, emit_sparc_Bicc);
1287 be_set_emitter(op_sparc_Call, emit_sparc_Call);
1288 be_set_emitter(op_sparc_FrameAddr, emit_sparc_FrameAddr);
1289 be_set_emitter(op_sparc_Restore, emit_sparc_Restore);
1290 be_set_emitter(op_sparc_Return, emit_sparc_Return);
1291 be_set_emitter(op_sparc_SDiv, emit_sparc_SDiv);
1292 be_set_emitter(op_sparc_SubSP, emit_sparc_SubSP);
1293 be_set_emitter(op_sparc_SwitchJmp, emit_sparc_SwitchJmp);
1294 be_set_emitter(op_sparc_UDiv, emit_sparc_UDiv);
1295 be_set_emitter(op_sparc_fbfcc, emit_sparc_fbfcc);
1297 /* no need to emit anything for the following nodes */
1298 be_set_emitter(op_Phi, be_emit_nothing);
1299 be_set_emitter(op_be_Keep, be_emit_nothing);
1300 be_set_emitter(op_sparc_Start, be_emit_nothing);
1303 static bool block_needs_label(const ir_node *block, const ir_node *sched_prev)
1305 if (get_Block_entity(block) != NULL)
1308 int n_cfgpreds = get_Block_n_cfgpreds(block);
1309 if (n_cfgpreds == 0) {
1311 } else if (n_cfgpreds > 1) {
1314 ir_node *cfgpred = get_Block_cfgpred(block, 0);
1315 ir_node *cfgpred_block = get_nodes_block(cfgpred);
1316 if (is_Proj(cfgpred) && is_sparc_SwitchJmp(get_Proj_pred(cfgpred)))
1318 return sched_prev != cfgpred_block || get_jump_target(cfgpred) != block;
1323 * Walks over the nodes in a block connected by scheduling edges
1324 * and emits code for each node.
1326 static void sparc_emit_block(ir_node *block, ir_node *prev)
1328 bool needs_label = block_needs_label(block, prev);
1329 be_gas_begin_block(block, needs_label);
1331 sched_foreach(block, node) {
1332 if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node)))
1339 * Emits code for function start.
1341 static void sparc_emit_func_prolog(ir_graph *irg)
1343 ir_entity *entity = get_irg_entity(irg);
1344 be_gas_emit_function_prolog(entity, 4, NULL);
1348 * Emits code for function end
1350 static void sparc_emit_func_epilog(ir_graph *irg)
1352 ir_entity *entity = get_irg_entity(irg);
1353 be_gas_emit_function_epilog(entity);
1356 static void init_jump_links(ir_node *block, void *env)
1360 int n = get_Block_n_cfgpreds(block);
1361 for (n--; n >= 0; n--) {
1362 ir_node *pred = get_Block_cfgpred(block, n);
1363 set_jump_target(pred, block);
1367 static int cmp_block_execfreqs(const void *d1, const void *d2)
1369 ir_node **p1 = (ir_node**)d1;
1370 ir_node **p2 = (ir_node**)d2;
1371 double freq1 = get_block_execfreq(*p1);
1372 double freq2 = get_block_execfreq(*p2);
1377 return get_irn_node_nr(*p2)-get_irn_node_nr(*p1);
1380 static void pick_delay_slots(size_t n_blocks, ir_node **blocks)
1382 /* create blocklist sorted by execution frequency */
1383 ir_node **sorted_blocks = XMALLOCN(ir_node*, n_blocks);
1384 memcpy(sorted_blocks, blocks, n_blocks*sizeof(sorted_blocks[0]));
1385 qsort(sorted_blocks, n_blocks, sizeof(sorted_blocks[0]),
1386 cmp_block_execfreqs);
1388 for (size_t i = 0; i < n_blocks; ++i) {
1389 const ir_node *block = sorted_blocks[i];
1390 sched_foreach(block, node) {
1391 if (!has_delay_slot(node))
1393 ir_node *filler = pick_delay_slot_for(node);
1396 rbitset_set(delay_slot_fillers, get_irn_idx(filler));
1397 pmap_insert(delay_slots, node, filler);
1402 void sparc_emit_routine(ir_graph *irg)
1404 heights = heights_new(irg);
1405 delay_slot_fillers = rbitset_malloc(get_irg_last_idx(irg));
1406 delay_slots = pmap_create();
1408 /* register all emitter functions */
1409 sparc_register_emitters();
1411 /* create the block schedule. For now, we don't need it earlier. */
1412 ir_node **block_schedule = be_create_block_schedule(irg);
1414 sparc_emit_func_prolog(irg);
1415 irg_block_walk_graph(irg, init_jump_links, NULL, NULL);
1417 /* inject block scheduling links & emit code of each block */
1418 size_t n_blocks = ARR_LEN(block_schedule);
1419 for (size_t i = 0; i < n_blocks; ++i) {
1420 ir_node *block = block_schedule[i];
1421 ir_node *next_block = i+1 < n_blocks ? block_schedule[i+1] : NULL;
1422 set_irn_link(block, next_block);
1425 pick_delay_slots(n_blocks, block_schedule);
1427 for (size_t i = 0; i < n_blocks; ++i) {
1428 ir_node *block = block_schedule[i];
1429 ir_node *prev = i>=1 ? block_schedule[i-1] : NULL;
1430 if (block == get_irg_end_block(irg))
1432 sparc_emit_block(block, prev);
1435 /* emit function epilog */
1436 sparc_emit_func_epilog(irg);
1438 pmap_destroy(delay_slots);
1439 xfree(delay_slot_fillers);
1440 heights_free(heights);
1443 void sparc_init_emitter(void)
1445 FIRM_DBG_REGISTER(dbg, "firm.be.sparc.emit");