2 * This file is part of libFirm.
3 * Copyright (C) 2012 University of Karlsruhe.
8 * @brief emit assembler for a backend graph
9 * @author Hannes Rapp, Matthias Braun
15 #include "bitfiddle.h"
27 #include "raw_bitset.h"
31 #include "execfreq_t.h"
34 #include "beblocksched.h"
40 #include "bepeephole.h"
42 #include "sparc_emitter.h"
43 #include "gen_sparc_emitter.h"
44 #include "sparc_nodes_attr.h"
45 #include "sparc_new_nodes.h"
46 #include "gen_sparc_regalloc_if.h"
48 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
50 static ir_heights_t *heights;
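/* instructions picked as delay slot fillers; they are skipped during normal
 * block emission and emitted in the delay slot of their user instead */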
51 static unsigned *delay_slot_fillers;
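/* maps instructions with a delay slot to the filler picked for them */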
52 static pmap *delay_slots;
54 static bool emitting_delay_slot;
57 * indent before instruction. (Adds additional indentation when emitting
60 static void sparc_emit_indent(void)
63 if (emitting_delay_slot)
67 static void sparc_emit_immediate(ir_node const *const node)
69 const sparc_attr_t *attr = get_sparc_attr_const(node);
70 ir_entity *entity = attr->immediate_value_entity;
73 int32_t value = attr->immediate_value;
74 assert(sparc_is_value_imm_encodeable(value));
75 be_emit_irprintf("%d", value);
77 if (get_entity_owner(entity) == get_tls_type()) {
78 be_emit_cstring("%tle_lox10(");
80 be_emit_cstring("%lo(");
82 be_gas_emit_entity(entity);
83 if (attr->immediate_value != 0) {
84 be_emit_irprintf("%+d", attr->immediate_value);
90 static void sparc_emit_high_immediate(ir_node const *node)
92 const sparc_attr_t *attr = get_sparc_attr_const(node);
93 ir_entity *entity = attr->immediate_value_entity;
96 uint32_t value = (uint32_t) attr->immediate_value;
97 be_emit_irprintf("%%hi(0x%X)", value);
99 if (get_entity_owner(entity) == get_tls_type()) {
100 be_emit_cstring("%tle_hix22(");
102 be_emit_cstring("%hi(");
104 be_gas_emit_entity(entity);
105 if (attr->immediate_value != 0) {
106 be_emit_irprintf("%+d", attr->immediate_value);
112 static void sparc_emit_source_register(ir_node const *node, int const pos)
114 const arch_register_t *reg = arch_get_irn_register_in(node, pos);
116 be_emit_string(reg->name);
119 static void sparc_emit_dest_register(ir_node const *const node, int const pos)
121 const arch_register_t *reg = arch_get_irn_register_out(node, pos);
123 be_emit_string(reg->name);
129 static void sparc_emit_offset(const ir_node *node, int offset_node_pos)
131 const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
133 if (attr->is_reg_reg) {
134 assert(!attr->is_frame_entity);
135 assert(attr->base.immediate_value == 0);
136 assert(attr->base.immediate_value_entity == NULL);
138 sparc_emit_source_register(node, offset_node_pos);
139 } else if (attr->is_frame_entity) {
140 int32_t offset = attr->base.immediate_value;
142 assert(sparc_is_value_imm_encodeable(offset));
143 be_emit_irprintf("%+ld", offset);
145 } else if (attr->base.immediate_value != 0
146 || attr->base.immediate_value_entity != NULL) {
148 sparc_emit_immediate(node);
155 static void sparc_emit_load_mode(ir_node const *const node)
157 const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
158 ir_mode *mode = attr->load_store_mode;
159 int bits = get_mode_size_bits(mode);
160 bool is_signed = mode_is_signed(mode);
163 case 8: be_emit_string(is_signed ? "sb" : "ub"); break;
164 case 16: be_emit_string(is_signed ? "sh" : "uh"); break;
166 case 64: be_emit_char('d'); break;
167 case 128: be_emit_char('q'); break;
168 default: panic("invalid load/store mode %+F", mode);
173 * Emit store mode char
175 static void sparc_emit_store_mode(ir_node const *const node)
177 const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
178 ir_mode *mode = attr->load_store_mode;
179 int bits = get_mode_size_bits(mode);
182 case 8: be_emit_char('b'); break;
183 case 16: be_emit_char('h'); break;
185 case 64: be_emit_char('d'); break;
186 case 128: be_emit_char('q'); break;
187 default: panic("invalid load/store mode %+F", mode);
191 static void emit_fp_suffix(const ir_mode *mode)
193 assert(mode_is_float(mode));
194 switch (get_mode_size_bits(mode)) {
195 case 32: be_emit_char('s'); break;
196 case 64: be_emit_char('d'); break;
197 case 128: be_emit_char('q'); break;
198 default: panic("invalid FP mode");
202 static void set_jump_target(ir_node *jump, ir_node *target)
204 set_irn_link(jump, target);
207 static ir_node *get_jump_target(const ir_node *jump)
209 return (ir_node*)get_irn_link(jump);
213 * Emits the target label for a control flow node.
215 static void sparc_emit_cfop_target(const ir_node *node)
217 ir_node *block = get_jump_target(node);
218 be_gas_emit_block_name(block);
222 * returns true if a sparc_Call calls through a register rather than an immediate address
224 static bool is_sparc_reg_call(const ir_node *node)
226 const sparc_attr_t *attr = get_sparc_attr_const(node);
227 return attr->immediate_value_entity == NULL;
230 static int get_sparc_Call_dest_addr_pos(const ir_node *node)
232 assert(is_sparc_reg_call(node));
233 return get_irn_arity(node)-1;
236 static bool ba_is_fallthrough(const ir_node *node)
238 ir_node *block = get_nodes_block(node);
239 ir_node *next_block = (ir_node*)get_irn_link(block);
240 return get_jump_target(node) == next_block;
243 static bool is_no_instruction(const ir_node *node)
245 /* copies are nops if src_reg == dest_reg */
246 if (be_is_Copy(node) || be_is_CopyKeep(node)) {
247 const arch_register_t *src_reg = arch_get_irn_register_in(node, 0);
248 const arch_register_t *dest_reg = arch_get_irn_register_out(node, 0);
250 if (src_reg == dest_reg)
253 if (be_is_IncSP(node) && be_get_IncSP_offset(node) == 0)
255 /* Ba is not emitted if it is a simple fallthrough */
256 if (is_sparc_Ba(node) && ba_is_fallthrough(node))
259 return be_is_Keep(node) || be_is_Start(node) || is_Phi(node);
262 static bool has_delay_slot(const ir_node *node)
264 if (is_sparc_Ba(node)) {
265 return !ba_is_fallthrough(node);
268 return arch_get_irn_flags(node) & sparc_arch_irn_flag_has_delay_slot;
271 /** returns true if the emitter for this sparc node can produce more than one
272 * actual sparc instruction.
273 * Usually it is a bad sign if we have to add instructions here; we should
274 * rather try to get them lowered further so we can actually put them into
275 * delay slots and make them more accessible to the scheduler.
277 static bool emits_multiple_instructions(const ir_node *node)
279 if (has_delay_slot(node))
282 if (is_sparc_Call(node))
283 return arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return;
285 return is_sparc_SMulh(node) || is_sparc_UMulh(node)
286 || is_sparc_SDiv(node) || is_sparc_UDiv(node)
287 || be_is_MemPerm(node) || be_is_Perm(node)
288 || is_sparc_SubSP(node);
291 static bool uses_reg(const ir_node *node, unsigned reg_index, unsigned width)
293 int arity = get_irn_arity(node);
294 for (int i = 0; i < arity; ++i) {
295 const arch_register_t *in_reg = arch_get_irn_register_in(node, i);
296 const arch_register_req_t *in_req = arch_get_irn_register_req_in(node, i);
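/* overlap test between the register range read here,
 * [in_reg->global_index, +in_req->width), and the queried range
 * [reg_index, +width) */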
299 if (reg_index < (unsigned)in_reg->global_index + in_req->width
300 && reg_index + width > in_reg->global_index)
306 static bool writes_reg(const ir_node *node, unsigned reg_index, unsigned width)
308 be_foreach_out(node, o) {
309 const arch_register_t *out_reg = arch_get_irn_register_out(node, o);
312 const arch_register_req_t *out_req = arch_get_irn_register_req_out(node, o);
313 if (reg_index < (unsigned)out_reg->global_index + out_req->width
314 && reg_index + width > out_reg->global_index)
320 static bool is_legal_delay_slot_filler(const ir_node *node)
322 if (is_no_instruction(node))
324 if (emits_multiple_instructions(node))
326 if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node)))
331 static bool can_move_down_into_delayslot(const ir_node *node, const ir_node *to)
333 if (!is_legal_delay_slot_filler(node))
336 if (!be_can_move_down(heights, node, to))
339 if (is_sparc_Call(to)) {
341 /* all inputs are used after the delay slot, so we're fine */
342 if (!is_sparc_reg_call(to))
345 check = get_irn_n(to, get_sparc_Call_dest_addr_pos(to));
346 if (skip_Proj(check) == node)
349 /* the Call also destroys the value of %o7, but since this is
350 * currently marked as ignore register in the backend, it
351 * should never be used by the instruction in the delay slot. */
352 if (uses_reg(node, REG_O7, 1))
355 } else if (is_sparc_Return(to)) {
356 /* return uses the value of %o7, all other values are not
357 * immediately used */
358 if (writes_reg(node, REG_O7, 1))
362 /* the node must not use our computed values */
363 int arity = get_irn_arity(to);
364 for (int i = 0; i < arity; ++i) {
365 ir_node *in = get_irn_n(to, i);
366 if (skip_Proj(in) == node)
373 static bool can_move_up_into_delayslot(const ir_node *node, const ir_node *to)
375 if (!be_can_move_up(heights, node, to))
378 /* node must not use any results of 'to' */
379 int arity = get_irn_arity(node);
380 for (int i = 0; i < arity; ++i) {
381 ir_node *in = get_irn_n(node, i);
382 ir_node *skipped = skip_Proj(in);
387 /* register window cycling effects at Restore aren't correctly represented
388 * in the graph yet so we need this exception here */
389 if (is_sparc_Restore(node) || is_sparc_RestoreZero(node)) {
391 } else if (is_sparc_Call(to)) {
392 /* node must not overwrite any of the inputs of the call,
393 * (except for the dest_addr) */
394 int dest_addr_pos = is_sparc_reg_call(to)
395 ? get_sparc_Call_dest_addr_pos(to) : -1;
397 int call_arity = get_irn_arity(to);
398 for (int i = 0; i < call_arity; ++i) {
399 if (i == dest_addr_pos)
401 const arch_register_t *reg = arch_get_irn_register_in(to, i);
404 const arch_register_req_t *req = arch_get_irn_register_req_in(to, i);
405 if (writes_reg(node, reg->global_index, req->width))
409 /* node must not write to one of the call outputs */
410 be_foreach_out(to, o) {
411 const arch_register_t *reg = arch_get_irn_register_out(to, o);
414 const arch_register_req_t *req = arch_get_irn_register_req_out(to, o);
415 if (writes_reg(node, reg->global_index, req->width))
418 } else if (is_sparc_SDiv(to) || is_sparc_UDiv(to)) {
419 /* node will be inserted between wr and div so it must not overwrite
420 * anything except the wr input */
421 int arity = get_irn_arity(to);
422 for (int i = 0; i < arity; ++i) {
423 assert((long)n_sparc_SDiv_dividend_high == (long)n_sparc_UDiv_dividend_high);
424 if (i == n_sparc_SDiv_dividend_high)
426 const arch_register_t *reg = arch_get_irn_register_in(to, i);
429 const arch_register_req_t *req = arch_get_irn_register_req_in(to, i);
430 if (writes_reg(node, reg->global_index, req->width))
437 static void optimize_fallthrough(ir_node *node)
439 ir_node *proj_true = NULL;
440 ir_node *proj_false = NULL;
442 assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false);
443 assert((long)pn_sparc_Bicc_true == (long)pn_sparc_fbfcc_true);
444 foreach_out_edge(node, edge) {
445 ir_node *proj = get_edge_src_irn(edge);
446 long nr = get_Proj_proj(proj);
447 if (nr == pn_sparc_Bicc_true) {
450 assert(nr == pn_sparc_Bicc_false);
454 assert(proj_true != NULL && proj_false != NULL);
456 /* for now, the code works for scheduled and non-scheduled blocks */
457 const ir_node *block = get_nodes_block(node);
459 /* we have a block schedule */
460 const ir_node *next_block = (ir_node*)get_irn_link(block);
462 if (get_jump_target(proj_true) == next_block) {
463 /* exchange both proj destinations so the second one can be omitted */
464 set_Proj_proj(proj_true, pn_sparc_Bicc_false);
465 set_Proj_proj(proj_false, pn_sparc_Bicc_true);
467 sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node);
468 attr->relation = get_negated_relation(attr->relation);
473 * search for an instruction that can fill the delay slot of @p node
475 static ir_node *pick_delay_slot_for(ir_node *node)
477 static const unsigned PICK_DELAY_SLOT_MAX_DISTANCE = 10;
478 assert(has_delay_slot(node));
480 if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) {
481 optimize_fallthrough(node);
485 sched_foreach_reverse_before(node, schedpoint) {
486 if (has_delay_slot(schedpoint))
488 if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
491 if (!can_move_down_into_delayslot(schedpoint, node))
494 /* found something */
498 /* search after the current position */
500 sched_foreach_after(node, schedpoint) {
501 if (has_delay_slot(schedpoint))
503 if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
505 if (!is_legal_delay_slot_filler(schedpoint))
507 if (!can_move_up_into_delayslot(schedpoint, node))
510 /* found something */
514 /* look in successor blocks */
515 ir_node *block = get_nodes_block(node);
516 /* TODO: sort succs by execution frequency */
517 foreach_block_succ(block, edge) {
518 ir_node *succ = get_edge_src_irn(edge);
519 /* we can't easily move up stuff from blocks with multiple predecessors
520 * since the instruction would then be missing on the other control-flow paths.
521 * (We also don't have to do any phi translation) */
522 if (get_Block_n_cfgpreds(succ) > 1)
526 sched_foreach(succ, schedpoint) {
527 if (has_delay_slot(schedpoint))
529 /* can't move pinned nodes across blocks */
530 if (get_irn_pinned(schedpoint) == op_pin_state_pinned)
532 /* restore doesn't model register window switching correctly,
533 * so it appears like we could move it, which is not true */
534 if (is_sparc_Restore(schedpoint)
535 || is_sparc_RestoreZero(schedpoint))
537 if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
539 if (!is_legal_delay_slot_filler(schedpoint))
541 if (can_move_up_into_delayslot(schedpoint, node)) {
542 /* it's fine to move the insn across blocks */
544 } else if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) {
545 ir_node *proj = get_Block_cfgpred(succ, 0);
546 long nr = get_Proj_proj(proj);
547 if ((nr == pn_sparc_Bicc_true || nr == pn_sparc_fbfcc_true)
548 && be_can_move_up(heights, schedpoint, succ)) {
549 /* we can use it with the annul flag */
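/* with the annul bit set, the instruction in the delay slot is only
 * executed when the conditional branch is taken, which is exactly the
 * behaviour needed when pulling it out of the taken successor */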
550 sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node);
551 attr->annul_delay_slot = true;
561 void sparc_emitf(ir_node const *const node, char const *fmt, ...)
567 char const *start = fmt;
569 while (*fmt != '%' && *fmt != '\0')
571 be_emit_string_len(start, fmt - start);
588 const sparc_jmp_cond_attr_t *attr
589 = get_sparc_jmp_cond_attr_const(node);
590 if (attr->annul_delay_slot) {
591 be_emit_cstring(",a");
597 if (*fmt < '0' || '9' <= *fmt)
599 sparc_emit_dest_register(node, *fmt++ - '0');
603 sparc_attr_t const *const attr = get_sparc_attr_const(node);
604 be_gas_emit_entity(attr->immediate_value_entity);
605 if (attr->immediate_value != 0) {
606 be_emit_irprintf(plus ? "%+d" : "%d", attr->immediate_value);
614 case 'D': mode = get_sparc_fp_conv_attr_const(node)->dest_mode; break;
615 case 'M': mode = get_sparc_fp_attr_const(node)->fp_mode; break;
616 case 'S': mode = get_sparc_fp_conv_attr_const(node)->src_mode; break;
617 default: goto unknown;
619 emit_fp_suffix(mode);
624 sparc_emit_high_immediate(node);
628 ir_node *n = va_arg(ap, ir_node*);
629 sparc_emit_cfop_target(n);
635 case 'L': sparc_emit_load_mode(node); break;
636 case 'S': sparc_emit_store_mode(node); break;
637 default: goto unknown;
642 if (*fmt < '0' || '9' <= *fmt)
644 sparc_emit_offset(node, *fmt++ - '0');
648 arch_register_t const *const reg = va_arg(ap, const arch_register_t*);
650 be_emit_string(reg->name);
660 if (*fmt < '0' || '9' <= *fmt)
662 unsigned const pos = *fmt++ - '0';
663 if (imm && arch_get_irn_flags(node) & (arch_irn_flags_t)sparc_arch_irn_flag_immediate_form) {
664 sparc_emit_immediate(node);
666 sparc_emit_source_register(node, pos);
672 int const num = va_arg(ap, int);
673 be_emit_irprintf(plus ? "%+d" : "%d", num);
678 char const *const str = va_arg(ap, char const*);
684 unsigned const num = va_arg(ap, unsigned);
685 be_emit_irprintf(plus ? "%+u" : "%u", num);
691 panic("unknown format conversion in sparc_emitf()");
694 be_emit_finish_line_gas(node);
699 * Emits code for stack space management
701 static void emit_be_IncSP(const ir_node *irn)
703 int offset = be_get_IncSP_offset(irn);
708 /* SPARC stack grows downwards */
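/* note the comma expression below: a positive offset is negated and paired
 * with "add", a negative offset keeps its sign and is paired with "sub" */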
709 char const *const insn = offset > 0 ? offset = -offset, "add" : "sub";
710 sparc_emitf(irn, "%s %S0, %d, %D0", insn, offset);
714 * Emits code for stack space management.
716 static void emit_sparc_SubSP(const ir_node *irn)
718 sparc_emitf(irn, "sub %S0, %SI1, %D0");
719 sparc_emitf(irn, "add %S0, %u, %D1", SPARC_MIN_STACKSIZE);
722 static void fill_delay_slot(const ir_node *node)
724 emitting_delay_slot = true;
725 const ir_node *filler = pmap_get(ir_node, delay_slots, node);
726 if (filler != NULL) {
727 assert(!is_no_instruction(filler));
728 assert(!emits_multiple_instructions(filler));
729 be_emit_node(filler);
731 sparc_emitf(NULL, "nop");
733 emitting_delay_slot = false;
736 static void emit_sparc_Div(const ir_node *node, char const *const insn)
738 sparc_emitf(node, "wr %S0, 0, %%y");
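/* the freshly written %y value is not immediately readable; SPARC V8 allows
 * up to three delay instructions before a following div may depend on it */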
740 /* TODO: we should specify the number of delay slots in an architecture
742 unsigned wry_delay_count = 3;
743 for (unsigned i = 0; i < wry_delay_count; ++i) {
745 fill_delay_slot(node);
747 emitting_delay_slot = true;
748 sparc_emitf(NULL, "nop");
749 emitting_delay_slot = false;
753 sparc_emitf(node, "%s %S1, %SI2, %D0", insn);
756 static void emit_sparc_SDiv(const ir_node *node)
758 emit_sparc_Div(node, "sdiv");
761 static void emit_sparc_UDiv(const ir_node *node)
763 emit_sparc_Div(node, "udiv");
766 static void emit_sparc_Call(const ir_node *node)
768 if (is_sparc_reg_call(node)) {
769 int dest_addr = get_sparc_Call_dest_addr_pos(node);
770 sparc_emitf(node, "call %R", arch_get_irn_register_in(node, dest_addr));
772 sparc_emitf(node, "call %E, 0");
775 fill_delay_slot(node);
777 if (arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return) {
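/* SPARC ABI convention: a call to a function returning an aggregate is
 * followed by an unimp word, which the callee skips on return */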
778 sparc_emitf(NULL, "unimp 8");
782 static void emit_be_Perm(const ir_node *irn)
784 ir_mode *mode = get_irn_mode(get_irn_n(irn, 0));
785 if (mode_is_float(mode)) {
786 const arch_register_t *reg0 = arch_get_irn_register_in(irn, 0);
787 const arch_register_t *reg1 = arch_get_irn_register_in(irn, 1);
788 unsigned reg_idx0 = reg0->global_index;
789 unsigned reg_idx1 = reg1->global_index;
790 unsigned width = arch_get_irn_register_req_in(irn, 0)->width;
791 for (unsigned i = 0; i < width; ++i) {
792 const arch_register_t *r0 = &sparc_registers[reg_idx0+i];
793 const arch_register_t *r1 = &sparc_registers[reg_idx1+i];
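/* swap the two single-precision registers using %f31 as scratch */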
794 sparc_emitf(irn, "fmovs %R, %%f31", r0);
795 sparc_emitf(irn, "fmovs %R, %R", r1, r0);
796 sparc_emitf(irn, "fmovs %%f31, %R", r1);
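/* integer case: swap both registers in place with the three-xor trick */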
799 sparc_emitf(irn, "xor %S1, %S0, %S0");
800 sparc_emitf(irn, "xor %S1, %S0, %S1");
801 sparc_emitf(irn, "xor %S1, %S0, %S0");
805 /* The stack pointer must always be SPARC_STACK_ALIGNMENT bytes aligned, so
806 * round the byte count up to the next multiple of it. */
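/* e.g. num_regs = 2: assuming the SPARC V8 values SPARC_REGISTER_SIZE = 4 and
 * SPARC_STACK_ALIGNMENT = 8, this yields 8 bytes, already a multiple of 8 */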
807 static unsigned get_aligned_sp_change(const unsigned num_regs)
809 const unsigned bytes = num_regs * SPARC_REGISTER_SIZE;
810 return round_up2(bytes, SPARC_STACK_ALIGNMENT);
813 /* Spill register l0 or both l0 and l1, depending on n_spilled and n_to_spill.*/
814 static void memperm_emit_spill_registers(const ir_node *node, int n_spilled,
817 assert(n_spilled < n_to_spill);
819 if (n_spilled == 0) {
820 /* We always reserve stack space for two registers because during copy
821 * processing we don't know yet if we also need to handle a cycle which
822 * needs two registers. More complicated code in emit_MemPerm would
823 * prevent wasting SPARC_REGISTER_SIZE bytes of stack space, but
824 * that is not worth making emit_MemPerm harder to read. */
826 /* Keep stack pointer aligned. */
827 unsigned sp_change = get_aligned_sp_change(2);
828 sparc_emitf(node, "sub %%sp, %u, %%sp", sp_change);
830 /* Spill register l0. */
831 sparc_emitf(node, "st %%l0, [%%sp%+d]", SPARC_MIN_STACKSIZE);
834 if (n_to_spill == 2) {
835 /* Spill register l1. */
836 sparc_emitf(node, "st %%l1, [%%sp%+d]", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE);
840 /* Restore register l0 or both l0 and l1, depending on n_spilled. */
841 static void memperm_emit_restore_registers(const ir_node *node, int n_spilled)
843 if (n_spilled == 2) {
844 /* Restore register l1. */
845 sparc_emitf(node, "ld [%%sp%+d], %%l1", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE);
848 /* Restore register l0. */
849 sparc_emitf(node, "ld [%%sp%+d], %%l0", SPARC_MIN_STACKSIZE);
851 /* Restore stack pointer. */
852 unsigned sp_change = get_aligned_sp_change(2);
853 sparc_emitf(node, "add %%sp, %u, %%sp", sp_change);
856 /* Emit code to copy in_ent to out_ent. Only uses l0. */
857 static void memperm_emit_copy(const ir_node *node, ir_entity *in_ent,
860 ir_graph *irg = get_irn_irg(node);
861 be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
862 int off_in = be_get_stack_entity_offset(layout, in_ent, 0);
863 int off_out = be_get_stack_entity_offset(layout, out_ent, 0);
865 /* Load from input entity. */
866 sparc_emitf(node, "ld [%%fp%+d], %%l0", off_in);
867 /* Store to output entity. */
868 sparc_emitf(node, "st %%l0, [%%fp%+d]", off_out);
871 /* Emit code to swap ent1 and ent2. Uses l0 and l1. */
872 static void memperm_emit_swap(const ir_node *node, ir_entity *ent1,
875 ir_graph *irg = get_irn_irg(node);
876 be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
877 int off1 = be_get_stack_entity_offset(layout, ent1, 0);
878 int off2 = be_get_stack_entity_offset(layout, ent2, 0);
880 /* Load from first input entity. */
881 sparc_emitf(node, "ld [%%fp%+d], %%l0", off1);
882 /* Load from second input entity. */
883 sparc_emitf(node, "ld [%%fp%+d], %%l1", off2);
884 /* Store first value to second output entity. */
885 sparc_emitf(node, "st %%l0, [%%fp%+d]", off2);
886 /* Store second value to first output entity. */
887 sparc_emitf(node, "st %%l1, [%%fp%+d]", off1);
890 /* Find the index of ent in ents or return -1 if not found. */
891 static int get_index(ir_entity **ents, int n, ir_entity *ent)
893 for (int i = 0; i < n; ++i) {
902 * Emit code for a MemPerm node.
904 * Analyze MemPerm for copy chains and cyclic swaps and resolve them using
906 * This function is conceptually very similar to permute_values in
909 static void emit_be_MemPerm(const ir_node *node)
911 int memperm_arity = be_get_MemPerm_entity_arity(node);
912 /* Upper limit for the number of participating entities is twice the
913 * arity, e.g., for a simple copying MemPerm node with one input/output. */
914 int max_size = 2 * memperm_arity;
915 ir_entity **entities = ALLOCANZ(ir_entity *, max_size);
916 /* sourceof contains the input entity for each entity. If an entity is
917 * never used as an output, its entry in sourceof is a fix point. */
918 int *sourceof = ALLOCANZ(int, max_size);
919 /* n_users counts how many output entities use this entity as their input.*/
920 int *n_users = ALLOCANZ(int, max_size);
921 /* n_spilled records the number of spilled registers, either 1 or 2. */
924 /* This implementation currently only works with frame pointers. */
925 ir_graph *irg = get_irn_irg(node);
926 be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
927 assert(!layout->sp_relative && "MemPerms currently do not work without frame pointers");
929 for (int i = 0; i < max_size; ++i) {
934 for (int i = 0; i < memperm_arity; ++i) {
935 ir_entity *out = be_get_MemPerm_out_entity(node, i);
936 ir_entity *in = be_get_MemPerm_in_entity(node, i);
938 /* Insert into entities to be able to operate on unique indices. */
939 if (get_index(entities, n, out) == -1)
941 if (get_index(entities, n, in) == -1)
944 int oidx = get_index(entities, n, out);
945 int iidx = get_index(entities, n, in);
947 sourceof[oidx] = iidx; /* Remember the source. */
948 ++n_users[iidx]; /* Increment number of users of this entity. */
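/* A small hypothetical example: for the pairs B<-A and A<-B we get
 * sourceof[A] = B, sourceof[B] = A and n_users of 1 each, i.e. a cycle that
 * the second loop below resolves with a swap; an additional pair C<-A forms
 * a chain ending in C and is handled by a plain copy in the first loop. */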
951 /* First do all the copies. */
952 for (int oidx = 0; oidx < n; /* empty */) {
953 int iidx = sourceof[oidx];
955 /* Nothing to do for fix points.
956 * Also, if entities[oidx] is used as an input by another copy, we
957 * can't overwrite entities[oidx] yet.*/
958 if (iidx == oidx || n_users[oidx] > 0) {
963 /* We found the end of a 'chain', so do the copy. */
964 if (n_spilled == 0) {
965 memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/1);
968 memperm_emit_copy(node, entities[iidx], entities[oidx]);
971 sourceof[oidx] = oidx;
973 assert(n_users[iidx] > 0);
974 /* Decrementing the number of users might enable us to do another
978 if (iidx < oidx && n_users[iidx] == 0) {
985 /* The rest are cycles. */
986 for (int oidx = 0; oidx < n; /* empty */) {
987 int iidx = sourceof[oidx];
989 /* Nothing to do for fix points. */
995 assert(n_users[iidx] == 1);
997 /* Swap the two values to resolve the cycle. */
999 memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/2);
1002 memperm_emit_swap(node, entities[iidx], entities[oidx]);
1004 int tidx = sourceof[iidx];
1006 sourceof[iidx] = iidx;
1008 /* The source of oidx is now the old source of iidx, because we swapped
1009 * the two entities. */
1010 sourceof[oidx] = tidx;
1013 #ifdef DEBUG_libfirm
1014 /* Only fix points should remain. */
1015 for (int i = 0; i < max_size; ++i) {
1016 assert(sourceof[i] == i);
1020 assert(n_spilled > 0 && "Useless MemPerm node");
1022 memperm_emit_restore_registers(node, n_spilled);
1025 static void emit_sparc_Return(const ir_node *node)
1027 ir_graph *irg = get_irn_irg(node);
1028 ir_entity *entity = get_irg_entity(irg);
1029 ir_type *type = get_entity_type(entity);
1031 const char *destreg = "%o7";
1033 /* hack: we don't explicitly model the register changes caused by the
1034 * restore node, so we have to do it manually here */
1035 const ir_node *delay_slot = pmap_get(ir_node, delay_slots, node);
1036 if (delay_slot != NULL &&
1037 (is_sparc_Restore(delay_slot) || is_sparc_RestoreZero(delay_slot))) {
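/* aggregate-returning functions have to return past the caller's unimp
 * word, i.e. to %o7+12 instead of the usual %o7+8 */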
1040 char const *const offset = get_method_calling_convention(type) & cc_compound_ret ? "12" : "8";
1041 sparc_emitf(node, "jmp %s+%s", destreg, offset);
1042 fill_delay_slot(node);
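/* restore switches back to the caller's register window, so a result the
 * register allocator assigned to an %i register must be emitted as the
 * corresponding %o register of that window */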
1045 static const arch_register_t *map_i_to_o_reg(const arch_register_t *reg)
1047 unsigned idx = reg->global_index;
1048 if (idx < REG_I0 || idx > REG_I7)
1050 idx += REG_O0 - REG_I0;
1051 assert(REG_O0 <= idx && idx <= REG_O7);
1052 return &sparc_registers[idx];
1055 static void emit_sparc_Restore(const ir_node *node)
1057 const arch_register_t *destreg
1058 = arch_get_irn_register_out(node, pn_sparc_Restore_res);
1059 sparc_emitf(node, "restore %S2, %SI3, %R", map_i_to_o_reg(destreg));
1062 static void emit_sparc_FrameAddr(const ir_node *node)
1064 const sparc_attr_t *attr = get_sparc_attr_const(node);
1065 int32_t offset = attr->immediate_value;
1067 char const *const insn = offset > 0 ? offset = -offset, "sub" : "add";
1068 assert(sparc_is_value_imm_encodeable(offset));
1069 sparc_emitf(node, "%s %S0, %d, %D0", insn, offset);
1072 static const char *get_icc_unsigned(ir_relation relation)
1074 switch (relation & (ir_relation_less_equal_greater)) {
1075 case ir_relation_false: return "bn";
1076 case ir_relation_equal: return "be";
1077 case ir_relation_less: return "blu";
1078 case ir_relation_less_equal: return "bleu";
1079 case ir_relation_greater: return "bgu";
1080 case ir_relation_greater_equal: return "bgeu";
1081 case ir_relation_less_greater: return "bne";
1082 case ir_relation_less_equal_greater: return "ba";
1083 default: panic("Cmp has unsupported relation");
1087 static const char *get_icc_signed(ir_relation relation)
1089 switch (relation & (ir_relation_less_equal_greater)) {
1090 case ir_relation_false: return "bn";
1091 case ir_relation_equal: return "be";
1092 case ir_relation_less: return "bl";
1093 case ir_relation_less_equal: return "ble";
1094 case ir_relation_greater: return "bg";
1095 case ir_relation_greater_equal: return "bge";
1096 case ir_relation_less_greater: return "bne";
1097 case ir_relation_less_equal_greater: return "ba";
1098 default: panic("Cmp has unsupported relation");
1102 static const char *get_fcc(ir_relation relation)
1105 case ir_relation_false: return "fbn";
1106 case ir_relation_equal: return "fbe";
1107 case ir_relation_less: return "fbl";
1108 case ir_relation_less_equal: return "fble";
1109 case ir_relation_greater: return "fbg";
1110 case ir_relation_greater_equal: return "fbge";
1111 case ir_relation_less_greater: return "fblg";
1112 case ir_relation_less_equal_greater: return "fbo";
1113 case ir_relation_unordered: return "fbu";
1114 case ir_relation_unordered_equal: return "fbue";
1115 case ir_relation_unordered_less: return "fbul";
1116 case ir_relation_unordered_less_equal: return "fbule";
1117 case ir_relation_unordered_greater: return "fbug";
1118 case ir_relation_unordered_greater_equal: return "fbuge";
1119 case ir_relation_unordered_less_greater: return "fbne";
1120 case ir_relation_true: return "fba";
1122 panic("invalid relation");
1125 typedef const char* (*get_cc_func)(ir_relation relation);
1127 static void emit_sparc_branch(const ir_node *node, get_cc_func get_cc)
1129 const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node);
1130 ir_relation relation = attr->relation;
1131 const ir_node *proj_true = NULL;
1132 const ir_node *proj_false = NULL;
1134 assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false);
1135 assert((long)pn_sparc_Bicc_true == (long)pn_sparc_fbfcc_true);
1136 foreach_out_edge(node, edge) {
1137 ir_node *proj = get_edge_src_irn(edge);
1138 long nr = get_Proj_proj(proj);
1139 if (nr == pn_sparc_Bicc_true) {
1142 assert(nr == pn_sparc_Bicc_false);
1147 /* emit the true proj */
1148 sparc_emitf(node, "%s%A %L", get_cc(relation), proj_true);
1149 fill_delay_slot(node);
1151 const ir_node *block = get_nodes_block(node);
1152 const ir_node *next_block = (ir_node*)get_irn_link(block);
1154 if (get_jump_target(proj_false) == next_block) {
1155 if (be_options.verbose_asm) {
1156 sparc_emitf(node, "/* fallthrough to %L */", proj_false);
1159 sparc_emitf(node, "ba %L", proj_false);
1160 /* TODO: fill this slot as well */
1161 emitting_delay_slot = true;
1162 sparc_emitf(NULL, "nop");
1163 emitting_delay_slot = false;
1167 static void emit_sparc_Bicc(const ir_node *node)
1169 const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node);
1170 bool is_unsigned = attr->is_unsigned;
1171 emit_sparc_branch(node, is_unsigned ? get_icc_unsigned : get_icc_signed);
1174 static void emit_sparc_fbfcc(const ir_node *node)
1176 /* if the flags producing node was immediately in front of us, emit
1178 ir_node *flags = get_irn_n(node, n_sparc_fbfcc_flags);
1179 ir_node *prev = sched_prev(node);
1180 if (is_Block(prev)) {
1181 /* TODO: when the flags come from another block, then we have to do
1182 * more complicated tests to see whether the flag producing node is
1183 * potentially in front of us (could happen for fallthroughs) */
1184 panic("TODO: fbfcc flags come from other block");
1186 if (skip_Proj(flags) == prev) {
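/* SPARC does not allow an fbfcc to directly follow the fcmp that set the
 * condition codes, so separate them with a nop */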
1187 sparc_emitf(NULL, "nop");
1189 emit_sparc_branch(node, get_fcc);
1192 static void emit_sparc_Ba(const ir_node *node)
1194 if (ba_is_fallthrough(node)) {
1195 if (be_options.verbose_asm) {
1196 sparc_emitf(node, "/* fallthrough to %L */", node);
1199 sparc_emitf(node, "ba %L", node);
1200 fill_delay_slot(node);
1204 static void emit_sparc_SwitchJmp(const ir_node *node)
1206 const sparc_switch_jmp_attr_t *attr = get_sparc_switch_jmp_attr_const(node);
1208 sparc_emitf(node, "jmp %S0");
1209 fill_delay_slot(node);
1211 be_emit_jump_table(node, attr->table, attr->table_entity, get_jump_target);
1214 static void emit_fmov(const ir_node *node, const arch_register_t *src_reg,
1215 const arch_register_t *dst_reg)
1217 sparc_emitf(node, "fmovs %R, %R", src_reg, dst_reg);
1220 static const arch_register_t *get_next_fp_reg(const arch_register_t *reg)
1222 unsigned idx = reg->global_index;
1223 assert(reg == &sparc_registers[idx]);
1225 assert(idx - REG_F0 < N_sparc_fp_REGS);
1226 return &sparc_registers[idx];
1229 static void emit_be_Copy(const ir_node *node)
1231 ir_mode *mode = get_irn_mode(node);
1232 const arch_register_t *src_reg = arch_get_irn_register_in(node, 0);
1233 const arch_register_t *dst_reg = arch_get_irn_register_out(node, 0);
1235 if (src_reg == dst_reg)
1238 if (mode_is_float(mode)) {
1239 unsigned bits = get_mode_size_bits(mode);
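/* double and quad precision values occupy 2 resp. 4 single-precision
 * registers, hence 1 resp. 3 additional fmovs are needed */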
1240 int n = bits > 32 ? bits > 64 ? 3 : 1 : 0;
1241 emit_fmov(node, src_reg, dst_reg);
1242 for (int i = 0; i < n; ++i) {
1243 src_reg = get_next_fp_reg(src_reg);
1244 dst_reg = get_next_fp_reg(dst_reg);
1245 emit_fmov(node, src_reg, dst_reg);
1247 } else if (mode_is_data(mode)) {
1248 sparc_emitf(node, "mov %S0, %D0");
1250 panic("invalid mode");
1255 * Enters the emitter functions for handled nodes into the generic
1256 * pointer of an opcode.
1258 static void sparc_register_emitters(void)
1260 /* first clear the generic function pointer for all ops */
1261 ir_clear_opcodes_generic_func();
1262 /* register all emitter functions defined in spec */
1263 sparc_register_spec_emitters();
1265 /* custom emitter */
1266 be_set_emitter(op_be_Copy, emit_be_Copy);
1267 be_set_emitter(op_be_CopyKeep, emit_be_Copy);
1268 be_set_emitter(op_be_IncSP, emit_be_IncSP);
1269 be_set_emitter(op_be_MemPerm, emit_be_MemPerm);
1270 be_set_emitter(op_be_Perm, emit_be_Perm);
1271 be_set_emitter(op_sparc_Ba, emit_sparc_Ba);
1272 be_set_emitter(op_sparc_Bicc, emit_sparc_Bicc);
1273 be_set_emitter(op_sparc_Call, emit_sparc_Call);
1274 be_set_emitter(op_sparc_FrameAddr, emit_sparc_FrameAddr);
1275 be_set_emitter(op_sparc_Restore, emit_sparc_Restore);
1276 be_set_emitter(op_sparc_Return, emit_sparc_Return);
1277 be_set_emitter(op_sparc_SDiv, emit_sparc_SDiv);
1278 be_set_emitter(op_sparc_SubSP, emit_sparc_SubSP);
1279 be_set_emitter(op_sparc_SwitchJmp, emit_sparc_SwitchJmp);
1280 be_set_emitter(op_sparc_UDiv, emit_sparc_UDiv);
1281 be_set_emitter(op_sparc_fbfcc, emit_sparc_fbfcc);
1283 /* no need to emit anything for the following nodes */
1284 be_set_emitter(op_Phi, be_emit_nothing);
1285 be_set_emitter(op_be_Keep, be_emit_nothing);
1286 be_set_emitter(op_sparc_Start, be_emit_nothing);
1289 static bool block_needs_label(const ir_node *block, const ir_node *sched_prev)
1291 if (get_Block_entity(block) != NULL)
1294 int n_cfgpreds = get_Block_n_cfgpreds(block);
1295 if (n_cfgpreds == 0) {
1297 } else if (n_cfgpreds > 1) {
1300 ir_node *cfgpred = get_Block_cfgpred(block, 0);
1301 ir_node *cfgpred_block = get_nodes_block(cfgpred);
1302 if (is_Proj(cfgpred) && is_sparc_SwitchJmp(get_Proj_pred(cfgpred)))
1304 return sched_prev != cfgpred_block || get_jump_target(cfgpred) != block;
1309 * Walks over the nodes in a block connected by scheduling edges
1310 * and emits code for each node.
1312 static void sparc_emit_block(ir_node *block, ir_node *prev)
1314 bool needs_label = block_needs_label(block, prev);
1315 be_gas_begin_block(block, needs_label);
1317 sched_foreach(block, node) {
1318 if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node)))
1325 * Emits code for function start.
1327 static void sparc_emit_func_prolog(ir_graph *irg)
1329 ir_entity *entity = get_irg_entity(irg);
1330 be_gas_emit_function_prolog(entity, 4, NULL);
1334 * Emits code for function end
1336 static void sparc_emit_func_epilog(ir_graph *irg)
1338 ir_entity *entity = get_irg_entity(irg);
1339 be_gas_emit_function_epilog(entity);
1342 static void init_jump_links(ir_node *block, void *env)
1346 int n = get_Block_n_cfgpreds(block);
1347 for (n--; n >= 0; n--) {
1348 ir_node *pred = get_Block_cfgpred(block, n);
1349 set_jump_target(pred, block);
1353 static int cmp_block_execfreqs(const void *d1, const void *d2)
1355 ir_node **p1 = (ir_node**)d1;
1356 ir_node **p2 = (ir_node**)d2;
1357 double freq1 = get_block_execfreq(*p1);
1358 double freq2 = get_block_execfreq(*p2);
1363 return get_irn_node_nr(*p2)-get_irn_node_nr(*p1);
1366 static void pick_delay_slots(size_t n_blocks, ir_node **blocks)
1368 /* create a block list sorted by execution frequency */
1369 ir_node **sorted_blocks = XMALLOCN(ir_node*, n_blocks);
1370 memcpy(sorted_blocks, blocks, n_blocks*sizeof(sorted_blocks[0]));
1371 qsort(sorted_blocks, n_blocks, sizeof(sorted_blocks[0]),
1372 cmp_block_execfreqs);
1374 for (size_t i = 0; i < n_blocks; ++i) {
1375 sched_foreach(sorted_blocks[i], node) {
1376 if (!has_delay_slot(node))
1378 ir_node *filler = pick_delay_slot_for(node);
1381 rbitset_set(delay_slot_fillers, get_irn_idx(filler));
1382 pmap_insert(delay_slots, node, filler);
1387 void sparc_emit_routine(ir_graph *irg)
1389 heights = heights_new(irg);
1390 delay_slot_fillers = rbitset_malloc(get_irg_last_idx(irg));
1391 delay_slots = pmap_create();
1393 /* register all emitter functions */
1394 sparc_register_emitters();
1396 /* create the block schedule. For now, we don't need it earlier. */
1397 ir_node **block_schedule = be_create_block_schedule(irg);
1399 sparc_emit_func_prolog(irg);
1400 irg_block_walk_graph(irg, init_jump_links, NULL, NULL);
1402 /* inject block scheduling links & emit code of each block */
1403 size_t n_blocks = ARR_LEN(block_schedule);
1404 for (size_t i = 0; i < n_blocks; ++i) {
1405 ir_node *block = block_schedule[i];
1406 ir_node *next_block = i+1 < n_blocks ? block_schedule[i+1] : NULL;
1407 set_irn_link(block, next_block);
1410 pick_delay_slots(n_blocks, block_schedule);
1412 for (size_t i = 0; i < n_blocks; ++i) {
1413 ir_node *block = block_schedule[i];
1414 ir_node *prev = i>=1 ? block_schedule[i-1] : NULL;
1415 if (block == get_irg_end_block(irg))
1417 sparc_emit_block(block, prev);
1420 /* emit function epilog */
1421 sparc_emit_func_epilog(irg);
1423 pmap_destroy(delay_slots);
1424 xfree(delay_slot_fillers);
1425 heights_free(heights);
1428 void sparc_init_emitter(void)
1430 FIRM_DBG_REGISTER(dbg, "firm.be.sparc.emit");