2 * Copyright (C) 1995-2010 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief emit assembler for a backend graph
23 * @author Hannes Rapp, Matthias Braun
29 #include "bitfiddle.h"
41 #include "raw_bitset.h"
45 #include "execfreq_t.h"
48 #include "beblocksched.h"
54 #include "bepeephole.h"
56 #include "sparc_emitter.h"
57 #include "gen_sparc_emitter.h"
58 #include "sparc_nodes_attr.h"
59 #include "sparc_new_nodes.h"
60 #include "gen_sparc_regalloc_if.h"
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* Height information for the current graph; used by the legality checks
 * when moving instructions into delay slots. */
static ir_heights_t *heights;
/* Bitset indexed by node index: marks nodes already claimed as delay
 * slot fillers so they are neither emitted in place nor picked twice. */
static unsigned *delay_slot_fillers;
/* Maps a delay-slot-owning node to the node chosen to fill its slot. */
static pmap *delay_slots;

static void sparc_emit_node(const ir_node *node);

/* True while the instruction currently being emitted sits in a delay
 * slot (sparc_emit_indent adds extra indentation then). */
static bool emitting_delay_slot;
/**
 * indent before instruction. (Adds additional indentation when emitting
 * a delay slot filler.)
 * NOTE(review): the emitting lines of the body are not visible in this
 * view — confirm against the full source.
 */
static void sparc_emit_indent(void)
	if (emitting_delay_slot)
/**
 * Emit a 13-bit immediate operand: either a plain signed value, or a
 * %lo()/%tle_lox10() relocation of an entity plus an optional addend.
 * NOTE(review): the entity==NULL guard, else branch and closing
 * parenthesis/braces sit on lines not visible in this view.
 */
static void sparc_emit_immediate(ir_node const *const node)
	const sparc_attr_t *attr   = get_sparc_attr_const(node);
	ir_entity          *entity = attr->immediate_value_entity;
		int32_t value = attr->immediate_value;
		/* the value must fit the 13-bit signed immediate field */
		assert(sparc_is_value_imm_encodeable(value));
		be_emit_irprintf("%d", value);
		if (get_entity_owner(entity) == get_tls_type()) {
			/* thread-local entities need the TLS relocation operator */
			be_emit_cstring("%tle_lox10(");
			be_emit_cstring("%lo(");
		be_gas_emit_entity(entity);
		if (attr->immediate_value != 0) {
			/* addend relative to the entity */
			be_emit_irprintf("%+d", attr->immediate_value);
/**
 * Emit the upper 22 bits of an immediate as a %hi()/%tle_hix22()
 * expression (counterpart of sparc_emit_immediate for sethi).
 * NOTE(review): else branch and closing braces are on lines not visible
 * in this view.
 */
static void sparc_emit_high_immediate(ir_node const *node)
	const sparc_attr_t *attr   = get_sparc_attr_const(node);
	ir_entity          *entity = attr->immediate_value_entity;
	if (entity == NULL) {
		uint32_t value = (uint32_t) attr->immediate_value;
		be_emit_irprintf("%%hi(0x%X)", value);
		if (get_entity_owner(entity) == get_tls_type()) {
			/* thread-local entities need the TLS relocation operator */
			be_emit_cstring("%tle_hix22(");
			be_emit_cstring("%hi(");
		be_gas_emit_entity(entity);
		if (attr->immediate_value != 0) {
			/* addend relative to the entity */
			be_emit_irprintf("%+d", attr->immediate_value);
/**
 * Emit the name of the register assigned to input @p pos of @p node.
 * NOTE(review): the '%' register-name prefix is presumably emitted on a
 * line not visible in this view — confirm against the full source.
 */
static void sparc_emit_source_register(ir_node const *node, int const pos)
	const arch_register_t *reg = arch_get_irn_register_in(node, pos);
	be_emit_string(arch_register_get_name(reg));
/**
 * Emit the name of the register assigned to output @p pos of @p node.
 * NOTE(review): the '%' register-name prefix is presumably emitted on a
 * line not visible in this view — confirm against the full source.
 */
static void sparc_emit_dest_register(ir_node const *const node, int const pos)
	const arch_register_t *reg = arch_get_irn_register_out(node, pos);
	be_emit_string(arch_register_get_name(reg));
/**
 * Emit the address-offset part of a load/store operand. Three forms:
 * reg+reg addressing, a frame-entity offset, or an immediate/entity.
 * NOTE(review): the separator characters emitted between base register
 * and offset (e.g. '+') sit on lines not visible in this view.
 */
static void sparc_emit_offset(const ir_node *node, int offset_node_pos)
	const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);

	if (attr->is_reg_reg) {
		/* reg+reg addressing excludes any immediate/entity component */
		assert(!attr->is_frame_entity);
		assert(attr->base.immediate_value == 0);
		assert(attr->base.immediate_value_entity == NULL);
		sparc_emit_source_register(node, offset_node_pos);
	} else if (attr->is_frame_entity) {
		int32_t offset = attr->base.immediate_value;
		/* frame offsets were fixed during stack finalization and must
		 * fit the immediate field */
		assert(sparc_is_value_imm_encodeable(offset));
		be_emit_irprintf("%+ld", offset);
	} else if (attr->base.immediate_value != 0
	    || attr->base.immediate_value_entity != NULL) {
		sparc_emit_immediate(node);
/**
 * Emit the size/signedness suffix of a load: sb/ub, sh/uh, d, q.
 * NOTE(review): the 32-bit case (no suffix) and the switch header are on
 * lines not visible in this view.
 */
static void sparc_emit_load_mode(ir_node const *const node)
	const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
	ir_mode *mode      = attr->load_store_mode;
	int      bits      = get_mode_size_bits(mode);
	bool     is_signed = mode_is_signed(mode);
	case 8:   be_emit_string(is_signed ? "sb" : "ub"); break;
	case 16:  be_emit_string(is_signed ? "sh" : "uh"); break;
	case 64:  be_emit_char('d'); break;
	case 128: be_emit_char('q'); break;
	default:  panic("invalid load/store mode %+F", mode);
/**
 * Emit store mode char (b/h/d/q; stores are signedness-agnostic).
 * NOTE(review): the 32-bit case (no suffix) and the switch header are on
 * lines not visible in this view.
 */
static void sparc_emit_store_mode(ir_node const *const node)
	const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
	ir_mode *mode = attr->load_store_mode;
	int      bits = get_mode_size_bits(mode);
	case 8:   be_emit_char('b'); break;
	case 16:  be_emit_char('h'); break;
	case 64:  be_emit_char('d'); break;
	case 128: be_emit_char('q'); break;
	default:  panic("invalid load/store mode %+F", mode);
206 static void emit_fp_suffix(const ir_mode *mode)
208 assert(mode_is_float(mode));
209 switch (get_mode_size_bits(mode)) {
210 case 32: be_emit_char('s'); break;
211 case 64: be_emit_char('d'); break;
212 case 128: be_emit_char('q'); break;
213 default: panic("invalid FP mode");
217 static void set_jump_target(ir_node *jump, ir_node *target)
219 set_irn_link(jump, target);
222 static ir_node *get_jump_target(const ir_node *jump)
224 return (ir_node*)get_irn_link(jump);
228 * Returns the target label for a control flow node.
230 static void sparc_emit_cfop_target(const ir_node *node)
232 ir_node *block = get_jump_target(node);
233 be_gas_emit_block_name(block);
237 * returns true if a sparc_call calls a register and not an immediate
239 static bool is_sparc_reg_call(const ir_node *node)
241 const sparc_attr_t *attr = get_sparc_attr_const(node);
242 return attr->immediate_value_entity == NULL;
245 static int get_sparc_Call_dest_addr_pos(const ir_node *node)
247 assert(is_sparc_reg_call(node));
248 return get_irn_arity(node)-1;
251 static bool ba_is_fallthrough(const ir_node *node)
253 ir_node *block = get_nodes_block(node);
254 ir_node *next_block = (ir_node*)get_irn_link(block);
255 return get_jump_target(node) == next_block;
/**
 * Returns true if @p node produces no assembly output at all: no-op
 * copies, zero-offset IncSP, fallthrough Ba, and Keep/Start/Phi.
 * NOTE(review): the `return true;` lines of the guards below are not
 * visible in this view.
 */
static bool is_no_instruction(const ir_node *node)
	/* copies are nops if src_reg == dest_reg */
	if (be_is_Copy(node) || be_is_CopyKeep(node)) {
		const arch_register_t *src_reg  = arch_get_irn_register_in(node, 0);
		const arch_register_t *dest_reg = arch_get_irn_register_out(node, 0);

		if (src_reg == dest_reg)
	/* an IncSP adjusting the stack pointer by 0 emits nothing */
	if (be_is_IncSP(node) && be_get_IncSP_offset(node) == 0)
	/* Ba is not emitted if it is a simple fallthrough */
	if (is_sparc_Ba(node) && ba_is_fallthrough(node))

	return be_is_Keep(node) || be_is_Start(node) || is_Phi(node);
277 static bool has_delay_slot(const ir_node *node)
279 if (is_sparc_Ba(node)) {
280 return !ba_is_fallthrough(node);
283 return arch_get_irn_flags(node) & sparc_arch_irn_flag_has_delay_slot;
/** returns true if the emitter for this sparc node can produce more than one
 * actual sparc instruction.
 * Usually it is a bad sign if we have to add instructions here. We should
 * rather try to get them lowered down. So we can actually put them into
 * delay slots and make them more accessible to the scheduler.
 * NOTE(review): the `return true;` after the delay-slot guard is not
 * visible in this view.
 */
static bool emits_multiple_instructions(const ir_node *node)
	/* anything owning a delay slot expands to at least two instructions */
	if (has_delay_slot(node))

	/* a call with aggregate return additionally emits an "unimp" marker */
	if (is_sparc_Call(node))
		return arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return;

	return is_sparc_SMulh(node) || is_sparc_UMulh(node)
	    || is_sparc_SDiv(node) || is_sparc_UDiv(node)
	    || be_is_MemPerm(node) || be_is_Perm(node)
	    || is_sparc_SubSP(node);
/**
 * Returns true if @p node reads any register overlapping the range
 * [reg_index, reg_index+width) of global register indices.
 * NOTE(review): the `return true;`/`return false;` lines and closing
 * braces are not visible in this view.
 */
static bool uses_reg(const ir_node *node, unsigned reg_index, unsigned width)
	int arity = get_irn_arity(node);
	for (int i = 0; i < arity; ++i) {
		const arch_register_t     *in_reg = arch_get_irn_register_in(node, i);
		const arch_register_req_t *in_req = arch_get_irn_register_req_in(node, i);
		/* interval-overlap test between the input register range and
		 * the queried range */
		if (reg_index < (unsigned)in_reg->global_index + in_req->width
		    && reg_index + width > in_reg->global_index)
/**
 * Returns true if @p node writes any register overlapping the range
 * [reg_index, reg_index+width) of global register indices.
 * NOTE(review): the `return true;`/`return false;` lines and closing
 * braces are not visible in this view.
 */
static bool writes_reg(const ir_node *node, unsigned reg_index, unsigned width)
	unsigned n_outs = arch_get_irn_n_outs(node);
	for (unsigned o = 0; o < n_outs; ++o) {
		const arch_register_t *out_reg = arch_get_irn_register_out(node, o);
		const arch_register_req_t *out_req = arch_get_irn_register_req_out(node, o);
		/* interval-overlap test between the output register range and
		 * the queried range */
		if (reg_index < (unsigned)out_reg->global_index + out_req->width
		    && reg_index + width > out_reg->global_index)
/**
 * A node is a candidate for a delay slot iff it emits exactly one real
 * instruction and has not already been claimed for another slot.
 * NOTE(review): the `return false;` lines of the guards and the final
 * `return true;` are not visible in this view.
 */
static bool is_legal_delay_slot_filler(const ir_node *node)
	if (is_no_instruction(node))
	if (emits_multiple_instructions(node))
	/* already claimed for another delay slot */
	if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node)))
/**
 * Checks whether @p node, scheduled before @p to, may legally be moved
 * down into @p to's delay slot (executed after the branch/call issues).
 * NOTE(review): several `return` lines, the `check` declaration and
 * closing braces are not visible in this view.
 */
static bool can_move_down_into_delayslot(const ir_node *node, const ir_node *to)
	if (!is_legal_delay_slot_filler(node))

	if (!be_can_move_down(heights, node, to))

	if (is_sparc_Call(to)) {
		/* all inputs are used after the delay slot so, we're fine */
		if (!is_sparc_reg_call(to))

		/* the delay slot instruction must not compute the call address */
		check = get_irn_n(to, get_sparc_Call_dest_addr_pos(to));
		if (skip_Proj(check) == node)

		/* the Call also destroys the value of %o7, but since this is
		 * currently marked as ignore register in the backend, it
		 * should never be used by the instruction in the delay slot. */
		if (uses_reg(node, REG_O7, 1))
	} else if (is_sparc_Return(to)) {
		/* return uses the value of %o7, all other values are not
		 * immediately used */
		if (writes_reg(node, REG_O7, 1))

		/* the node must not use our computed values */
		int arity = get_irn_arity(to);
		for (int i = 0; i < arity; ++i) {
			ir_node *in = get_irn_n(to, i);
			if (skip_Proj(in) == node)
/**
 * Checks whether @p node, scheduled after @p to, may legally be moved
 * up into @p to's delay slot (executed before control transfers).
 * NOTE(review): `return` lines, `continue` lines and closing braces of
 * the loops are not visible in this view.
 */
static bool can_move_up_into_delayslot(const ir_node *node, const ir_node *to)
	if (!be_can_move_up(heights, node, to))

	/* node must not use any results of 'to' */
	int arity = get_irn_arity(node);
	for (int i = 0; i < arity; ++i) {
		ir_node *in      = get_irn_n(node, i);
		ir_node *skipped = skip_Proj(in);

	/* register window cycling effects at Restore aren't correctly represented
	 * in the graph yet so we need this exception here */
	if (is_sparc_Restore(node) || is_sparc_RestoreZero(node)) {
	} else if (is_sparc_Call(to)) {
		/* node must not overwrite any of the inputs of the call,
		 * (except for the dest_addr) */
		int dest_addr_pos = is_sparc_reg_call(to)
			? get_sparc_Call_dest_addr_pos(to) : -1;

		int call_arity = get_irn_arity(to);
		for (int i = 0; i < call_arity; ++i) {
			if (i == dest_addr_pos)
			const arch_register_t *reg = arch_get_irn_register_in(to, i);
			const arch_register_req_t *req = arch_get_irn_register_req_in(to, i);
			if (writes_reg(node, reg->global_index, req->width))

		/* node must not write to one of the call outputs */
		unsigned n_call_outs = arch_get_irn_n_outs(to);
		for (unsigned o = 0; o < n_call_outs; ++o) {
			const arch_register_t *reg = arch_get_irn_register_out(to, o);
			const arch_register_req_t *req = arch_get_irn_register_req_out(to, o);
			if (writes_reg(node, reg->global_index, req->width))
	} else if (is_sparc_SDiv(to) || is_sparc_UDiv(to)) {
		/* node will be inserted between wr and div so it must not overwrite
		 * anything except the wr input */
		int arity = get_irn_arity(to);
		for (int i = 0; i < arity; ++i) {
			/* the dividend_high input is the wr operand and may be
			 * clobbered; position is identical for SDiv and UDiv */
			assert((long)n_sparc_SDiv_dividend_high == (long)n_sparc_UDiv_dividend_high);
			if (i == n_sparc_SDiv_dividend_high)
			const arch_register_t *reg = arch_get_irn_register_in(to, i);
			const arch_register_req_t *req = arch_get_irn_register_req_in(to, i);
			if (writes_reg(node, reg->global_index, req->width))
/**
 * If the true-projection of a conditional branch targets the next block
 * in the schedule, swap the projections and negate the condition so the
 * fallthrough case needs no jump.
 * NOTE(review): the proj assignments inside the edge loop and several
 * closing braces are not visible in this view.
 */
static void optimize_fallthrough(ir_node *node)
	ir_node *proj_true  = NULL;
	ir_node *proj_false = NULL;

	/* Bicc and fbfcc share their projection numbers */
	assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false);
	assert((long)pn_sparc_Bicc_true  == (long)pn_sparc_fbfcc_true);
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		long nr = get_Proj_proj(proj);
		if (nr == pn_sparc_Bicc_true) {
			assert(nr == pn_sparc_Bicc_false);
	assert(proj_true != NULL && proj_false != NULL);

	/* for now, the code works for scheduled and non-schedules blocks */
	const ir_node *block = get_nodes_block(node);

	/* we have a block schedule */
	const ir_node *next_block = (ir_node*)get_irn_link(block);

	if (get_jump_target(proj_true) == next_block) {
		/* exchange both proj destinations so the second one can be omitted */
		set_Proj_proj(proj_true,  pn_sparc_Bicc_false);
		set_Proj_proj(proj_false, pn_sparc_Bicc_true);

		/* negate the branch condition to keep semantics */
		sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node);
		attr->relation = get_negated_relation(attr->relation);
/**
 * search for an instruction that can fill the delay slot of @p node:
 * first look backwards in the schedule, then forwards, finally into
 * single-predecessor successor blocks (possibly using the annul flag).
 * NOTE(review): `tries` declarations, `break`/`continue`/`return`
 * lines and closing braces are not visible in this view.
 */
static ir_node *pick_delay_slot_for(ir_node *node)
	static const unsigned PICK_DELAY_SLOT_MAX_DISTANCE = 10;
	assert(has_delay_slot(node));

	if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) {
		optimize_fallthrough(node);

	/* search before the current position */
	sched_foreach_reverse_from(sched_prev(node), schedpoint) {
		if (has_delay_slot(schedpoint))
		if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
		if (!can_move_down_into_delayslot(schedpoint, node))

		/* found something */

	/* search after the current position */
	sched_foreach_from(sched_next(node), schedpoint) {
		if (has_delay_slot(schedpoint))
		if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
		if (!is_legal_delay_slot_filler(schedpoint))
		if (!can_move_up_into_delayslot(schedpoint, node))

		/* found something */

	/* look in successor blocks */
	ir_node *block = get_nodes_block(node);
	/* TODO: sort succs by execution frequency */
	foreach_block_succ(block, edge) {
		ir_node *succ = get_edge_src_irn(edge);
		/* we can't easily move up stuff from blocks with multiple predecessors
		 * since the instruction is lacking for the other preds then.
		 * (We also don't have to do any phi translation) */
		if (get_Block_n_cfgpreds(succ) > 1)

		sched_foreach(succ, schedpoint) {
			if (has_delay_slot(schedpoint))
			/* can't move pinned nodes accross blocks */
			if (get_irn_pinned(schedpoint) == op_pin_state_pinned)
			/* restore doesn't model register window switching correctly,
			 * so it appears like we could move it, which is not true */
			if (is_sparc_Restore(schedpoint)
			    || is_sparc_RestoreZero(schedpoint))
			if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
			if (!is_legal_delay_slot_filler(schedpoint))
			if (can_move_up_into_delayslot(schedpoint, node)) {
				/* it's fine to move the insn accross blocks */
			} else if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) {
				/* only the true-path may use the annulled delay slot */
				ir_node *proj = get_Block_cfgpred(succ, 0);
				long nr = get_Proj_proj(proj);
				if ((nr == pn_sparc_Bicc_true || nr == pn_sparc_fbfcc_true)
				    && be_can_move_up(heights, schedpoint, succ)) {
					/* we can use it with the annul flag */
					sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node);
					attr->annul_delay_slot = true;
/**
 * printf-style emitter for one assembler line. Fragments visible here
 * correspond to these conversions: %A (",a" annul flag), %Dn (dest
 * register n), %E (entity plus addend), %F[DMS] (fp mode suffix), %H
 * (high immediate), %L (cfop target, va_arg), %M[LS] (load/store mode),
 * %On (offset operand n), %R (register, va_arg), %Sn (source register n
 * or immediate), %d/%u (int/unsigned, optional '+'), %s (string).
 * NOTE(review): the dispatch switch, the `plus`/`imm` flag parsing and
 * the va_start/va_end lines are not visible in this view.
 */
void sparc_emitf(ir_node const *const node, char const *fmt, ...)
	/* copy plain text up to the next conversion */
	char const *start = fmt;
	while (*fmt != '%' && *fmt != '\0')
	be_emit_string_len(start, fmt - start);
		const sparc_jmp_cond_attr_t *attr
			= get_sparc_jmp_cond_attr_const(node);
		if (attr->annul_delay_slot) {
			be_emit_cstring(",a");
		if (*fmt < '0' || '9' <= *fmt)
		sparc_emit_dest_register(node, *fmt++ - '0');
		sparc_attr_t const *const attr = get_sparc_attr_const(node);
		be_gas_emit_entity(attr->immediate_value_entity);
		if (attr->immediate_value != 0) {
			be_emit_irprintf(plus ? "%+d" : "%d", attr->immediate_value);
		case 'D': mode = get_sparc_fp_conv_attr_const(node)->dest_mode; break;
		case 'M': mode = get_sparc_fp_attr_const(node)->fp_mode; break;
		case 'S': mode = get_sparc_fp_conv_attr_const(node)->src_mode; break;
		default: goto unknown;
		emit_fp_suffix(mode);
		sparc_emit_high_immediate(node);
		ir_node *n = va_arg(ap, ir_node*);
		sparc_emit_cfop_target(n);
		case 'L': sparc_emit_load_mode(node); break;
		case 'S': sparc_emit_store_mode(node); break;
		default: goto unknown;
		if (*fmt < '0' || '9' <= *fmt)
		sparc_emit_offset(node, *fmt++ - '0');
		arch_register_t const *const reg = va_arg(ap, const arch_register_t*);
		be_emit_string(arch_register_get_name(reg));
		if (*fmt < '0' || '9' <= *fmt)
		unsigned const pos = *fmt++ - '0';
		/* %S falls back to the immediate form when requested and legal */
		if (imm && arch_get_irn_flags(node) & (arch_irn_flags_t)sparc_arch_irn_flag_immediate_form) {
			sparc_emit_immediate(node);
			sparc_emit_source_register(node, pos);
		int const num = va_arg(ap, int);
		be_emit_irprintf(plus ? "%+d" : "%d", num);
		char const *const str = va_arg(ap, char const*);
		unsigned const num = va_arg(ap, unsigned);
		be_emit_irprintf(plus ? "%+u" : "%u", num);
	panic("unknown format conversion in sparc_emitf()");
	be_emit_finish_line_gas(node);
/**
 * Emits code for stack space management
 * NOTE(review): the early-out for offset == 0 appears on lines not
 * visible in this view.
 */
static void emit_be_IncSP(const ir_node *irn)
	int offset = be_get_IncSP_offset(irn);

	/* SPARC stack grows downwards */
	/* comma-operator inside the ternary: a positive offset (allocation)
	 * is negated and emitted with "add", otherwise "sub" is used */
	char const *const insn = offset > 0 ? offset = -offset, "add" : "sub";
	sparc_emitf(irn, "%s %S0, %d, %D0", insn, offset);
731 * Emits code for stack space management.
733 static void emit_sparc_SubSP(const ir_node *irn)
735 sparc_emitf(irn, "sub %S0, %SI1, %D0");
736 sparc_emitf(irn, "add %S0, %u, %D1", SPARC_MIN_STACKSIZE);
/**
 * Emit the instruction occupying @p node's delay slot: the filler chosen
 * by pick_delay_slot_for() if any, otherwise a nop.
 * NOTE(review): the else branch structure and closing braces are not
 * visible in this view.
 */
static void fill_delay_slot(const ir_node *node)
	emitting_delay_slot = true;
	const ir_node *filler = pmap_get(ir_node, delay_slots, node);
	if (filler != NULL) {
		/* a filler must be a single real instruction */
		assert(!is_no_instruction(filler));
		assert(!emits_multiple_instructions(filler));
		sparc_emit_node(filler);
		sparc_emitf(NULL, "nop");
	emitting_delay_slot = false;
/**
 * Emit a division: write the high dividend word to %y, wait the required
 * wr->div latency (filling with the node's delay-slot filler or nops),
 * then emit the actual sdiv/udiv given by @p insn.
 * NOTE(review): the loop's if/else structure and closing braces are not
 * visible in this view.
 */
static void emit_sparc_Div(const ir_node *node, char const *const insn)
	sparc_emitf(node, "wr %S0, 0, %%y");

	/* TODO: we should specify number of delayslots in an architecture
	 * specification */
	unsigned wry_delay_count = 3;
	for (unsigned i = 0; i < wry_delay_count; ++i) {
		fill_delay_slot(node);
		emitting_delay_slot = true;
		sparc_emitf(NULL, "nop");
		emitting_delay_slot = false;

	sparc_emitf(node, "%s %S1, %SI2, %D0", insn);
773 static void emit_sparc_SDiv(const ir_node *node)
775 emit_sparc_Div(node, "sdiv");
778 static void emit_sparc_UDiv(const ir_node *node)
780 emit_sparc_Div(node, "udiv");
/**
 * Emit a call: register-indirect or to an entity, followed by the delay
 * slot; calls returning an aggregate get a trailing "unimp <size>".
 * NOTE(review): the if/else structure and closing braces are not visible
 * in this view.
 */
static void emit_sparc_Call(const ir_node *node)
	if (is_sparc_reg_call(node)) {
		int dest_addr = get_sparc_Call_dest_addr_pos(node);
		sparc_emitf(node, "call %R", arch_get_irn_register_in(node, dest_addr));
		sparc_emitf(node, "call %E, 0");

	fill_delay_slot(node);

	/* aggregate-return convention: the caller marks the struct size */
	if (arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return) {
		sparc_emitf(NULL, "unimp 8");
/**
 * Emit a register permutation (swap of two registers). FP values are
 * swapped component-wise through the scratch register %f31; integer
 * values use the three-xor in-place swap trick.
 * NOTE(review): the `} else {` between the two branches and closing
 * braces are not visible in this view.
 */
static void emit_be_Perm(const ir_node *irn)
	ir_mode *mode = get_irn_mode(get_irn_n(irn, 0));
	if (mode_is_float(mode)) {
		const arch_register_t *reg0 = arch_get_irn_register_in(irn, 0);
		const arch_register_t *reg1 = arch_get_irn_register_in(irn, 1);
		unsigned reg_idx0 = reg0->global_index;
		unsigned reg_idx1 = reg1->global_index;
		/* multi-word fp values are swapped one 32-bit register at a time */
		unsigned width = arch_get_irn_register_req_in(irn, 0)->width;
		for (unsigned i = 0; i < width; ++i) {
			const arch_register_t *r0 = &sparc_registers[reg_idx0+i];
			const arch_register_t *r1 = &sparc_registers[reg_idx1+i];
			sparc_emitf(irn, "fmovs %R, %%f31", r0);
			sparc_emitf(irn, "fmovs %R, %R", r1, r0);
			sparc_emitf(irn, "fmovs %%f31, %R", r1);
		/* xor swap: exchanges the two integer registers without a
		 * temporary */
		sparc_emitf(irn, "xor %S1, %S0, %S0");
		sparc_emitf(irn, "xor %S1, %S0, %S1");
		sparc_emitf(irn, "xor %S1, %S0, %S0");
822 /* The stack pointer must always be SPARC_STACK_ALIGNMENT bytes aligned, so get
823 * the next bigger integer that's evenly divisible by it. */
824 static unsigned get_aligned_sp_change(const unsigned num_regs)
826 const unsigned bytes = num_regs * SPARC_REGISTER_SIZE;
827 return round_up2(bytes, SPARC_STACK_ALIGNMENT);
/* Spill register l0 or both l0 and l1, depending on n_spilled and n_to_spill.
 * NOTE(review): the signature continuation (the n_to_spill parameter)
 * and several closing braces are on lines not visible in this view. */
static void memperm_emit_spill_registers(const ir_node *node, int n_spilled,
	assert(n_spilled < n_to_spill);

	if (n_spilled == 0) {
		/* We always reserve stack space for two registers because during copy
		 * processing we don't know yet if we also need to handle a cycle which
		 * needs two registers. More complicated code in emit_MemPerm would
		 * prevent wasting SPARC_REGISTER_SIZE bytes of stack space but
		 * it is not worth the worse readability of emit_MemPerm. */

		/* Keep stack pointer aligned. */
		unsigned sp_change = get_aligned_sp_change(2);
		sparc_emitf(node, "sub %%sp, %u, %%sp", sp_change);

		/* Spill register l0. */
		sparc_emitf(node, "st %%l0, [%%sp%+d]", SPARC_MIN_STACKSIZE);

	if (n_to_spill == 2) {
		/* Spill register l1. */
		sparc_emitf(node, "st %%l1, [%%sp%+d]", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE);
857 /* Restore register l0 or both l0 and l1, depending on n_spilled. */
858 static void memperm_emit_restore_registers(const ir_node *node, int n_spilled)
860 if (n_spilled == 2) {
861 /* Restore register l1. */
862 sparc_emitf(node, "ld [%%sp%+d], %%l1", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE);
865 /* Restore register l0. */
866 sparc_emitf(node, "ld [%%sp%+d], %%l0", SPARC_MIN_STACKSIZE);
868 /* Restore stack pointer. */
869 unsigned sp_change = get_aligned_sp_change(2);
870 sparc_emitf(node, "add %%sp, %u, %%sp", sp_change);
/* Emit code to copy in_ent to out_ent. Only uses l0.
 * NOTE(review): the signature continuation (the out_ent parameter) is on
 * a line not visible in this view. */
static void memperm_emit_copy(const ir_node *node, ir_entity *in_ent,
	ir_graph          *irg     = get_irn_irg(node);
	be_stack_layout_t *layout  = be_get_irg_stack_layout(irg);
	int                off_in  = be_get_stack_entity_offset(layout, in_ent, 0);
	int                off_out = be_get_stack_entity_offset(layout, out_ent, 0);

	/* Load from input entity. */
	sparc_emitf(node, "ld [%%fp%+d], %%l0", off_in);
	/* Store to output entity. */
	sparc_emitf(node, "st %%l0, [%%fp%+d]", off_out);
/* Emit code to swap ent1 and ent2. Uses l0 and l1.
 * NOTE(review): the signature continuation (the ent2 parameter) is on a
 * line not visible in this view. */
static void memperm_emit_swap(const ir_node *node, ir_entity *ent1,
	ir_graph          *irg    = get_irn_irg(node);
	be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
	int                off1   = be_get_stack_entity_offset(layout, ent1, 0);
	int                off2   = be_get_stack_entity_offset(layout, ent2, 0);

	/* Load from first input entity. */
	sparc_emitf(node, "ld [%%fp%+d], %%l0", off1);
	/* Load from second input entity. */
	sparc_emitf(node, "ld [%%fp%+d], %%l1", off2);
	/* Store first value to second output entity. */
	sparc_emitf(node, "st %%l0, [%%fp%+d]", off2);
	/* Store second value to first output entity. */
	sparc_emitf(node, "st %%l1, [%%fp%+d]", off1);
/* Find the index of ent in ents or return -1 if not found.
 * NOTE(review): the comparison/return lines of the loop body are not
 * visible in this view. */
static int get_index(ir_entity **ents, int n, ir_entity *ent)
	for (int i = 0; i < n; ++i) {
/**
 * Emit code for a MemPerm node.
 *
 * Analyze MemPerm for copy chains and cyclic swaps and resolve them using
 * loads/stores through scratch registers l0/l1.
 * This function is conceptually very similar to permute_values in
 * the register allocator.
 * NOTE(review): increment lines (`++oidx;` etc.), some `continue`s and
 * closing braces are on lines not visible in this view.
 */
static void emit_be_MemPerm(const ir_node *node)
	int memperm_arity = be_get_MemPerm_entity_arity(node);
	/* Upper limit for the number of participating entities is twice the
	 * arity, e.g., for a simple copying MemPerm node with one input/output. */
	int max_size = 2 * memperm_arity;
	ir_entity **entities = ALLOCANZ(ir_entity *, max_size);
	/* sourceof contains the input entity for each entity. If an entity is
	 * never used as an output, its entry in sourceof is a fix point. */
	int *sourceof = ALLOCANZ(int, max_size);
	/* n_users counts how many output entities use this entity as their input.*/
	int *n_users = ALLOCANZ(int, max_size);
	/* n_spilled records the number of spilled registers, either 1 or 2. */

	/* This implementation currently only works with frame pointers. */
	ir_graph          *irg    = get_irn_irg(node);
	be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
	assert(!layout->sp_relative && "MemPerms currently do not work without frame pointers");

	/* initialize sourceof to the identity (every entity is a fix point) */
	for (int i = 0; i < max_size; ++i) {

	for (int i = 0; i < memperm_arity; ++i) {
		ir_entity *out = be_get_MemPerm_out_entity(node, i);
		ir_entity *in  = be_get_MemPerm_in_entity(node, i);

		/* Insert into entities to be able to operate on unique indices. */
		if (get_index(entities, n, out) == -1)
		if (get_index(entities, n, in) == -1)

		int oidx = get_index(entities, n, out);
		int iidx = get_index(entities, n, in);

		sourceof[oidx] = iidx; /* Remember the source. */
		++n_users[iidx]; /* Increment number of users of this entity. */

	/* First do all the copies. */
	for (int oidx = 0; oidx < n; /* empty */) {
		int iidx = sourceof[oidx];

		/* Nothing to do for fix points.
		 * Also, if entities[oidx] is used as an input by another copy, we
		 * can't overwrite entities[oidx] yet.*/
		if (iidx == oidx || n_users[oidx] > 0) {

		/* We found the end of a 'chain', so do the copy. */
		if (n_spilled == 0) {
			memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/1);

		memperm_emit_copy(node, entities[iidx], entities[oidx]);

		/* mark the target as done */
		sourceof[oidx] = oidx;

		assert(n_users[iidx] > 0);
		/* Decrementing the number of users might enable us to do another
		 * copy. */
		if (iidx < oidx && n_users[iidx] == 0) {

	/* The rest are cycles. */
	for (int oidx = 0; oidx < n; /* empty */) {
		int iidx = sourceof[oidx];

		/* Nothing to do for fix points. */

		/* a cycle member is used by exactly one other entity */
		assert(n_users[iidx] == 1);

		/* Swap the two values to resolve the cycle. */
		if (n_spilled < 2) {
			memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/2);

		memperm_emit_swap(node, entities[iidx], entities[oidx]);

		int tidx = sourceof[iidx];
		/* the swapped-in value is now in place */
		sourceof[iidx] = iidx;

		/* The source of oidx is now the old source of iidx, because we swapped
		 * the two entities. */
		sourceof[oidx] = tidx;

#ifdef DEBUG_libfirm
	/* Only fix points should remain. */
	for (int i = 0; i < max_size; ++i) {
		assert(sourceof[i] == i);

	assert(n_spilled > 0 && "Useless MemPerm node");

	memperm_emit_restore_registers(node, n_spilled);
/**
 * Emit a function return: "jmp %o7+8" (or +12 for aggregate returns to
 * skip the caller's unimp marker), then the delay slot. If a Restore
 * fills the delay slot the return address is in %i7 instead of %o7 —
 * that reassignment happens on lines not visible in this view.
 */
static void emit_sparc_Return(const ir_node *node)
	ir_graph  *irg    = get_irn_irg(node);
	ir_entity *entity = get_irg_entity(irg);
	ir_type   *type   = get_entity_type(entity);

	const char *destreg = "%o7";

	/* hack: we don't explicitely model register changes because of the
	 * restore node. So we have to do it manually here */
	const ir_node *delay_slot = pmap_get(ir_node, delay_slots, node);
	if (delay_slot != NULL &&
	    (is_sparc_Restore(delay_slot) || is_sparc_RestoreZero(delay_slot))) {

	/* aggregate returns skip the "unimp <size>" word after the call */
	char const *const offset = get_method_calling_convention(type) & cc_compound_ret ? "12" : "8";
	sparc_emitf(node, "jmp %s+%s", destreg, offset);
	fill_delay_slot(node);
/**
 * Map an %i register to the corresponding %o register (the register
 * window rotates on restore).
 * NOTE(review): the early return for non-%i registers is on a line not
 * visible in this view.
 */
static const arch_register_t *map_i_to_o_reg(const arch_register_t *reg)
	unsigned idx = reg->global_index;
	if (idx < REG_I0 || idx > REG_I7)
	idx += REG_O0 - REG_I0;
	assert(REG_O0 <= idx && idx <= REG_O7);
	return &sparc_registers[idx];
1072 static void emit_sparc_Restore(const ir_node *node)
1074 const arch_register_t *destreg
1075 = arch_get_irn_register_out(node, pn_sparc_Restore_res);
1076 sparc_emitf(node, "restore %S2, %SI3, %R", map_i_to_o_reg(destreg));
1079 static void emit_sparc_FrameAddr(const ir_node *node)
1081 const sparc_attr_t *attr = get_sparc_attr_const(node);
1082 int32_t offset = attr->immediate_value;
1084 char const *const insn = offset > 0 ? offset = -offset, "sub" : "add";
1085 assert(sparc_is_value_imm_encodeable(offset));
1086 sparc_emitf(node, "%s %S0, %d, %D0", insn, offset);
1089 static const char *get_icc_unsigned(ir_relation relation)
1091 switch (relation & (ir_relation_less_equal_greater)) {
1092 case ir_relation_false: return "bn";
1093 case ir_relation_equal: return "be";
1094 case ir_relation_less: return "blu";
1095 case ir_relation_less_equal: return "bleu";
1096 case ir_relation_greater: return "bgu";
1097 case ir_relation_greater_equal: return "bgeu";
1098 case ir_relation_less_greater: return "bne";
1099 case ir_relation_less_equal_greater: return "ba";
1100 default: panic("Cmp has unsupported relation");
1104 static const char *get_icc_signed(ir_relation relation)
1106 switch (relation & (ir_relation_less_equal_greater)) {
1107 case ir_relation_false: return "bn";
1108 case ir_relation_equal: return "be";
1109 case ir_relation_less: return "bl";
1110 case ir_relation_less_equal: return "ble";
1111 case ir_relation_greater: return "bg";
1112 case ir_relation_greater_equal: return "bge";
1113 case ir_relation_less_greater: return "bne";
1114 case ir_relation_less_equal_greater: return "ba";
1115 default: panic("Cmp has unsupported relation");
1119 static const char *get_fcc(ir_relation relation)
1122 case ir_relation_false: return "fbn";
1123 case ir_relation_equal: return "fbe";
1124 case ir_relation_less: return "fbl";
1125 case ir_relation_less_equal: return "fble";
1126 case ir_relation_greater: return "fbg";
1127 case ir_relation_greater_equal: return "fbge";
1128 case ir_relation_less_greater: return "fblg";
1129 case ir_relation_less_equal_greater: return "fbo";
1130 case ir_relation_unordered: return "fbu";
1131 case ir_relation_unordered_equal: return "fbue";
1132 case ir_relation_unordered_less: return "fbul";
1133 case ir_relation_unordered_less_equal: return "fbule";
1134 case ir_relation_unordered_greater: return "fbug";
1135 case ir_relation_unordered_greater_equal: return "fbuge";
1136 case ir_relation_unordered_less_greater: return "fbne";
1137 case ir_relation_true: return "fba";
1139 panic("invalid relation");
/** Function type mapping a compare relation to a branch mnemonic. */
typedef const char* (*get_cc_func)(ir_relation relation);

/**
 * Emit a conditional branch: the true-projection as a real branch with
 * filled delay slot, the false-projection as a fallthrough comment or
 * an explicit "ba" plus nop.
 * NOTE(review): the proj assignments inside the edge loop and several
 * closing braces/else lines are not visible in this view.
 */
static void emit_sparc_branch(const ir_node *node, get_cc_func get_cc)
	const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node);
	ir_relation    relation   = attr->relation;
	const ir_node *proj_true  = NULL;
	const ir_node *proj_false = NULL;

	/* Bicc and fbfcc share their projection numbers */
	assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false);
	assert((long)pn_sparc_Bicc_true  == (long)pn_sparc_fbfcc_true);
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		long nr = get_Proj_proj(proj);
		if (nr == pn_sparc_Bicc_true) {
			assert(nr == pn_sparc_Bicc_false);

	/* emit the true proj */
	sparc_emitf(node, "%s%A %L", get_cc(relation), proj_true);
	fill_delay_slot(node);

	const ir_node *block      = get_nodes_block(node);
	const ir_node *next_block = (ir_node*)get_irn_link(block);

	if (get_jump_target(proj_false) == next_block) {
		if (be_options.verbose_asm) {
			sparc_emitf(node, "/* fallthrough to %L */", proj_false);
		sparc_emitf(node, "ba %L", proj_false);
		/* TODO: fill this slot as well */
		emitting_delay_slot = true;
		sparc_emitf(NULL, "nop");
		emitting_delay_slot = false;
1184 static void emit_sparc_Bicc(const ir_node *node)
1186 const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node);
1187 bool is_unsigned = attr->is_unsigned;
1188 emit_sparc_branch(node, is_unsigned ? get_icc_unsigned : get_icc_signed);
1191 static void emit_sparc_fbfcc(const ir_node *node)
1193 /* if the flags producing node was immediately in front of us, emit
1195 ir_node *flags = get_irn_n(node, n_sparc_fbfcc_flags);
1196 ir_node *prev = sched_prev(node);
1197 if (is_Block(prev)) {
1198 /* TODO: when the flags come from another block, then we have to do
1199 * more complicated tests to see wether the flag producing node is
1200 * potentially in front of us (could happen for fallthroughs) */
1201 panic("TODO: fbfcc flags come from other block");
1203 if (skip_Proj(flags) == prev) {
1204 sparc_emitf(NULL, "nop");
1206 emit_sparc_branch(node, get_fcc);
1209 static void emit_sparc_Ba(const ir_node *node)
1211 if (ba_is_fallthrough(node)) {
1212 if (be_options.verbose_asm) {
1213 sparc_emitf(node, "/* fallthrough to %L */", node);
1216 sparc_emitf(node, "ba %L", node);
1217 fill_delay_slot(node);
1221 static void emit_sparc_SwitchJmp(const ir_node *node)
1223 const sparc_switch_jmp_attr_t *attr = get_sparc_switch_jmp_attr_const(node);
1225 sparc_emitf(node, "jmp %S0");
1226 fill_delay_slot(node);
1228 be_emit_jump_table(node, attr->table, attr->table_entity, get_jump_target);
1231 static void emit_fmov(const ir_node *node, const arch_register_t *src_reg,
1232 const arch_register_t *dst_reg)
1234 sparc_emitf(node, "fmovs %R, %R", src_reg, dst_reg);
1237 static const arch_register_t *get_next_fp_reg(const arch_register_t *reg)
1239 unsigned idx = reg->global_index;
1240 assert(reg == &sparc_registers[idx]);
1242 assert(idx - REG_F0 < N_sparc_fp_REGS);
1243 return &sparc_registers[idx];
1246 static void emit_be_Copy(const ir_node *node)
1248 ir_mode *mode = get_irn_mode(node);
1249 const arch_register_t *src_reg = arch_get_irn_register_in(node, 0);
1250 const arch_register_t *dst_reg = arch_get_irn_register_out(node, 0);
1252 if (src_reg == dst_reg)
1255 if (mode_is_float(mode)) {
1256 unsigned bits = get_mode_size_bits(mode);
1257 int n = bits > 32 ? bits > 64 ? 3 : 1 : 0;
1258 emit_fmov(node, src_reg, dst_reg);
1259 for (int i = 0; i < n; ++i) {
1260 src_reg = get_next_fp_reg(src_reg);
1261 dst_reg = get_next_fp_reg(dst_reg);
1262 emit_fmov(node, src_reg, dst_reg);
1264 } else if (mode_is_data(mode)) {
1265 sparc_emitf(node, "mov %S0, %D0");
1267 panic("invalid mode");
1271 static void emit_nothing(const ir_node *irn)
1276 typedef void (*emit_func) (const ir_node *);
1278 static inline void set_emitter(ir_op *op, emit_func sparc_emit_node)
1280 op->ops.generic = (op_func)sparc_emit_node;
1284 * Enters the emitter functions for handled nodes into the generic
1285 * pointer of an opcode.
1287 static void sparc_register_emitters(void)
1289 /* first clear the generic function pointer for all ops */
1290 ir_clear_opcodes_generic_func();
1291 /* register all emitter functions defined in spec */
1292 sparc_register_spec_emitters();
1294 /* custom emitter */
1295 set_emitter(op_be_Copy, emit_be_Copy);
1296 set_emitter(op_be_CopyKeep, emit_be_Copy);
1297 set_emitter(op_be_IncSP, emit_be_IncSP);
1298 set_emitter(op_be_MemPerm, emit_be_MemPerm);
1299 set_emitter(op_be_Perm, emit_be_Perm);
1300 set_emitter(op_sparc_Ba, emit_sparc_Ba);
1301 set_emitter(op_sparc_Bicc, emit_sparc_Bicc);
1302 set_emitter(op_sparc_Call, emit_sparc_Call);
1303 set_emitter(op_sparc_fbfcc, emit_sparc_fbfcc);
1304 set_emitter(op_sparc_FrameAddr, emit_sparc_FrameAddr);
1305 set_emitter(op_sparc_SubSP, emit_sparc_SubSP);
1306 set_emitter(op_sparc_Restore, emit_sparc_Restore);
1307 set_emitter(op_sparc_Return, emit_sparc_Return);
1308 set_emitter(op_sparc_SDiv, emit_sparc_SDiv);
1309 set_emitter(op_sparc_SwitchJmp, emit_sparc_SwitchJmp);
1310 set_emitter(op_sparc_UDiv, emit_sparc_UDiv);
1312 /* no need to emit anything for the following nodes */
1313 set_emitter(op_be_Keep, emit_nothing);
1314 set_emitter(op_sparc_Start, emit_nothing);
1315 set_emitter(op_Phi, emit_nothing);
1319 * Emits code for a node.
1321 static void sparc_emit_node(const ir_node *node)
1323 ir_op *op = get_irn_op(node);
1325 if (op->ops.generic) {
1326 emit_func func = (emit_func) op->ops.generic;
1327 be_dwarf_location(get_irn_dbg_info(node));
1330 panic("No emit handler for node %+F (graph %+F)\n", node,
1335 static bool block_needs_label(const ir_node *block, const ir_node *sched_prev)
1337 if (get_Block_entity(block) != NULL)
1340 int n_cfgpreds = get_Block_n_cfgpreds(block);
1341 if (n_cfgpreds == 0) {
1343 } else if (n_cfgpreds > 1) {
1346 ir_node *cfgpred = get_Block_cfgpred(block, 0);
1347 ir_node *cfgpred_block = get_nodes_block(cfgpred);
1348 if (is_Proj(cfgpred) && is_sparc_SwitchJmp(get_Proj_pred(cfgpred)))
1350 return sched_prev != cfgpred_block || get_jump_target(cfgpred) != block;
1355 * Walks over the nodes in a block connected by scheduling edges
1356 * and emits code for each node.
1358 static void sparc_emit_block(ir_node *block, ir_node *prev)
1360 bool needs_label = block_needs_label(block, prev);
1361 be_gas_begin_block(block, needs_label);
1363 sched_foreach(block, node) {
1364 if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node)))
1366 sparc_emit_node(node);
1371 * Emits code for function start.
1373 static void sparc_emit_func_prolog(ir_graph *irg)
1375 ir_entity *entity = get_irg_entity(irg);
1376 be_gas_emit_function_prolog(entity, 4, NULL);
1380 * Emits code for function end
1382 static void sparc_emit_func_epilog(ir_graph *irg)
1384 ir_entity *entity = get_irg_entity(irg);
1385 be_gas_emit_function_epilog(entity);
1388 static void init_jump_links(ir_node *block, void *env)
1392 int n = get_Block_n_cfgpreds(block);
1393 for (n--; n >= 0; n--) {
1394 ir_node *pred = get_Block_cfgpred(block, n);
1395 set_jump_target(pred, block);
1399 static int cmp_block_execfreqs(const void *d1, const void *d2)
1401 ir_node **p1 = (ir_node**)d1;
1402 ir_node **p2 = (ir_node**)d2;
1403 double freq1 = get_block_execfreq(*p1);
1404 double freq2 = get_block_execfreq(*p2);
1409 return get_irn_node_nr(*p2)-get_irn_node_nr(*p1);
1412 static void pick_delay_slots(size_t n_blocks, ir_node **blocks)
1414 /* create blocklist sorted by execution frequency */
1415 ir_node **sorted_blocks = XMALLOCN(ir_node*, n_blocks);
1416 memcpy(sorted_blocks, blocks, n_blocks*sizeof(sorted_blocks[0]));
1417 qsort(sorted_blocks, n_blocks, sizeof(sorted_blocks[0]),
1418 cmp_block_execfreqs);
1420 for (size_t i = 0; i < n_blocks; ++i) {
1421 const ir_node *block = sorted_blocks[i];
1422 sched_foreach(block, node) {
1423 if (!has_delay_slot(node))
1425 ir_node *filler = pick_delay_slot_for(node);
1428 rbitset_set(delay_slot_fillers, get_irn_idx(filler));
1429 pmap_insert(delay_slots, node, filler);
1434 void sparc_emit_routine(ir_graph *irg)
1436 heights = heights_new(irg);
1437 delay_slot_fillers = rbitset_malloc(get_irg_last_idx(irg));
1438 delay_slots = pmap_create();
1440 /* register all emitter functions */
1441 sparc_register_emitters();
1443 /* create the block schedule. For now, we don't need it earlier. */
1444 ir_node **block_schedule = be_create_block_schedule(irg);
1446 sparc_emit_func_prolog(irg);
1447 irg_block_walk_graph(irg, init_jump_links, NULL, NULL);
1449 /* inject block scheduling links & emit code of each block */
1450 size_t n_blocks = ARR_LEN(block_schedule);
1451 for (size_t i = 0; i < n_blocks; ++i) {
1452 ir_node *block = block_schedule[i];
1453 ir_node *next_block = i+1 < n_blocks ? block_schedule[i+1] : NULL;
1454 set_irn_link(block, next_block);
1457 pick_delay_slots(n_blocks, block_schedule);
1459 for (size_t i = 0; i < n_blocks; ++i) {
1460 ir_node *block = block_schedule[i];
1461 ir_node *prev = i>=1 ? block_schedule[i-1] : NULL;
1462 if (block == get_irg_end_block(irg))
1464 sparc_emit_block(block, prev);
1467 /* emit function epilog */
1468 sparc_emit_func_epilog(irg);
1470 pmap_destroy(delay_slots);
1471 xfree(delay_slot_fillers);
1472 heights_free(heights);
1475 void sparc_init_emitter(void)
1477 FIRM_DBG_REGISTER(dbg, "firm.be.sparc.emit");