2 * Copyright (C) 1995-2010 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief emit assembler for a backend graph
23 * @author Hannes Rapp, Matthias Braun
29 #include "bitfiddle.h"
41 #include "raw_bitset.h"
45 #include "execfreq_t.h"
48 #include "beblocksched.h"
54 #include "bepeephole.h"
56 #include "sparc_emitter.h"
57 #include "gen_sparc_emitter.h"
58 #include "sparc_nodes_attr.h"
59 #include "sparc_new_nodes.h"
60 #include "gen_sparc_regalloc_if.h"
62 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
64 static ir_heights_t *heights;
65 static unsigned *delay_slot_fillers;
66 static pmap *delay_slots;
68 static void sparc_emit_node(const ir_node *node);
69 static bool emitting_delay_slot;
72 * indent before instruction. (Adds additional indentation when emitting
75 static void sparc_emit_indent(void)
78 if (emitting_delay_slot)
82 static void sparc_emit_immediate(ir_node const *const node)
84 const sparc_attr_t *attr = get_sparc_attr_const(node);
85 ir_entity *entity = attr->immediate_value_entity;
88 int32_t value = attr->immediate_value;
89 assert(sparc_is_value_imm_encodeable(value));
90 be_emit_irprintf("%d", value);
92 if (get_entity_owner(entity) == get_tls_type()) {
93 be_emit_cstring("%tle_lox10(");
95 be_emit_cstring("%lo(");
97 be_gas_emit_entity(entity);
98 if (attr->immediate_value != 0) {
99 be_emit_irprintf("%+d", attr->immediate_value);
105 static void sparc_emit_high_immediate(ir_node const *node)
107 const sparc_attr_t *attr = get_sparc_attr_const(node);
108 ir_entity *entity = attr->immediate_value_entity;
110 if (entity == NULL) {
111 uint32_t value = (uint32_t) attr->immediate_value;
112 be_emit_irprintf("%%hi(0x%X)", value);
114 if (get_entity_owner(entity) == get_tls_type()) {
115 be_emit_cstring("%tle_hix22(");
117 be_emit_cstring("%hi(");
119 be_gas_emit_entity(entity);
120 if (attr->immediate_value != 0) {
121 be_emit_irprintf("%+d", attr->immediate_value);
127 static void sparc_emit_source_register(ir_node const *node, int const pos)
129 const arch_register_t *reg = arch_get_irn_register_in(node, pos);
131 be_emit_string(reg->name);
134 static void sparc_emit_dest_register(ir_node const *const node, int const pos)
136 const arch_register_t *reg = arch_get_irn_register_out(node, pos);
138 be_emit_string(reg->name);
144 static void sparc_emit_offset(const ir_node *node, int offset_node_pos)
146 const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
148 if (attr->is_reg_reg) {
149 assert(!attr->is_frame_entity);
150 assert(attr->base.immediate_value == 0);
151 assert(attr->base.immediate_value_entity == NULL);
153 sparc_emit_source_register(node, offset_node_pos);
154 } else if (attr->is_frame_entity) {
155 int32_t offset = attr->base.immediate_value;
157 assert(sparc_is_value_imm_encodeable(offset));
158 be_emit_irprintf("%+ld", offset);
160 } else if (attr->base.immediate_value != 0
161 || attr->base.immediate_value_entity != NULL) {
163 sparc_emit_immediate(node);
170 static void sparc_emit_load_mode(ir_node const *const node)
172 const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
173 ir_mode *mode = attr->load_store_mode;
174 int bits = get_mode_size_bits(mode);
175 bool is_signed = mode_is_signed(mode);
178 case 8: be_emit_string(is_signed ? "sb" : "ub"); break;
179 case 16: be_emit_string(is_signed ? "sh" : "uh"); break;
181 case 64: be_emit_char('d'); break;
182 case 128: be_emit_char('q'); break;
183 default: panic("invalid load/store mode %+F", mode);
188 * Emit store mode char
190 static void sparc_emit_store_mode(ir_node const *const node)
192 const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
193 ir_mode *mode = attr->load_store_mode;
194 int bits = get_mode_size_bits(mode);
197 case 8: be_emit_char('b'); break;
198 case 16: be_emit_char('h'); break;
200 case 64: be_emit_char('d'); break;
201 case 128: be_emit_char('q'); break;
202 default: panic("invalid load/store mode %+F", mode);
206 static void emit_fp_suffix(const ir_mode *mode)
208 assert(mode_is_float(mode));
209 switch (get_mode_size_bits(mode)) {
210 case 32: be_emit_char('s'); break;
211 case 64: be_emit_char('d'); break;
212 case 128: be_emit_char('q'); break;
213 default: panic("invalid FP mode");
217 static void set_jump_target(ir_node *jump, ir_node *target)
219 set_irn_link(jump, target);
222 static ir_node *get_jump_target(const ir_node *jump)
224 return (ir_node*)get_irn_link(jump);
228 * Returns the target label for a control flow node.
230 static void sparc_emit_cfop_target(const ir_node *node)
232 ir_node *block = get_jump_target(node);
233 be_gas_emit_block_name(block);
237 * returns true if a sparc_call calls a register and not an immediate
239 static bool is_sparc_reg_call(const ir_node *node)
241 const sparc_attr_t *attr = get_sparc_attr_const(node);
242 return attr->immediate_value_entity == NULL;
245 static int get_sparc_Call_dest_addr_pos(const ir_node *node)
247 assert(is_sparc_reg_call(node));
248 return get_irn_arity(node)-1;
251 static bool ba_is_fallthrough(const ir_node *node)
253 ir_node *block = get_nodes_block(node);
254 ir_node *next_block = (ir_node*)get_irn_link(block);
255 return get_jump_target(node) == next_block;
258 static bool is_no_instruction(const ir_node *node)
260 /* copies are nops if src_reg == dest_reg */
261 if (be_is_Copy(node) || be_is_CopyKeep(node)) {
262 const arch_register_t *src_reg = arch_get_irn_register_in(node, 0);
263 const arch_register_t *dest_reg = arch_get_irn_register_out(node, 0);
265 if (src_reg == dest_reg)
268 if (be_is_IncSP(node) && be_get_IncSP_offset(node) == 0)
270 /* Ba is not emitted if it is a simple fallthrough */
271 if (is_sparc_Ba(node) && ba_is_fallthrough(node))
274 return be_is_Keep(node) || be_is_Start(node) || is_Phi(node);
277 static bool has_delay_slot(const ir_node *node)
279 if (is_sparc_Ba(node)) {
280 return !ba_is_fallthrough(node);
283 return arch_get_irn_flags(node) & sparc_arch_irn_flag_has_delay_slot;
286 /** returns true if the emitter for this sparc node can produce more than one
287 * actual sparc instruction.
288 * Usually it is a bad sign if we have to add instructions here. We should
289 * rather try to get them lowered down. So we can actually put them into
290 * delay slots and make them more accessible to the scheduler.
292 static bool emits_multiple_instructions(const ir_node *node)
294 if (has_delay_slot(node))
297 if (is_sparc_Call(node))
298 return arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return;
300 return is_sparc_SMulh(node) || is_sparc_UMulh(node)
301 || is_sparc_SDiv(node) || is_sparc_UDiv(node)
302 || be_is_MemPerm(node) || be_is_Perm(node)
303 || is_sparc_SubSP(node);
306 static bool uses_reg(const ir_node *node, unsigned reg_index, unsigned width)
308 int arity = get_irn_arity(node);
309 for (int i = 0; i < arity; ++i) {
310 const arch_register_t *in_reg = arch_get_irn_register_in(node, i);
311 const arch_register_req_t *in_req = arch_get_irn_register_req_in(node, i);
314 if (reg_index < (unsigned)in_reg->global_index + in_req->width
315 && reg_index + width > in_reg->global_index)
321 static bool writes_reg(const ir_node *node, unsigned reg_index, unsigned width)
323 be_foreach_out(node, o) {
324 const arch_register_t *out_reg = arch_get_irn_register_out(node, o);
327 const arch_register_req_t *out_req = arch_get_irn_register_req_out(node, o);
328 if (reg_index < (unsigned)out_reg->global_index + out_req->width
329 && reg_index + width > out_reg->global_index)
335 static bool is_legal_delay_slot_filler(const ir_node *node)
337 if (is_no_instruction(node))
339 if (emits_multiple_instructions(node))
341 if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node)))
346 static bool can_move_down_into_delayslot(const ir_node *node, const ir_node *to)
348 if (!is_legal_delay_slot_filler(node))
351 if (!be_can_move_down(heights, node, to))
354 if (is_sparc_Call(to)) {
356 /** all inputs are used after the delay slot so, we're fine */
357 if (!is_sparc_reg_call(to))
360 check = get_irn_n(to, get_sparc_Call_dest_addr_pos(to));
361 if (skip_Proj(check) == node)
364 /* the Call also destroys the value of %o7, but since this is
365 * currently marked as ignore register in the backend, it
366 * should never be used by the instruction in the delay slot. */
367 if (uses_reg(node, REG_O7, 1))
370 } else if (is_sparc_Return(to)) {
371 /* return uses the value of %o7, all other values are not
372 * immediately used */
373 if (writes_reg(node, REG_O7, 1))
377 /* the node must not use our computed values */
378 int arity = get_irn_arity(to);
379 for (int i = 0; i < arity; ++i) {
380 ir_node *in = get_irn_n(to, i);
381 if (skip_Proj(in) == node)
/* NOTE(review): this listing is subsampled — brace/continue/return lines are
 * missing below. Purpose: decide whether `node` (scheduled after `to`) may be
 * moved up into the delay slot of `to` without changing semantics. */
388 static bool can_move_up_into_delayslot(const ir_node *node, const ir_node *to)
390 	if (!be_can_move_up(heights, node, to))
393 	/* node must not use any results of 'to' */
394 	int arity = get_irn_arity(node);
395 	for (int i = 0; i < arity; ++i) {
396 		ir_node *in = get_irn_n(node, i);
397 		ir_node *skipped = skip_Proj(in);
/* register-dependence checks against the target instruction follow */
402 	/* register window cycling effects at Restore aren't correctly represented
403 	 * in the graph yet so we need this exception here */
404 	if (is_sparc_Restore(node) || is_sparc_RestoreZero(node)) {
406 	} else if (is_sparc_Call(to)) {
407 		/* node must not overwrite any of the inputs of the call,
408 		 * (except for the dest_addr) */
409 		int dest_addr_pos = is_sparc_reg_call(to)
410 		                  ? get_sparc_Call_dest_addr_pos(to) : -1;
412 		int call_arity = get_irn_arity(to);
413 		for (int i = 0; i < call_arity; ++i) {
414 			if (i == dest_addr_pos)
416 			const arch_register_t *reg = arch_get_irn_register_in(to, i);
419 			const arch_register_req_t *req = arch_get_irn_register_req_in(to, i);
420 			if (writes_reg(node, reg->global_index, req->width))
424 		/* node must not write to one of the call outputs */
425 		be_foreach_out(to, o) {
426 			const arch_register_t *reg = arch_get_irn_register_out(to, o);
429 			const arch_register_req_t *req = arch_get_irn_register_req_out(to, o);
430 			if (writes_reg(node, reg->global_index, req->width))
433 	} else if (is_sparc_SDiv(to) || is_sparc_UDiv(to)) {
434 		/* node will be inserted between wr and div so it must not overwrite
435 		 * anything except the wr input */
436 		int arity = get_irn_arity(to);
437 		for (int i = 0; i < arity; ++i) {
/* SDiv and UDiv share the dividend_high input position (asserted here) */
438 			assert((long)n_sparc_SDiv_dividend_high == (long)n_sparc_UDiv_dividend_high);
439 			if (i == n_sparc_SDiv_dividend_high)
441 			const arch_register_t *reg = arch_get_irn_register_in(to, i);
444 			const arch_register_req_t *req = arch_get_irn_register_req_in(to, i);
445 			if (writes_reg(node, reg->global_index, req->width))
452 static void optimize_fallthrough(ir_node *node)
454 ir_node *proj_true = NULL;
455 ir_node *proj_false = NULL;
457 assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false);
458 assert((long)pn_sparc_Bicc_true == (long)pn_sparc_fbfcc_true);
459 foreach_out_edge(node, edge) {
460 ir_node *proj = get_edge_src_irn(edge);
461 long nr = get_Proj_proj(proj);
462 if (nr == pn_sparc_Bicc_true) {
465 assert(nr == pn_sparc_Bicc_false);
469 assert(proj_true != NULL && proj_false != NULL);
471 /* for now, the code works for scheduled and non-schedules blocks */
472 const ir_node *block = get_nodes_block(node);
474 /* we have a block schedule */
475 const ir_node *next_block = (ir_node*)get_irn_link(block);
477 if (get_jump_target(proj_true) == next_block) {
478 /* exchange both proj destinations so the second one can be omitted */
479 set_Proj_proj(proj_true, pn_sparc_Bicc_false);
480 set_Proj_proj(proj_false, pn_sparc_Bicc_true);
482 sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node);
483 attr->relation = get_negated_relation(attr->relation);
/* NOTE(review): this listing is subsampled — brace/continue/return lines are
 * missing below. Strategy: (1) search up to 10 instructions BEFORE the jump
 * for something that may move down, (2) then AFTER it for something that may
 * move up, (3) finally in single-predecessor successor blocks (possibly using
 * the annul flag on conditional branches). */
488 * search for an instruction that can fill the delay slot of @p node
490 static ir_node *pick_delay_slot_for(ir_node *node)
492 	static const unsigned PICK_DELAY_SLOT_MAX_DISTANCE = 10;
493 	assert(has_delay_slot(node));
/* conditional branches first get their fallthrough direction optimized */
495 	if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) {
496 		optimize_fallthrough(node);
/* (1) scan backwards from the jump */
500 	sched_foreach_reverse_from(sched_prev(node), schedpoint) {
501 		if (has_delay_slot(schedpoint))
503 		if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
506 		if (!can_move_down_into_delayslot(schedpoint, node))
509 		/* found something */
513 	/* search after the current position */
515 	sched_foreach_from(sched_next(node), schedpoint) {
516 		if (has_delay_slot(schedpoint))
518 		if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
520 		if (!is_legal_delay_slot_filler(schedpoint))
522 		if (!can_move_up_into_delayslot(schedpoint, node))
525 		/* found something */
529 	/* look in successor blocks */
530 	ir_node *block = get_nodes_block(node);
531 	/* TODO: sort succs by execution frequency */
532 	foreach_block_succ(block, edge) {
533 		ir_node *succ = get_edge_src_irn(edge);
534 		/* we can't easily move up stuff from blocks with multiple predecessors
535 		 * since the instruction is lacking for the other preds then.
536 		 * (We also don't have to do any phi translation) */
537 		if (get_Block_n_cfgpreds(succ) > 1)
541 		sched_foreach(succ, schedpoint) {
/* can't move pinned nodes across blocks */
542 			if (has_delay_slot(schedpoint))
544 			/* can't move pinned nodes accross blocks */
545 			if (get_irn_pinned(schedpoint) == op_pin_state_pinned)
547 			/* restore doesn't model register window switching correctly,
548 			 * so it appears like we could move it, which is not true */
549 			if (is_sparc_Restore(schedpoint)
550 			    || is_sparc_RestoreZero(schedpoint))
552 			if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
554 			if (!is_legal_delay_slot_filler(schedpoint))
556 			if (can_move_up_into_delayslot(schedpoint, node)) {
557 				/* it's fine to move the insn accross blocks */
559 			} else if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) {
/* instruction only useful on the taken path: annul the delay slot so it is
 * squashed when the branch is not taken */
560 				ir_node *proj = get_Block_cfgpred(succ, 0);
561 				long nr = get_Proj_proj(proj);
562 				if ((nr == pn_sparc_Bicc_true || nr == pn_sparc_fbfcc_true)
563 				    && be_can_move_up(heights, schedpoint, succ)) {
564 					/* we can use it with the annul flag */
565 					sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node);
566 					attr->annul_delay_slot = true;
/* NOTE(review): this listing is subsampled — the switch dispatching on the
 * '%X' conversion characters lost most of its structure. Custom printf-style
 * emitter; conversions visible below:
 *   %A  ",a" annul-flag suffix for conditional branches
 *   %Dn destination register n        %E  entity +/- immediate
 *   %F{D,M,S} FP mode suffix          %H  high immediate (%hi/...)
 *   %L  jump target label (va_arg)    %M{L,S} load/store mode suffix
 *   %On address offset n              %R  register operand (va_arg)
 *   %Sn source register n (or immediate when the 'I' flag and the node's
 *       immediate-form flag are set)  %d/%u/%s plain printf-like args */
576 void sparc_emitf(ir_node const *const node, char const *fmt, ...)
582 	char const *start = fmt;
/* copy literal text up to the next conversion */
584 	while (*fmt != '%' && *fmt != '\0')
586 	be_emit_string_len(start, fmt - start);
603 	const sparc_jmp_cond_attr_t *attr
604 	= get_sparc_jmp_cond_attr_const(node);
605 	if (attr->annul_delay_slot) {
606 	be_emit_cstring(",a");
612 	if (*fmt < '0' || '9' <= *fmt)
613 	sparc_emit_dest_register(node, *fmt++ - '0');
618 	sparc_attr_t const *const attr = get_sparc_attr_const(node);
619 	be_gas_emit_entity(attr->immediate_value_entity);
620 	if (attr->immediate_value != 0) {
621 	be_emit_irprintf(plus ? "%+d" : "%d", attr->immediate_value);
629 	case 'D': mode = get_sparc_fp_conv_attr_const(node)->dest_mode; break;
630 	case 'M': mode = get_sparc_fp_attr_const(node)->fp_mode; break;
631 	case 'S': mode = get_sparc_fp_conv_attr_const(node)->src_mode; break;
632 	default: goto unknown;
634 	emit_fp_suffix(mode);
639 	sparc_emit_high_immediate(node);
643 	ir_node *n = va_arg(ap, ir_node*);
644 	sparc_emit_cfop_target(n);
650 	case 'L': sparc_emit_load_mode(node); break;
651 	case 'S': sparc_emit_store_mode(node); break;
652 	default: goto unknown;
657 	if (*fmt < '0' || '9' <= *fmt)
659 	sparc_emit_offset(node, *fmt++ - '0');
663 	arch_register_t const *const reg = va_arg(ap, const arch_register_t*);
665 	be_emit_string(reg->name);
675 	if (*fmt < '0' || '9' <= *fmt)
677 	unsigned const pos = *fmt++ - '0';
/* %SIn: emit the immediate instead of the register when the node carries one */
678 	if (imm && arch_get_irn_flags(node) & (arch_irn_flags_t)sparc_arch_irn_flag_immediate_form) {
679 	sparc_emit_immediate(node);
681 	sparc_emit_source_register(node, pos);
687 	int const num = va_arg(ap, int);
688 	be_emit_irprintf(plus ? "%+d" : "%d", num);
693 	char const *const str = va_arg(ap, char const*);
699 	unsigned const num = va_arg(ap, unsigned);
700 	be_emit_irprintf(plus ? "%+u" : "%u", num);
706 	panic("unknown format conversion in sparc_emitf()");
709 	be_emit_finish_line_gas(node);
714 * Emits code for stack space management
716 static void emit_be_IncSP(const ir_node *irn)
718 int offset = be_get_IncSP_offset(irn);
723 /* SPARC stack grows downwards */
724 char const *const insn = offset > 0 ? offset = -offset, "add" : "sub";
725 sparc_emitf(irn, "%s %S0, %d, %D0", insn, offset);
729 * Emits code for stack space management.
731 static void emit_sparc_SubSP(const ir_node *irn)
733 sparc_emitf(irn, "sub %S0, %SI1, %D0");
734 sparc_emitf(irn, "add %S0, %u, %D1", SPARC_MIN_STACKSIZE);
737 static void fill_delay_slot(const ir_node *node)
739 emitting_delay_slot = true;
740 const ir_node *filler = pmap_get(ir_node, delay_slots, node);
741 if (filler != NULL) {
742 assert(!is_no_instruction(filler));
743 assert(!emits_multiple_instructions(filler));
744 sparc_emit_node(filler);
746 sparc_emitf(NULL, "nop");
748 emitting_delay_slot = false;
751 static void emit_sparc_Div(const ir_node *node, char const *const insn)
753 sparc_emitf(node, "wr %S0, 0, %%y");
755 /* TODO: we should specify number of delayslots in an architecture
757 unsigned wry_delay_count = 3;
758 for (unsigned i = 0; i < wry_delay_count; ++i) {
760 fill_delay_slot(node);
762 emitting_delay_slot = true;
763 sparc_emitf(NULL, "nop");
764 emitting_delay_slot = false;
768 sparc_emitf(node, "%s %S1, %SI2, %D0", insn);
771 static void emit_sparc_SDiv(const ir_node *node)
773 emit_sparc_Div(node, "sdiv");
776 static void emit_sparc_UDiv(const ir_node *node)
778 emit_sparc_Div(node, "udiv");
781 static void emit_sparc_Call(const ir_node *node)
783 if (is_sparc_reg_call(node)) {
784 int dest_addr = get_sparc_Call_dest_addr_pos(node);
785 sparc_emitf(node, "call %R", arch_get_irn_register_in(node, dest_addr));
787 sparc_emitf(node, "call %E, 0");
790 fill_delay_slot(node);
792 if (arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return) {
793 sparc_emitf(NULL, "unimp 8");
797 static void emit_be_Perm(const ir_node *irn)
799 ir_mode *mode = get_irn_mode(get_irn_n(irn, 0));
800 if (mode_is_float(mode)) {
801 const arch_register_t *reg0 = arch_get_irn_register_in(irn, 0);
802 const arch_register_t *reg1 = arch_get_irn_register_in(irn, 1);
803 unsigned reg_idx0 = reg0->global_index;
804 unsigned reg_idx1 = reg1->global_index;
805 unsigned width = arch_get_irn_register_req_in(irn, 0)->width;
806 for (unsigned i = 0; i < width; ++i) {
807 const arch_register_t *r0 = &sparc_registers[reg_idx0+i];
808 const arch_register_t *r1 = &sparc_registers[reg_idx1+i];
809 sparc_emitf(irn, "fmovs %R, %%f31", r0);
810 sparc_emitf(irn, "fmovs %R, %R", r1, r0);
811 sparc_emitf(irn, "fmovs %%f31, %R", r1);
814 sparc_emitf(irn, "xor %S1, %S0, %S0");
815 sparc_emitf(irn, "xor %S1, %S0, %S1");
816 sparc_emitf(irn, "xor %S1, %S0, %S0");
820 /* The stack pointer must always be SPARC_STACK_ALIGNMENT bytes aligned, so get
821 * the next bigger integer that's evenly divisible by it. */
822 static unsigned get_aligned_sp_change(const unsigned num_regs)
824 const unsigned bytes = num_regs * SPARC_REGISTER_SIZE;
825 return round_up2(bytes, SPARC_STACK_ALIGNMENT);
828 /* Spill register l0 or both l0 and l1, depending on n_spilled and n_to_spill.*/
829 static void memperm_emit_spill_registers(const ir_node *node, int n_spilled,
832 assert(n_spilled < n_to_spill);
834 if (n_spilled == 0) {
835 /* We always reserve stack space for two registers because during copy
836 * processing we don't know yet if we also need to handle a cycle which
837 * needs two registers. More complicated code in emit_MemPerm would
838 * prevent wasting SPARC_REGISTER_SIZE bytes of stack space but
839 * it is not worth the worse readability of emit_MemPerm. */
841 /* Keep stack pointer aligned. */
842 unsigned sp_change = get_aligned_sp_change(2);
843 sparc_emitf(node, "sub %%sp, %u, %%sp", sp_change);
845 /* Spill register l0. */
846 sparc_emitf(node, "st %%l0, [%%sp%+d]", SPARC_MIN_STACKSIZE);
849 if (n_to_spill == 2) {
850 /* Spill register l1. */
851 sparc_emitf(node, "st %%l1, [%%sp%+d]", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE);
855 /* Restore register l0 or both l0 and l1, depending on n_spilled. */
856 static void memperm_emit_restore_registers(const ir_node *node, int n_spilled)
858 if (n_spilled == 2) {
859 /* Restore register l1. */
860 sparc_emitf(node, "ld [%%sp%+d], %%l1", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE);
863 /* Restore register l0. */
864 sparc_emitf(node, "ld [%%sp%+d], %%l0", SPARC_MIN_STACKSIZE);
866 /* Restore stack pointer. */
867 unsigned sp_change = get_aligned_sp_change(2);
868 sparc_emitf(node, "add %%sp, %u, %%sp", sp_change);
871 /* Emit code to copy in_ent to out_ent. Only uses l0. */
872 static void memperm_emit_copy(const ir_node *node, ir_entity *in_ent,
875 ir_graph *irg = get_irn_irg(node);
876 be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
877 int off_in = be_get_stack_entity_offset(layout, in_ent, 0);
878 int off_out = be_get_stack_entity_offset(layout, out_ent, 0);
880 /* Load from input entity. */
881 sparc_emitf(node, "ld [%%fp%+d], %%l0", off_in);
882 /* Store to output entity. */
883 sparc_emitf(node, "st %%l0, [%%fp%+d]", off_out);
886 /* Emit code to swap ent1 and ent2. Uses l0 and l1. */
887 static void memperm_emit_swap(const ir_node *node, ir_entity *ent1,
890 ir_graph *irg = get_irn_irg(node);
891 be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
892 int off1 = be_get_stack_entity_offset(layout, ent1, 0);
893 int off2 = be_get_stack_entity_offset(layout, ent2, 0);
895 /* Load from first input entity. */
896 sparc_emitf(node, "ld [%%fp%+d], %%l0", off1);
897 /* Load from second input entity. */
898 sparc_emitf(node, "ld [%%fp%+d], %%l1", off2);
899 /* Store first value to second output entity. */
900 sparc_emitf(node, "st %%l0, [%%fp%+d]", off2);
901 /* Store second value to first output entity. */
902 sparc_emitf(node, "st %%l1, [%%fp%+d]", off1);
905 /* Find the index of ent in ents or return -1 if not found. */
906 static int get_index(ir_entity **ents, int n, ir_entity *ent)
908 for (int i = 0; i < n; ++i) {
/* NOTE(review): this listing is subsampled — brace/assignment lines are
 * missing below. Algorithm: build a source-of mapping over the (unique)
 * entities, resolve all copy chains first, then break the remaining cycles
 * with pairwise swaps, spilling %l0 (and %l1 for swaps) as scratch. */
917 * Emit code for a MemPerm node.
919 * Analyze MemPerm for copy chains and cyclic swaps and resolve them using
921 * This function is conceptually very similar to permute_values in
924 static void emit_be_MemPerm(const ir_node *node)
926 	int memperm_arity = be_get_MemPerm_entity_arity(node);
927 	/* Upper limit for the number of participating entities is twice the
928 	 * arity, e.g., for a simple copying MemPerm node with one input/output. */
929 	int max_size = 2 * memperm_arity;
930 	ir_entity **entities = ALLOCANZ(ir_entity *, max_size);
931 	/* sourceof contains the input entity for each entity. If an entity is
932 	 * never used as an output, its entry in sourceof is a fix point. */
933 	int *sourceof = ALLOCANZ(int, max_size);
934 	/* n_users counts how many output entities use this entity as their input.*/
935 	int *n_users = ALLOCANZ(int, max_size);
936 	/* n_spilled records the number of spilled registers, either 1 or 2. */
939 	/* This implementation currently only works with frame pointers. */
940 	ir_graph *irg = get_irn_irg(node);
941 	be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
942 	assert(!layout->sp_relative && "MemPerms currently do not work without frame pointers");
/* initialize sourceof to the identity (every entity is its own fix point) */
944 	for (int i = 0; i < max_size; ++i) {
949 	for (int i = 0; i < memperm_arity; ++i) {
950 		ir_entity *out = be_get_MemPerm_out_entity(node, i);
951 		ir_entity *in  = be_get_MemPerm_in_entity(node, i);
953 		/* Insert into entities to be able to operate on unique indices. */
954 		if (get_index(entities, n, out) == -1)
956 		if (get_index(entities, n, in) == -1)
959 		int oidx = get_index(entities, n, out);
960 		int iidx = get_index(entities, n, in);
962 		sourceof[oidx] = iidx; /* Remember the source. */
963 		++n_users[iidx]; /* Increment number of users of this entity. */
966 	/* First do all the copies. */
967 	for (int oidx = 0; oidx < n; /* empty */) {
968 		int iidx = sourceof[oidx];
970 		/* Nothing to do for fix points.
971 		 * Also, if entities[oidx] is used as an input by another copy, we
972 		 * can't overwrite entities[oidx] yet.*/
973 		if (iidx == oidx || n_users[oidx] > 0) {
978 		/* We found the end of a 'chain', so do the copy. */
979 		if (n_spilled == 0) {
980 			memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/1);
983 		memperm_emit_copy(node, entities[iidx], entities[oidx]);
/* mark oidx as done and release its input for earlier chain members */
986 		sourceof[oidx] = oidx;
988 		assert(n_users[iidx] > 0);
989 		/* Decrementing the number of users might enable us to do another
993 		if (iidx < oidx && n_users[iidx] == 0) {
1000 	/* The rest are cycles. */
1001 	for (int oidx = 0; oidx < n; /* empty */) {
1002 		int iidx = sourceof[oidx];
1004 		/* Nothing to do for fix points. */
1010 		assert(n_users[iidx] == 1);
1012 		/* Swap the two values to resolve the cycle. */
1013 		if (n_spilled < 2) {
1014 			memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/2);
1017 		memperm_emit_swap(node, entities[iidx], entities[oidx]);
1019 		int tidx = sourceof[iidx];
/* iidx becomes a fix point; oidx inherits iidx's old source */
1021 		sourceof[iidx] = iidx;
1023 		/* The source of oidx is now the old source of iidx, because we swapped
1024 		 * the two entities. */
1025 		sourceof[oidx] = tidx;
1028 #ifdef DEBUG_libfirm
1029 	/* Only fix points should remain. */
1030 	for (int i = 0; i < max_size; ++i) {
1031 		assert(sourceof[i] == i);
1035 	assert(n_spilled > 0 && "Useless MemPerm node");
1037 	memperm_emit_restore_registers(node, n_spilled);
1040 static void emit_sparc_Return(const ir_node *node)
1042 ir_graph *irg = get_irn_irg(node);
1043 ir_entity *entity = get_irg_entity(irg);
1044 ir_type *type = get_entity_type(entity);
1046 const char *destreg = "%o7";
1048 /* hack: we don't explicitely model register changes because of the
1049 * restore node. So we have to do it manually here */
1050 const ir_node *delay_slot = pmap_get(ir_node, delay_slots, node);
1051 if (delay_slot != NULL &&
1052 (is_sparc_Restore(delay_slot) || is_sparc_RestoreZero(delay_slot))) {
1055 char const *const offset = get_method_calling_convention(type) & cc_compound_ret ? "12" : "8";
1056 sparc_emitf(node, "jmp %s+%s", destreg, offset);
1057 fill_delay_slot(node);
1060 static const arch_register_t *map_i_to_o_reg(const arch_register_t *reg)
1062 unsigned idx = reg->global_index;
1063 if (idx < REG_I0 || idx > REG_I7)
1065 idx += REG_O0 - REG_I0;
1066 assert(REG_O0 <= idx && idx <= REG_O7);
1067 return &sparc_registers[idx];
1070 static void emit_sparc_Restore(const ir_node *node)
1072 const arch_register_t *destreg
1073 = arch_get_irn_register_out(node, pn_sparc_Restore_res);
1074 sparc_emitf(node, "restore %S2, %SI3, %R", map_i_to_o_reg(destreg));
1077 static void emit_sparc_FrameAddr(const ir_node *node)
1079 const sparc_attr_t *attr = get_sparc_attr_const(node);
1080 int32_t offset = attr->immediate_value;
1082 char const *const insn = offset > 0 ? offset = -offset, "sub" : "add";
1083 assert(sparc_is_value_imm_encodeable(offset));
1084 sparc_emitf(node, "%s %S0, %d, %D0", insn, offset);
1087 static const char *get_icc_unsigned(ir_relation relation)
1089 switch (relation & (ir_relation_less_equal_greater)) {
1090 case ir_relation_false: return "bn";
1091 case ir_relation_equal: return "be";
1092 case ir_relation_less: return "blu";
1093 case ir_relation_less_equal: return "bleu";
1094 case ir_relation_greater: return "bgu";
1095 case ir_relation_greater_equal: return "bgeu";
1096 case ir_relation_less_greater: return "bne";
1097 case ir_relation_less_equal_greater: return "ba";
1098 default: panic("Cmp has unsupported relation");
1102 static const char *get_icc_signed(ir_relation relation)
1104 switch (relation & (ir_relation_less_equal_greater)) {
1105 case ir_relation_false: return "bn";
1106 case ir_relation_equal: return "be";
1107 case ir_relation_less: return "bl";
1108 case ir_relation_less_equal: return "ble";
1109 case ir_relation_greater: return "bg";
1110 case ir_relation_greater_equal: return "bge";
1111 case ir_relation_less_greater: return "bne";
1112 case ir_relation_less_equal_greater: return "ba";
1113 default: panic("Cmp has unsupported relation");
1117 static const char *get_fcc(ir_relation relation)
1120 case ir_relation_false: return "fbn";
1121 case ir_relation_equal: return "fbe";
1122 case ir_relation_less: return "fbl";
1123 case ir_relation_less_equal: return "fble";
1124 case ir_relation_greater: return "fbg";
1125 case ir_relation_greater_equal: return "fbge";
1126 case ir_relation_less_greater: return "fblg";
1127 case ir_relation_less_equal_greater: return "fbo";
1128 case ir_relation_unordered: return "fbu";
1129 case ir_relation_unordered_equal: return "fbue";
1130 case ir_relation_unordered_less: return "fbul";
1131 case ir_relation_unordered_less_equal: return "fbule";
1132 case ir_relation_unordered_greater: return "fbug";
1133 case ir_relation_unordered_greater_equal: return "fbuge";
1134 case ir_relation_unordered_less_greater: return "fbne";
1135 case ir_relation_true: return "fba";
1137 panic("invalid relation");
1140 typedef const char* (*get_cc_func)(ir_relation relation);
1142 static void emit_sparc_branch(const ir_node *node, get_cc_func get_cc)
1144 const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node);
1145 ir_relation relation = attr->relation;
1146 const ir_node *proj_true = NULL;
1147 const ir_node *proj_false = NULL;
1149 assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false);
1150 assert((long)pn_sparc_Bicc_true == (long)pn_sparc_fbfcc_true);
1151 foreach_out_edge(node, edge) {
1152 ir_node *proj = get_edge_src_irn(edge);
1153 long nr = get_Proj_proj(proj);
1154 if (nr == pn_sparc_Bicc_true) {
1157 assert(nr == pn_sparc_Bicc_false);
1162 /* emit the true proj */
1163 sparc_emitf(node, "%s%A %L", get_cc(relation), proj_true);
1164 fill_delay_slot(node);
1166 const ir_node *block = get_nodes_block(node);
1167 const ir_node *next_block = (ir_node*)get_irn_link(block);
1169 if (get_jump_target(proj_false) == next_block) {
1170 if (be_options.verbose_asm) {
1171 sparc_emitf(node, "/* fallthrough to %L */", proj_false);
1174 sparc_emitf(node, "ba %L", proj_false);
1175 /* TODO: fill this slot as well */
1176 emitting_delay_slot = true;
1177 sparc_emitf(NULL, "nop");
1178 emitting_delay_slot = false;
1182 static void emit_sparc_Bicc(const ir_node *node)
1184 const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node);
1185 bool is_unsigned = attr->is_unsigned;
1186 emit_sparc_branch(node, is_unsigned ? get_icc_unsigned : get_icc_signed);
1189 static void emit_sparc_fbfcc(const ir_node *node)
1191 /* if the flags producing node was immediately in front of us, emit
1193 ir_node *flags = get_irn_n(node, n_sparc_fbfcc_flags);
1194 ir_node *prev = sched_prev(node);
1195 if (is_Block(prev)) {
1196 /* TODO: when the flags come from another block, then we have to do
1197 * more complicated tests to see wether the flag producing node is
1198 * potentially in front of us (could happen for fallthroughs) */
1199 panic("TODO: fbfcc flags come from other block");
1201 if (skip_Proj(flags) == prev) {
1202 sparc_emitf(NULL, "nop");
1204 emit_sparc_branch(node, get_fcc);
1207 static void emit_sparc_Ba(const ir_node *node)
1209 if (ba_is_fallthrough(node)) {
1210 if (be_options.verbose_asm) {
1211 sparc_emitf(node, "/* fallthrough to %L */", node);
1214 sparc_emitf(node, "ba %L", node);
1215 fill_delay_slot(node);
1219 static void emit_sparc_SwitchJmp(const ir_node *node)
1221 const sparc_switch_jmp_attr_t *attr = get_sparc_switch_jmp_attr_const(node);
1223 sparc_emitf(node, "jmp %S0");
1224 fill_delay_slot(node);
1226 be_emit_jump_table(node, attr->table, attr->table_entity, get_jump_target);
/**
 * Emit a single-precision register-to-register float move (fmovs).
 * Wider copies are assembled from several fmovs by the caller.
 */
static void emit_fmov(const ir_node *node, const arch_register_t *src_reg,
                      const arch_register_t *dst_reg)
{
	sparc_emitf(node, "fmovs %R, %R", src_reg, dst_reg);
}
1235 static const arch_register_t *get_next_fp_reg(const arch_register_t *reg)
1237 unsigned idx = reg->global_index;
1238 assert(reg == &sparc_registers[idx]);
1240 assert(idx - REG_F0 < N_sparc_fp_REGS);
1241 return &sparc_registers[idx];
1244 static void emit_be_Copy(const ir_node *node)
1246 ir_mode *mode = get_irn_mode(node);
1247 const arch_register_t *src_reg = arch_get_irn_register_in(node, 0);
1248 const arch_register_t *dst_reg = arch_get_irn_register_out(node, 0);
1250 if (src_reg == dst_reg)
1253 if (mode_is_float(mode)) {
1254 unsigned bits = get_mode_size_bits(mode);
1255 int n = bits > 32 ? bits > 64 ? 3 : 1 : 0;
1256 emit_fmov(node, src_reg, dst_reg);
1257 for (int i = 0; i < n; ++i) {
1258 src_reg = get_next_fp_reg(src_reg);
1259 dst_reg = get_next_fp_reg(dst_reg);
1260 emit_fmov(node, src_reg, dst_reg);
1262 } else if (mode_is_data(mode)) {
1263 sparc_emitf(node, "mov %S0, %D0");
1265 panic("invalid mode");
1269 static void emit_nothing(const ir_node *irn)
1274 typedef void (*emit_func) (const ir_node *);
1276 static inline void set_emitter(ir_op *op, emit_func sparc_emit_node)
1278 op->ops.generic = (op_func)sparc_emit_node;
/**
 * Enters the emitter functions for handled nodes into the generic
 * pointer of an opcode.
 */
static void sparc_register_emitters(void)
{
	/* first clear the generic function pointer for all ops */
	ir_clear_opcodes_generic_func();
	/* register all emitter functions defined in spec */
	sparc_register_spec_emitters();

	/* custom emitter */
	set_emitter(op_be_Copy, emit_be_Copy);
	set_emitter(op_be_CopyKeep, emit_be_Copy);
	set_emitter(op_be_IncSP, emit_be_IncSP);
	set_emitter(op_be_MemPerm, emit_be_MemPerm);
	set_emitter(op_be_Perm, emit_be_Perm);
	set_emitter(op_sparc_Ba, emit_sparc_Ba);
	set_emitter(op_sparc_Bicc, emit_sparc_Bicc);
	set_emitter(op_sparc_Call, emit_sparc_Call);
	set_emitter(op_sparc_fbfcc, emit_sparc_fbfcc);
	set_emitter(op_sparc_FrameAddr, emit_sparc_FrameAddr);
	set_emitter(op_sparc_SubSP, emit_sparc_SubSP);
	set_emitter(op_sparc_Restore, emit_sparc_Restore);
	set_emitter(op_sparc_Return, emit_sparc_Return);
	set_emitter(op_sparc_SDiv, emit_sparc_SDiv);
	set_emitter(op_sparc_SwitchJmp, emit_sparc_SwitchJmp);
	set_emitter(op_sparc_UDiv, emit_sparc_UDiv);

	/* no need to emit anything for the following nodes */
	set_emitter(op_be_Keep, emit_nothing);
	set_emitter(op_sparc_Start, emit_nothing);
	set_emitter(op_Phi, emit_nothing);
}
1317 * Emits code for a node.
1319 static void sparc_emit_node(const ir_node *node)
1321 ir_op *op = get_irn_op(node);
1323 if (op->ops.generic) {
1324 emit_func func = (emit_func) op->ops.generic;
1325 be_dwarf_location(get_irn_dbg_info(node));
1328 panic("No emit handler for node %+F (graph %+F)\n", node,
1333 static bool block_needs_label(const ir_node *block, const ir_node *sched_prev)
1335 if (get_Block_entity(block) != NULL)
1338 int n_cfgpreds = get_Block_n_cfgpreds(block);
1339 if (n_cfgpreds == 0) {
1341 } else if (n_cfgpreds > 1) {
1344 ir_node *cfgpred = get_Block_cfgpred(block, 0);
1345 ir_node *cfgpred_block = get_nodes_block(cfgpred);
1346 if (is_Proj(cfgpred) && is_sparc_SwitchJmp(get_Proj_pred(cfgpred)))
1348 return sched_prev != cfgpred_block || get_jump_target(cfgpred) != block;
1353 * Walks over the nodes in a block connected by scheduling edges
1354 * and emits code for each node.
1356 static void sparc_emit_block(ir_node *block, ir_node *prev)
1358 bool needs_label = block_needs_label(block, prev);
1359 be_gas_begin_block(block, needs_label);
1361 sched_foreach(block, node) {
1362 if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node)))
1364 sparc_emit_node(node);
1369 * Emits code for function start.
1371 static void sparc_emit_func_prolog(ir_graph *irg)
1373 ir_entity *entity = get_irg_entity(irg);
1374 be_gas_emit_function_prolog(entity, 4, NULL);
1378 * Emits code for function end
1380 static void sparc_emit_func_epilog(ir_graph *irg)
1382 ir_entity *entity = get_irg_entity(irg);
1383 be_gas_emit_function_epilog(entity);
1386 static void init_jump_links(ir_node *block, void *env)
1390 int n = get_Block_n_cfgpreds(block);
1391 for (n--; n >= 0; n--) {
1392 ir_node *pred = get_Block_cfgpred(block, n);
1393 set_jump_target(pred, block);
1397 static int cmp_block_execfreqs(const void *d1, const void *d2)
1399 ir_node **p1 = (ir_node**)d1;
1400 ir_node **p2 = (ir_node**)d2;
1401 double freq1 = get_block_execfreq(*p1);
1402 double freq2 = get_block_execfreq(*p2);
1407 return get_irn_node_nr(*p2)-get_irn_node_nr(*p1);
1410 static void pick_delay_slots(size_t n_blocks, ir_node **blocks)
1412 /* create blocklist sorted by execution frequency */
1413 ir_node **sorted_blocks = XMALLOCN(ir_node*, n_blocks);
1414 memcpy(sorted_blocks, blocks, n_blocks*sizeof(sorted_blocks[0]));
1415 qsort(sorted_blocks, n_blocks, sizeof(sorted_blocks[0]),
1416 cmp_block_execfreqs);
1418 for (size_t i = 0; i < n_blocks; ++i) {
1419 const ir_node *block = sorted_blocks[i];
1420 sched_foreach(block, node) {
1421 if (!has_delay_slot(node))
1423 ir_node *filler = pick_delay_slot_for(node);
1426 rbitset_set(delay_slot_fillers, get_irn_idx(filler));
1427 pmap_insert(delay_slots, node, filler);
1432 void sparc_emit_routine(ir_graph *irg)
1434 heights = heights_new(irg);
1435 delay_slot_fillers = rbitset_malloc(get_irg_last_idx(irg));
1436 delay_slots = pmap_create();
1438 /* register all emitter functions */
1439 sparc_register_emitters();
1441 /* create the block schedule. For now, we don't need it earlier. */
1442 ir_node **block_schedule = be_create_block_schedule(irg);
1444 sparc_emit_func_prolog(irg);
1445 irg_block_walk_graph(irg, init_jump_links, NULL, NULL);
1447 /* inject block scheduling links & emit code of each block */
1448 size_t n_blocks = ARR_LEN(block_schedule);
1449 for (size_t i = 0; i < n_blocks; ++i) {
1450 ir_node *block = block_schedule[i];
1451 ir_node *next_block = i+1 < n_blocks ? block_schedule[i+1] : NULL;
1452 set_irn_link(block, next_block);
1455 pick_delay_slots(n_blocks, block_schedule);
1457 for (size_t i = 0; i < n_blocks; ++i) {
1458 ir_node *block = block_schedule[i];
1459 ir_node *prev = i>=1 ? block_schedule[i-1] : NULL;
1460 if (block == get_irg_end_block(irg))
1462 sparc_emit_block(block, prev);
1465 /* emit function epilog */
1466 sparc_emit_func_epilog(irg);
1468 pmap_destroy(delay_slots);
1469 xfree(delay_slot_fillers);
1470 heights_free(heights);
1473 void sparc_init_emitter(void)
1475 FIRM_DBG_REGISTER(dbg, "firm.be.sparc.emit");