Fixed some problems due to refactoring in previous revisions.
[libfirm] / ir / be / ia32 / ia32_common_transform.c
index 48fb83f..d81b1cf 100644 (file)
 #include "ircons.h"
 #include "irprintf.h"
 #include "typerep.h"
+#include "bitset.h"
 
 #include "../betranshlp.h"
-#include "../beirg_t.h"
+#include "../beirg.h"
+#include "../beabi.h"
 
 #include "ia32_architecture.h"
 #include "ia32_common_transform.h"
@@ -72,13 +74,8 @@ static int check_immediate_constraint(long val, char immediate_constraint_type)
        }
 }
 
-/**
- * creates a unique ident by adding a number to a tag
- *
- * @param tag   the tag string, must contain a %d if a number
- *              should be added
- */
-static ident *unique_id(const char *tag)
+/* creates a unique ident by adding a number to a tag */
+ident *ia32_unique_id(const char *tag)
 {
        static unsigned id = 0;
        char str[256];
@@ -88,7 +85,7 @@ static ident *unique_id(const char *tag)
 }
 
 /**
- * Get a primitive type for a mode.
+ * Get a primitive type for a mode; modes of 80 bits or more get alignment 16.
  */
 static ir_type *ia32_get_prim_type(pmap *types, ir_mode *mode)
 {
@@ -99,7 +96,9 @@ static ir_type *ia32_get_prim_type(pmap *types, ir_mode *mode)
                char buf[64];
                snprintf(buf, sizeof(buf), "prim_type_%s", get_mode_name(mode));
                res = new_type_primitive(new_id_from_str(buf), mode);
-               set_type_alignment_bytes(res, 16);
+               if (get_mode_size_bits(mode) >= 80) {
+                       set_type_alignment_bytes(res, 16);
+               }
                pmap_insert(types, mode, res);
        }
        else
@@ -143,7 +142,7 @@ ir_entity *create_float_const_entity(ir_node *cnst)
                } else
                        tp = ia32_get_prim_type(isa->types, mode);
 
-               res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
+               res = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
 
                set_entity_ld_ident(res, get_entity_ident(res));
                set_entity_visibility(res, visibility_local);
@@ -165,12 +164,12 @@ ir_entity *create_float_const_entity(ir_node *cnst)
        return res;
 }
 
-ir_node *create_Immediate(ir_entity *symconst, int symconst_sign, long val)
+ir_node *ia32_create_Immediate(ir_entity *symconst, int symconst_sign, long val)
 {
        ir_graph *irg         = current_ir_graph;
        ir_node  *start_block = get_irg_start_block(irg);
        ir_node  *immediate   = new_bd_ia32_Immediate(NULL, start_block, symconst,
-                       symconst_sign, val);
+                       symconst_sign, no_pic_adjust, val);
        arch_set_irn_register(immediate, &ia32_gp_regs[REG_GP_NOREG]);
 
        return immediate;
@@ -439,6 +438,18 @@ static void parse_asm_constraints(constraint_t *constraint, const char *c,
        constraint->immediate_type        = immediate_type;
 }
 
+/* true if both requirements share a class and, when both are limited,
+ * the first words of their `limited` masks have a bit in common */
+static bool can_match(const arch_register_req_t *in,
+                      const arch_register_req_t *out)
+{
+       bool in_limited  = (in->type & arch_register_req_type_limited) != 0;
+       bool out_limited = (out->type & arch_register_req_type_limited) != 0;
+       if (in->cls != out->cls)
+               return false;
+       return !in_limited || !out_limited || (*in->limited & *out->limited) != 0;
+}
+
 ir_node *gen_ASM(ir_node *node)
 {
        ir_node                    *block = NULL;
@@ -461,6 +472,8 @@ ir_node *gen_ASM(ir_node *node)
        ident                     **clobbers;
        int                         clobbers_flags = 0;
        unsigned                    clobber_bits[N_CLASSES];
+       int                         out_size;
+       backend_info_t             *info;
 
        memset(&clobber_bits, 0, sizeof(clobber_bits));
 
@@ -533,7 +546,8 @@ ir_node *gen_ASM(ir_node *node)
        memset(register_map, 0, reg_map_size * sizeof(register_map[0]));
 
        /* construct output constraints */
-       out_reg_reqs = obstack_alloc(obst, out_arity * sizeof(out_reg_reqs[0]));
+       out_size = out_arity + 1;
+       out_reg_reqs = obstack_alloc(obst, out_size * sizeof(out_reg_reqs[0]));
 
        for (out_idx = 0; out_idx < n_out_constraints; ++out_idx) {
                const ir_asm_constraint   *constraint = &out_constraints[out_idx];
@@ -638,13 +652,139 @@ ir_node *gen_ASM(ir_node *node)
                ++out_idx;
        }
 
+       /* Attempt to make ASM node register pressure faithful.
+        * (This does not work for complicated cases yet!)
+        *
+        * Algorithm: Check if there are fewer inputs or outputs (I will call this
+        * the smaller list). Then try to match each constraint of the smaller list
+        * to 1 of the other list. If we can't match it, then we have to add a dummy
+        * input/output to the other list
+        *
+        * FIXME: This is still broken in lots of cases. But at least better than
+        *        before...
+        * FIXME: need to do this per register class...
+        */
+       if (out_arity <= arity) {
+               int       orig_arity = arity;
+               int       in_size    = arity;
+               int       o;
+               bitset_t *used_ins = bitset_alloca(arity);
+               for (o = 0; o < out_arity; ++o) {
+                       int   i;
+                       const arch_register_req_t *outreq = out_reg_reqs[o];
+
+                       if (outreq->cls == NULL) {
+                               continue;
+                       }
+
+                       for (i = 0; i < orig_arity; ++i) {
+                               const arch_register_req_t *inreq;
+                               if (bitset_is_set(used_ins, i))
+                                       continue;
+                               inreq = in_reg_reqs[i];
+                               if (!can_match(outreq, inreq))
+                                       continue;
+                               bitset_set(used_ins, i);
+                               break;
+                       }
+                       /* did we find any match? */
+                       if (i < orig_arity)
+                               continue;
+
+                       /* we might need more space in the input arrays */
+                       if (arity >= in_size) {
+                               const arch_register_req_t **new_in_reg_reqs;
+                               ir_node             **new_in;
+
+                               in_size *= 2;
+                               new_in_reg_reqs
+                                       = obstack_alloc(obst, in_size*sizeof(in_reg_reqs[0]));
+                               memcpy(new_in_reg_reqs, in_reg_reqs, arity * sizeof(new_in_reg_reqs[0]));
+                               new_in = ALLOCANZ(ir_node*, in_size);
+                               memcpy(new_in, in, arity*sizeof(new_in[0]));
+
+                               in_reg_reqs = new_in_reg_reqs;
+                               in          = new_in;
+                       }
+
+                       /* add a new (dummy) input which occupies the register */
+                       assert(outreq->type & arch_register_req_type_limited);
+                       in_reg_reqs[arity] = outreq;
+                       in[arity]          = new_bd_ia32_ProduceVal(NULL, block);
+                       be_dep_on_frame(in[arity]);
+                       ++arity;
+               }
+       } else {
+               int       i;
+               bitset_t *used_outs = bitset_alloca(out_arity);
+               int       orig_out_arity = out_arity;
+               for (i = 0; i < arity; ++i) {
+                       int   o;
+                       const arch_register_req_t *inreq = in_reg_reqs[i];
+
+                       if (inreq->cls == NULL) {
+                               continue;
+                       }
+
+                       for (o = 0; o < orig_out_arity; ++o) {
+                               const arch_register_req_t *outreq;
+                               if (bitset_is_set(used_outs, o))
+                                       continue;
+                               outreq = out_reg_reqs[o];
+                               if (!can_match(outreq, inreq))
+                                       continue;
+                               bitset_set(used_outs, o);
+                               break;
+                       }
+                       /* did we find any match? */
+                       if (o < orig_out_arity)
+                               continue;
+
+                       /* we might need more space in the output arrays */
+                       if (out_arity >= out_size) {
+                               const arch_register_req_t **new_out_reg_reqs;
+
+                               out_size *= 2;
+                               new_out_reg_reqs
+                                       = obstack_alloc(obst, out_size*sizeof(out_reg_reqs[0]));
+                               memcpy(new_out_reg_reqs, out_reg_reqs,
+                                      out_arity * sizeof(new_out_reg_reqs[0]));
+                               out_reg_reqs = new_out_reg_reqs;
+                       }
+
+                       /* add a new (dummy) output which occupies the register */
+                       assert(inreq->type & arch_register_req_type_limited);
+                       out_reg_reqs[out_arity] = inreq;
+                       ++out_arity;
+               }
+       }
+
+       /* append none register requirement for the memory output */
+       if (out_arity + 1 > out_size) {
+               const arch_register_req_t **new_out_reg_reqs;
+
+               out_size = out_arity + 1;
+               new_out_reg_reqs
+                       = obstack_alloc(obst, out_size*sizeof(out_reg_reqs[0]));
+               memcpy(new_out_reg_reqs, out_reg_reqs,
+                          out_arity * sizeof(new_out_reg_reqs[0]));
+               out_reg_reqs = new_out_reg_reqs;
+       }
+
+       /* the memory output goes last and is given no register */
+       out_reg_reqs[out_arity] = arch_no_register_req;
+       ++out_arity;
+
        new_node = new_bd_ia32_Asm(dbgi, new_block, arity, in, out_arity,
                                   get_ASM_text(node), register_map);
 
        if (arity == 0)
                be_dep_on_frame(new_node);
 
-       set_ia32_out_req_all(new_node, out_reg_reqs);
+       info = be_get_info(new_node);
+       for (i = 0; i < out_arity; ++i) {
+               info->out_infos[i].req = out_reg_reqs[i];
+       }
        set_ia32_in_req_all(new_node, in_reg_reqs);
 
        SET_IA32_ORIG_NODE(new_node, node);
@@ -695,7 +835,7 @@ ir_node *gen_CopyB(ir_node *node) {
                rem = size & 0x3; /* size % 4 */
                size >>= 2;
 
-               res = new_bd_ia32_Const(dbgi, block, NULL, 0, size);
+               res = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, size);
                be_dep_on_frame(res);
 
                res = new_bd_ia32_CopyB(dbgi, block, new_dst, new_src, res, new_mem, rem);
@@ -976,7 +1116,7 @@ ir_node *try_create_Immediate(ir_node *node, char immediate_constraint_type)
                offset = tarval_neg(offset);
        }
 
-       new_node = create_Immediate(symconst_ent, symconst_sign, val);
+       new_node = ia32_create_Immediate(symconst_ent, symconst_sign, val);
 
        return new_node;
 }