f9e769c2bcc73a5c50d7e7c1438dceee637617c4
[libfirm] / ir / be / benewalloc.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       New approach to allocation and copy coalescing
23  * @author      Matthias Braun
24  * @date        14.2.2009
25  * @version     $Id$
26  *
27  * ... WE NEED A NAME FOR THIS ...
28  *
29  * Only a proof of concept at this moment...
30  *
31  * The idea is to allocate registers in 2 passes:
32  * 1. A first pass to determine "preferred" registers for live-ranges. This
33  *    calculates for each register and each live-range a value indicating
34  *    the usefulness. (You can roughly think of the value as the negative
35  *    costs needed for copies when the value is in the specific registers...)
36  *
37  * 2. Walk blocks and assigns registers in a greedy fashion. Preferring
38  *    registers with high preferences. When register constraints are not met,
39  *    add copies and split live-ranges.
40  *
41  * TODO:
42  *  - make use of free registers in the permute_values code
43  *  - think about a smarter sequence of visiting the blocks. Sorted by
44  *    execfreq might be good, or looptree from inner to outermost loops going
45  *    over blocks in a reverse postorder
46  *  - propagate preferences through Phis
47  */
48 #include "config.h"
49
50 #include <float.h>
51 #include <stdbool.h>
52
53 #include "error.h"
54 #include "execfreq.h"
55 #include "ircons.h"
56 #include "irdom.h"
57 #include "iredges_t.h"
58 #include "irgraph_t.h"
59 #include "irgwalk.h"
60 #include "irnode_t.h"
61 #include "obst.h"
62 #include "raw_bitset.h"
63 #include "unionfind.h"
64
65 #include "beabi.h"
66 #include "bechordal_t.h"
67 #include "be.h"
68 #include "beirg.h"
69 #include "belive_t.h"
70 #include "bemodule.h"
71 #include "benode_t.h"
72 #include "bera.h"
73 #include "besched.h"
74 #include "bespill.h"
75 #include "bespillutil.h"
76 #include "beverify.h"
77
78 #include "hungarian.h"
79
80 #define USE_FACTOR         1.0f
81 #define DEF_FACTOR         1.0f
82 #define NEIGHBOR_FACTOR    0.2f
83 #define AFF_SHOULD_BE_SAME 0.5f
84 #define AFF_PHI            1.0f
85 #define SPLIT_DELTA        1.0f
86
87 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
88
89 static struct obstack               obst;
90 static be_irg_t                    *birg;
91 static ir_graph                    *irg;
92 static const arch_register_class_t *cls;
93 static const arch_register_req_t   *default_cls_req;
94 static be_lv_t                     *lv;
95 static const ir_exec_freq          *execfreqs;
96 static unsigned                     n_regs;
97 static unsigned                    *normal_regs;
98 static int                         *congruence_classes;
99
100 /** currently active assignments (while processing a basic block)
101  * maps registers to values(their current copies) */
102 static ir_node **assignments;
103
104 /**
105  * allocation information: last_uses, register preferences
106  * the information is per firm-node.
107  */
108 struct allocation_info_t {
109         unsigned  last_uses;      /**< bitset indicating last uses (input pos) */
110         ir_node  *current_value;  /**< copy of the value that should be used */
111         ir_node  *original_value; /**< for copies point to original value */
112         float     prefs[0];       /**< register preferences */
113 };
114 typedef struct allocation_info_t allocation_info_t;
115
116 /** helper datastructure used when sorting register preferences */
117 struct reg_pref_t {
118         unsigned num;
119         float    pref;
120 };
121 typedef struct reg_pref_t reg_pref_t;
122
123 /** per basic-block information */
124 struct block_info_t {
125         bool     processed;       /**< indicate wether block is processed */
126         ir_node *assignments[0];  /**< register assignments at end of block */
127 };
128 typedef struct block_info_t block_info_t;
129
130 /**
131  * Get the allocation info for a node.
132  * The info is allocated on the first visit of a node.
133  */
134 static allocation_info_t *get_allocation_info(ir_node *node)
135 {
136         allocation_info_t *info = get_irn_link(node);
137         if (info == NULL) {
138                 info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs);
139                 info->current_value  = node;
140                 info->original_value = node;
141                 set_irn_link(node, info);
142         }
143
144         return info;
145 }
146
147 /**
148  * Get allocation information for a basic block
149  */
150 static block_info_t *get_block_info(ir_node *block)
151 {
152         block_info_t *info = get_irn_link(block);
153
154         assert(is_Block(block));
155         if (info == NULL) {
156                 info = OALLOCFZ(&obst, block_info_t, assignments, n_regs);
157                 set_irn_link(block, info);
158         }
159
160         return info;
161 }
162
163 /**
164  * Get default register requirement for the current register class
165  */
166 static const arch_register_req_t *get_default_req_current_cls(void)
167 {
168         if (default_cls_req == NULL) {
169                 struct obstack      *obst = get_irg_obstack(irg);
170                 arch_register_req_t *req  = OALLOCZ(obst, arch_register_req_t);
171
172                 req->type = arch_register_req_type_normal;
173                 req->cls  = cls;
174
175                 default_cls_req = req;
176         }
177         return default_cls_req;
178 }
179
180 /**
181  * Link the allocation info of a node to a copy.
182  * Afterwards, both nodes uses the same allocation info.
183  * Copy must not have an allocation info assigned yet.
184  *
185  * @param copy   the node that gets the allocation info assigned
186  * @param value  the original node
187  */
188 static void mark_as_copy_of(ir_node *copy, ir_node *value)
189 {
190         ir_node           *original;
191         allocation_info_t *info      = get_allocation_info(value);
192         allocation_info_t *copy_info = get_allocation_info(copy);
193
194         /* find original value */
195         original = info->original_value;
196         if (original != value) {
197                 info = get_allocation_info(original);
198         }
199
200         assert(info->original_value == original);
201         info->current_value = copy;
202
203         /* the copy should not be linked to something else yet */
204         assert(copy_info->original_value == copy);
205         /* copy over allocation preferences */
206         memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0]));
207         copy_info->original_value = original;
208 }
209
210 /**
211  * Calculate the penalties for every register on a node and its live neighbors.
212  *
213  * @param live_nodes  the set of live nodes at the current position, may be NULL
214  * @param penalty     the penalty to subtract from
215  * @param limited     a raw bitset containing the limited set for the node
216  * @param node        the node
217  */
218 static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
219                                       float penalty, const unsigned* limited,
220                                       ir_node *node)
221 {
222         ir_nodeset_iterator_t iter;
223         unsigned              r;
224         allocation_info_t     *info = get_allocation_info(node);
225         ir_node               *neighbor;
226
227         /* give penalty for all forbidden regs */
228         for (r = 0; r < n_regs; ++r) {
229                 if (rbitset_is_set(limited, r))
230                         continue;
231
232                 info->prefs[r] -= penalty;
233         }
234
235         /* all other live values should get a penalty for allowed regs */
236         if (live_nodes == NULL)
237                 return;
238
239         /* TODO: reduce penalty if there are multiple allowed registers... */
240         penalty *= NEIGHBOR_FACTOR;
241         foreach_ir_nodeset(live_nodes, neighbor, iter) {
242                 allocation_info_t *neighbor_info;
243
244                 /* TODO: if op is used on multiple inputs we might not do a
245                  * continue here */
246                 if (neighbor == node)
247                         continue;
248
249                 neighbor_info = get_allocation_info(neighbor);
250                 for (r = 0; r < n_regs; ++r) {
251                         if (!rbitset_is_set(limited, r))
252                                 continue;
253
254                         neighbor_info->prefs[r] -= penalty;
255                 }
256         }
257 }
258
259 /**
260  * Calculate the preferences of a definition for the current register class.
261  * If the definition uses a limited set of registers, reduce the preferences
262  * for the limited register on the node and its neighbors.
263  *
264  * @param live_nodes  the set of live nodes at the current node
265  * @param weight      the weight
266  * @param node        the current node
267  */
268 static void check_defs(const ir_nodeset_t *live_nodes, float weight,
269                        ir_node *node)
270 {
271         const arch_register_req_t *req;
272
273         if (get_irn_mode(node) == mode_T) {
274                 const ir_edge_t *edge;
275                 foreach_out_edge(node, edge) {
276                         ir_node *proj = get_edge_src_irn(edge);
277                         check_defs(live_nodes, weight, proj);
278                 }
279                 return;
280         }
281
282         if (!arch_irn_consider_in_reg_alloc(cls, node))
283                 return;
284
285         req = arch_get_register_req_out(node);
286         if (req->type & arch_register_req_type_limited) {
287                 const unsigned *limited = req->limited;
288                 float           penalty = weight * DEF_FACTOR;
289                 give_penalties_for_limits(live_nodes, penalty, limited, node);
290         }
291
292         if (req->type & arch_register_req_type_should_be_same) {
293                 ir_node           *insn  = skip_Proj(node);
294                 allocation_info_t *info  = get_allocation_info(node);
295                 int                arity = get_irn_arity(insn);
296                 int                i;
297
298                 float factor = 1.0f / rbitset_popcnt(&req->other_same, arity);
299                 for (i = 0; i < arity; ++i) {
300                         ir_node           *op;
301                         unsigned           r;
302                         allocation_info_t *op_info;
303
304                         if (!rbitset_is_set(&req->other_same, i))
305                                 continue;
306
307                         op = get_irn_n(insn, i);
308
309                         /* if we the value at the should_be_same input doesn't die at the
310                          * node, then it is no use to propagate the constraints (since a
311                          * copy will emerge anyway) */
312                         if (ir_nodeset_contains(live_nodes, op))
313                                 continue;
314
315                         op_info = get_allocation_info(op);
316                         for (r = 0; r < n_regs; ++r) {
317                                 op_info->prefs[r] += info->prefs[r] * factor;
318                         }
319                 }
320         }
321 }
322
323 /**
324  * Walker: Runs an a block calculates the preferences for any
325  * node and every register from the considered register class.
326  */
327 static void analyze_block(ir_node *block, void *data)
328 {
329         float         weight = get_block_execfreq(execfreqs, block);
330         ir_nodeset_t  live_nodes;
331         ir_node      *node;
332         (void) data;
333
334         ir_nodeset_init(&live_nodes);
335         be_liveness_end_of_block(lv, cls, block, &live_nodes);
336
337         sched_foreach_reverse(block, node) {
338                 allocation_info_t *info;
339                 int                i;
340                 int                arity;
341
342                 if (is_Phi(node))
343                         break;
344
345                 check_defs(&live_nodes, weight, node);
346
347                 /* mark last uses */
348                 arity = get_irn_arity(node);
349
350                 /* the allocation info node currently only uses 1 unsigned value
351                    to mark last used inputs. So we will fail for a node with more than
352                    32 inputs. */
353                 if (arity >= (int) sizeof(unsigned) * 8) {
354                         panic("Node with more than %d inputs not supported yet",
355                                         (int) sizeof(unsigned) * 8);
356                 }
357
358                 info = get_allocation_info(node);
359                 for (i = 0; i < arity; ++i) {
360                         ir_node *op = get_irn_n(node, i);
361                         if (!arch_irn_consider_in_reg_alloc(cls, op))
362                                 continue;
363
364                         /* last usage of a value? */
365                         if (!ir_nodeset_contains(&live_nodes, op)) {
366                                 rbitset_set(&info->last_uses, i);
367                         }
368                 }
369
370                 be_liveness_transfer(cls, node, &live_nodes);
371
372                 /* update weights based on usage constraints */
373                 for (i = 0; i < arity; ++i) {
374                         const arch_register_req_t *req;
375                         const unsigned            *limited;
376                         ir_node                   *op = get_irn_n(node, i);
377
378                         if (!arch_irn_consider_in_reg_alloc(cls, op))
379                                 continue;
380
381                         req = arch_get_register_req(node, i);
382                         if (!(req->type & arch_register_req_type_limited))
383                                 continue;
384
385                         limited = req->limited;
386                         give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited,
387                                                   op);
388                 }
389         }
390
391         ir_nodeset_destroy(&live_nodes);
392 }
393
394 static void create_congurence_class(ir_node *node, void *data)
395 {
396         (void) data;
397         if (is_Phi(node)) {
398                 int      i;
399                 int      arity   = get_irn_arity(node);
400                 unsigned phi_idx = get_irn_idx(node);
401                 phi_idx     = uf_find(congruence_classes, phi_idx);
402                 for (i = 0; i < arity; ++i) {
403                         ir_node *op     = get_Phi_pred(node, i);
404                         int      op_idx = get_irn_idx(op);
405                         op_idx  = uf_find(congruence_classes, op_idx);
406                         phi_idx = uf_union(congruence_classes, phi_idx, op_idx);
407                 }
408                 return;
409         }
410         /* should be same constraint? */
411         if (is_Proj(node)) {
412                 const arch_register_req_t *req = arch_get_register_req_out(node);
413                 if (req->type & arch_register_req_type_should_be_same) {
414                         ir_node *pred  = get_Proj_pred(node);
415                         int      arity = get_irn_arity(pred);
416                         int      i;
417                         unsigned node_idx = get_irn_idx(node);
418                         node_idx          = uf_find(congruence_classes, node_idx);
419
420                         for (i = 0; i < arity; ++i) {
421                                 ir_node *op;
422                                 unsigned op_idx;
423
424                                 if (!rbitset_is_set(&req->other_same, i))
425                                         continue;
426
427                                 op     = get_irn_n(pred, i);
428                                 op_idx = get_irn_idx(op);
429                                 op_idx = uf_find(congruence_classes, op_idx);
430                                 node_idx = uf_union(congruence_classes, node_idx, op_idx);
431                         }
432                 }
433                 return;
434         }
435 }
436
437 static void merge_congruence_prefs(ir_node *node, void *data)
438 {
439         allocation_info_t *info;
440         allocation_info_t *head_info;
441         unsigned node_idx = get_irn_idx(node);
442         unsigned node_set = uf_find(congruence_classes, node_idx);
443         unsigned r;
444
445         (void) data;
446
447         /* head of congruence class or not in any class */
448         if (node_set == node_idx)
449                 return;
450
451         if (!arch_irn_consider_in_reg_alloc(cls, node))
452                 return;
453
454         head_info = get_allocation_info(get_idx_irn(irg, node_set));
455         info      = get_allocation_info(node);
456
457         for (r = 0; r < n_regs; ++r) {
458                 head_info->prefs[r] += info->prefs[r];
459         }
460 }
461
462 static void set_congruence_prefs(ir_node *node, void *data)
463 {
464         allocation_info_t *info;
465         allocation_info_t *head_info;
466         unsigned node_idx = get_irn_idx(node);
467         unsigned node_set = uf_find(congruence_classes, node_idx);
468
469         (void) data;
470
471         /* head of congruence class or not in any class */
472         if (node_set == node_idx)
473                 return;
474
475         if (!arch_irn_consider_in_reg_alloc(cls, node))
476                 return;
477
478         head_info = get_allocation_info(get_idx_irn(irg, node_set));
479         info      = get_allocation_info(node);
480
481         memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0]));
482 }
483
484 static void combine_congruence_classes(void)
485 {
486         size_t n = get_irg_last_idx(irg);
487         congruence_classes = XMALLOCN(int, n);
488         uf_init(congruence_classes, n);
489
490         /* create congruence classes */
491         irg_walk_graph(irg, create_congurence_class, NULL, NULL);
492         /* merge preferences */
493         irg_walk_graph(irg, merge_congruence_prefs, NULL, NULL);
494         irg_walk_graph(irg, set_congruence_prefs, NULL, NULL);
495 }
496
497
498
499
500
501 /**
502  * Assign register reg to the given node.
503  *
504  * @param node  the node
505  * @param reg   the register
506  */
507 static void use_reg(ir_node *node, const arch_register_t *reg)
508 {
509         unsigned r = arch_register_get_index(reg);
510         assignments[r] = node;
511         arch_set_irn_register(node, reg);
512 }
513
514 static void free_reg_of_value(ir_node *node)
515 {
516         const arch_register_t *reg;
517         unsigned               r;
518
519         if (!arch_irn_consider_in_reg_alloc(cls, node))
520                 return;
521
522         reg        = arch_get_irn_register(node);
523         r          = arch_register_get_index(reg);
524         /* assignment->value may be NULL if a value is used at 2 inputs
525            so it gets freed twice. */
526         assert(assignments[r] == node || assignments[r] == NULL);
527         assignments[r] = NULL;
528 }
529
530 /**
531  * Compare two register preferences in decreasing order.
532  */
533 static int compare_reg_pref(const void *e1, const void *e2)
534 {
535         const reg_pref_t *rp1 = (const reg_pref_t*) e1;
536         const reg_pref_t *rp2 = (const reg_pref_t*) e2;
537         if (rp1->pref < rp2->pref)
538                 return 1;
539         if (rp1->pref > rp2->pref)
540                 return -1;
541         return 0;
542 }
543
544 static void fill_sort_candidates(reg_pref_t *regprefs,
545                                  const allocation_info_t *info)
546 {
547         unsigned r;
548
549         for (r = 0; r < n_regs; ++r) {
550                 float pref = info->prefs[r];
551                 regprefs[r].num  = r;
552                 regprefs[r].pref = pref;
553         }
554         /* TODO: use a stable sort here to avoid unnecessary register jumping */
555         qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref);
556 }
557
558 static bool try_optimistic_split(ir_node *to_split, ir_node *before,
559                                  float pref, float pref_delta,
560                                  unsigned *output_regs)
561 {
562         const arch_register_t *reg;
563         ir_node               *block;
564         ir_node               *copy;
565         unsigned               r;
566         unsigned               i;
567         allocation_info_t     *info = get_allocation_info(to_split);
568         reg_pref_t            *prefs;
569         float                  delta;
570
571         (void) pref;
572
573         /* find the best free position where we could move to */
574         prefs = ALLOCAN(reg_pref_t, n_regs);
575         fill_sort_candidates(prefs, info);
576         for (i = 0; i < n_regs; ++i) {
577                 r = prefs[i].num;
578                 if (!rbitset_is_set(normal_regs, r))
579                         continue;
580                 if (rbitset_is_set(output_regs, r))
581                         continue;
582                 if (assignments[r] == NULL)
583                         break;
584         }
585         if (i >= n_regs) {
586                 return false;
587         }
588         /* TODO: use execfreq somehow... */
589         delta = pref_delta + prefs[i].pref;
590         if (delta < SPLIT_DELTA) {
591                 DB((dbg, LEVEL_3, "Not doing optimistical split, win %f too low\n",
592                     delta));
593                 return false;
594         }
595
596         reg   = arch_register_for_index(cls, r);
597         block = get_nodes_block(before);
598         copy = be_new_Copy(cls, block, to_split);
599         mark_as_copy_of(copy, to_split);
600         free_reg_of_value(to_split);
601         use_reg(copy, reg);
602         sched_add_before(before, copy);
603
604         DB((dbg, LEVEL_3,
605             "Optimistic live-range split %+F move %+F -> %s before %+F (win %f)\n",
606             copy, to_split, reg->name, before, delta));
607         return true;
608 }
609
610 /**
611  * Determine and assign a register for node @p node
612  */
613 static void assign_reg(const ir_node *block, ir_node *node,
614                        unsigned *output_regs)
615 {
616         const arch_register_t     *reg;
617         allocation_info_t         *info;
618         const arch_register_req_t *req;
619         reg_pref_t                *reg_prefs;
620         ir_node                   *in_node;
621         unsigned                   i;
622         const unsigned            *allowed_regs;
623         unsigned                   r;
624
625         assert(arch_irn_consider_in_reg_alloc(cls, node));
626
627         /* preassigned register? */
628         reg = arch_get_irn_register(node);
629         if (reg != NULL) {
630                 DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name));
631                 use_reg(node, reg);
632                 return;
633         }
634
635         /* give should_be_same boni */
636         info = get_allocation_info(node);
637         req  = arch_get_register_req_out(node);
638
639         in_node = skip_Proj(node);
640         if (req->type & arch_register_req_type_should_be_same) {
641                 float weight = get_block_execfreq(execfreqs, block);
642                 int   arity  = get_irn_arity(in_node);
643                 int   i;
644
645                 assert(arity <= (int) sizeof(req->other_same) * 8);
646                 for (i = 0; i < arity; ++i) {
647                         ir_node               *in;
648                         const arch_register_t *reg;
649                         unsigned               r;
650                         if (!rbitset_is_set(&req->other_same, i))
651                                 continue;
652
653                         in  = get_irn_n(in_node, i);
654                         reg = arch_get_irn_register(in);
655                         assert(reg != NULL);
656                         r = arch_register_get_index(reg);
657
658                         /* if the value didn't die here then we should not propagate the
659                          * should_be_same info */
660                         if (assignments[r] == in)
661                                 continue;
662
663                         info->prefs[r] += weight * AFF_SHOULD_BE_SAME;
664                 }
665         }
666
667         /* create list of register candidates and sort by their preference */
668         DB((dbg, LEVEL_2, "Candidates for %+F:", node));
669         reg_prefs = alloca(n_regs * sizeof(reg_prefs[0]));
670         fill_sort_candidates(reg_prefs, info);
671         for (i = 0; i < n_regs; ++i) {
672                 unsigned num = reg_prefs[i].num;
673                 const arch_register_t *reg;
674
675                 if (!rbitset_is_set(normal_regs, num))
676                         continue;
677
678                 reg = arch_register_for_index(cls, num);
679                 DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref));
680         }
681         DB((dbg, LEVEL_2, "\n"));
682
683         allowed_regs = normal_regs;
684         if (req->type & arch_register_req_type_limited) {
685                 allowed_regs = req->limited;
686         }
687
688         for (i = 0; i < n_regs; ++i) {
689                 r = reg_prefs[i].num;
690                 if (!rbitset_is_set(allowed_regs, r))
691                         continue;
692                 if (assignments[r] == NULL)
693                         break;
694                 if (!is_Phi(node)) {
695                         float    pref   = reg_prefs[i].pref;
696                         float    delta  = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0;
697                         ir_node *before = skip_Proj(node);
698                         bool     res    = try_optimistic_split(assignments[r], before,
699                                                                pref, delta,
700                                                                output_regs);
701                         if (res)
702                                 break;
703                 }
704         }
705         if (i >= n_regs) {
706                 panic("No register left for %+F\n", node);
707         }
708
709         reg = arch_register_for_index(cls, r);
710         DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
711         use_reg(node, reg);
712 }
713
714 /**
715  * Add an permutation in front of a node and change the assignments
716  * due to this permutation.
717  *
718  * To understand this imagine a permutation like this:
719  *
720  * 1 -> 2
721  * 2 -> 3
722  * 3 -> 1, 5
723  * 4 -> 6
724  * 5
725  * 6
726  * 7 -> 7
727  *
728  * First we count how many destinations a single value has. At the same time
729  * we can be sure that each destination register has at most 1 source register
730  * (it can have 0 which means we don't care what value is in it).
731  * We ignore all fullfilled permuations (like 7->7)
732  * In a first pass we create as much copy instructions as possible as they
733  * are generally cheaper than exchanges. We do this by counting into how many
734  * destinations a register has to be copied (in the example it's 2 for register
735  * 3, or 1 for the registers 1,2,4 and 7).
736  * We can then create a copy into every destination register when the usecount
737  * of that register is 0 (= noone else needs the value in the register).
738  *
739  * After this step we should have cycles left. We implement a cyclic permutation
740  * of n registers with n-1 transpositions.
741  *
742  * @param live_nodes   the set of live nodes, updated due to live range split
743  * @param before       the node before we add the permutation
744  * @param permutation  the permutation array indices are the destination
745  *                     registers, the values in the array are the source
746  *                     registers.
747  */
748 static void permute_values(ir_nodeset_t *live_nodes, ir_node *before,
749                              unsigned *permutation)
750 {
751         unsigned  *n_used = ALLOCANZ(unsigned, n_regs);
752         ir_node   *block;
753         unsigned   r;
754
755         /* determine how often each source register needs to be read */
756         for (r = 0; r < n_regs; ++r) {
757                 unsigned  old_reg = permutation[r];
758                 ir_node  *value;
759
760                 value = assignments[old_reg];
761                 if (value == NULL) {
762                         /* nothing to do here, reg is not live. Mark it as fixpoint
763                          * so we ignore it in the next steps */
764                         permutation[r] = r;
765                         continue;
766                 }
767
768                 ++n_used[old_reg];
769         }
770
771         block = get_nodes_block(before);
772
773         /* step1: create copies where immediately possible */
774         for (r = 0; r < n_regs; /* empty */) {
775                 ir_node *copy;
776                 ir_node *src;
777                 const arch_register_t *reg;
778                 unsigned               old_r = permutation[r];
779
780                 /* - no need to do anything for fixed points.
781                    - we can't copy if the value in the dest reg is still needed */
782                 if (old_r == r || n_used[r] > 0) {
783                         ++r;
784                         continue;
785                 }
786
787                 /* create a copy */
788                 src  = assignments[old_r];
789                 copy = be_new_Copy(cls, block, src);
790                 sched_add_before(before, copy);
791                 reg = arch_register_for_index(cls, r);
792                 DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
793                     copy, src, before, reg->name));
794                 mark_as_copy_of(copy, src);
795                 use_reg(copy, reg);
796
797                 if (live_nodes != NULL) {
798                         ir_nodeset_insert(live_nodes, copy);
799                 }
800
801                 /* old register has 1 user less, permutation is resolved */
802                 assert(arch_register_get_index(arch_get_irn_register(src)) == old_r);
803                 permutation[r] = r;
804
805                 assert(n_used[old_r] > 0);
806                 --n_used[old_r];
807                 if (n_used[old_r] == 0) {
808                         if (live_nodes != NULL) {
809                                 ir_nodeset_remove(live_nodes, src);
810                         }
811                         free_reg_of_value(src);
812                 }
813
814                 /* advance or jump back (if this copy enabled another copy) */
815                 if (old_r < r && n_used[old_r] == 0) {
816                         r = old_r;
817                 } else {
818                         ++r;
819                 }
820         }
821
822         /* at this point we only have "cycles" left which we have to resolve with
823          * perm instructions
824          * TODO: if we have free registers left, then we should really use copy
825          * instructions for any cycle longer than 2 registers...
826          * (this is probably architecture dependent, there might be archs where
827          *  copies are preferable even for 2-cycles) */
828
829         /* create perms with the rest */
830         for (r = 0; r < n_regs; /* empty */) {
831                 const arch_register_t *reg;
832                 unsigned  old_r = permutation[r];
833                 unsigned  r2;
834                 ir_node  *in[2];
835                 ir_node  *perm;
836                 ir_node  *proj0;
837                 ir_node  *proj1;
838
839                 if (old_r == r) {
840                         ++r;
841                         continue;
842                 }
843
844                 /* we shouldn't have copies from 1 value to multiple destinations left*/
845                 assert(n_used[old_r] == 1);
846
847                 /* exchange old_r and r2; after that old_r is a fixed point */
848                 r2 = permutation[old_r];
849
850                 in[0] = assignments[r2];
851                 in[1] = assignments[old_r];
852                 perm = be_new_Perm(cls, block, 2, in);
853                 sched_add_before(before, perm);
854                 DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n",
855                     perm, in[0], in[1], before));
856
857                 proj0 = new_r_Proj(block, perm, get_irn_mode(in[0]), 0);
858                 mark_as_copy_of(proj0, in[0]);
859                 reg = arch_register_for_index(cls, old_r);
860                 use_reg(proj0, reg);
861
862                 proj1 = new_r_Proj(block, perm, get_irn_mode(in[1]), 1);
863                 mark_as_copy_of(proj1, in[1]);
864                 reg = arch_register_for_index(cls, r2);
865                 use_reg(proj1, reg);
866
867                 /* 1 value is now in the correct register */
868                 permutation[old_r] = old_r;
869                 /* the source of r changed to r2 */
870                 permutation[r] = r2;
871
872                 /* if we have reached a fixpoint update data structures */
873                 if (live_nodes != NULL) {
874                         ir_nodeset_remove(live_nodes, in[0]);
875                         ir_nodeset_remove(live_nodes, in[1]);
876                         ir_nodeset_remove(live_nodes, proj0);
877                         ir_nodeset_insert(live_nodes, proj1);
878                 }
879         }
880
881 #ifdef DEBUG_libfirm
882         /* now we should only have fixpoints left */
883         for (r = 0; r < n_regs; ++r) {
884                 assert(permutation[r] == r);
885         }
886 #endif
887 }
888
889 /**
890  * Free regs for values last used.
891  *
892  * @param live_nodes   set of live nodes, will be updated
893  * @param node         the node to consider
894  */
895 static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node)
896 {
897         allocation_info_t     *info      = get_allocation_info(node);
898         const unsigned        *last_uses = &info->last_uses;
899         int                    arity     = get_irn_arity(node);
900         int                    i;
901
902         for (i = 0; i < arity; ++i) {
903                 ir_node *op;
904
905                 /* check if one operand is the last use */
906                 if (!rbitset_is_set(last_uses, i))
907                         continue;
908
909                 op = get_irn_n(node, i);
910                 free_reg_of_value(op);
911                 ir_nodeset_remove(live_nodes, op);
912         }
913 }
914
915 /**
916  * change inputs of a node to the current value (copies/perms)
917  */
918 static void rewire_inputs(ir_node *node)
919 {
920         int i;
921         int arity = get_irn_arity(node);
922
923         for (i = 0; i < arity; ++i) {
924                 ir_node           *op = get_irn_n(node, i);
925                 allocation_info_t *info;
926
927                 if (!arch_irn_consider_in_reg_alloc(cls, op))
928                         continue;
929
930                 info = get_allocation_info(op);
931                 info = get_allocation_info(info->original_value);
932                 if (info->current_value != op) {
933                         set_irn_n(node, i, info->current_value);
934                 }
935         }
936 }
937
938 /**
939  * Create a bitset of registers occupied with value living through an
940  * instruction
941  */
942 static void determine_live_through_regs(unsigned *bitset, ir_node *node)
943 {
944         const allocation_info_t *info = get_allocation_info(node);
945         unsigned r;
946         int i;
947         int arity;
948
949         /* mark all used registers as potentially live-through */
950         for (r = 0; r < n_regs; ++r) {
951                 if (assignments[r] == NULL)
952                         continue;
953                 if (!rbitset_is_set(normal_regs, r))
954                         continue;
955
956                 rbitset_set(bitset, r);
957         }
958
959         /* remove registers of value dying at the instruction */
960         arity = get_irn_arity(node);
961         for (i = 0; i < arity; ++i) {
962                 ir_node               *op;
963                 const arch_register_t *reg;
964
965                 if (!rbitset_is_set(&info->last_uses, i))
966                         continue;
967
968                 op  = get_irn_n(node, i);
969                 reg = arch_get_irn_register(op);
970                 rbitset_clear(bitset, arch_register_get_index(reg));
971         }
972 }
973
974 /**
975  * Enforce constraints at a node by live range splits.
976  *
977  * @param  live_nodes  the set of live nodes, might be changed
978  * @param  node        the current node
979  */
980 static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node,
981                                 unsigned *output_regs)
982 {
983         int arity = get_irn_arity(node);
984         int i, dummy, res;
985         hungarian_problem_t *bp;
986         unsigned l, r;
987         unsigned *assignment;
988
989         /* construct a list of register occupied by live-through values */
990         unsigned *live_through_regs = NULL;
991
992         /* see if any use constraints are not met */
993         bool good = true;
994         for (i = 0; i < arity; ++i) {
995                 ir_node                   *op = get_irn_n(node, i);
996                 const arch_register_t     *reg;
997                 const arch_register_req_t *req;
998                 const unsigned            *limited;
999                 unsigned                  r;
1000
1001                 if (!arch_irn_consider_in_reg_alloc(cls, op))
1002                         continue;
1003
1004                 /* are there any limitations for the i'th operand? */
1005                 req = arch_get_register_req(node, i);
1006                 if (!(req->type & arch_register_req_type_limited))
1007                         continue;
1008
1009                 limited = req->limited;
1010                 reg     = arch_get_irn_register(op);
1011                 r       = arch_register_get_index(reg);
1012                 if (!rbitset_is_set(limited, r)) {
1013                         /* found an assignment outside the limited set */
1014                         good = false;
1015                         break;
1016                 }
1017         }
1018
1019         /* is any of the live-throughs using a constrained output register? */
1020         if (get_irn_mode(node) == mode_T) {
1021                 const ir_edge_t *edge;
1022
1023                 foreach_out_edge(node, edge) {
1024                         ir_node *proj = get_edge_src_irn(edge);
1025                         const arch_register_req_t *req;
1026
1027                         if (!arch_irn_consider_in_reg_alloc(cls, proj))
1028                                 continue;
1029
1030                         req = arch_get_register_req_out(proj);
1031                         if (!(req->type & arch_register_req_type_limited))
1032                                 continue;
1033
1034                         if (live_through_regs == NULL) {
1035                                 rbitset_alloca(live_through_regs, n_regs);
1036                                 determine_live_through_regs(live_through_regs, node);
1037                         }
1038
1039                         rbitset_or(output_regs, req->limited, n_regs);
1040                         if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
1041                                 good = false;
1042                         }
1043                 }
1044         } else {
1045                 if (arch_irn_consider_in_reg_alloc(cls, node)) {
1046                         const arch_register_req_t *req = arch_get_register_req_out(node);
1047                         if (req->type & arch_register_req_type_limited) {
1048                                 rbitset_alloca(live_through_regs, n_regs);
1049                                 determine_live_through_regs(live_through_regs, node);
1050                                 if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
1051                                         good = false;
1052                                         rbitset_or(output_regs, req->limited, n_regs);
1053                                 }
1054                         }
1055                 }
1056         }
1057
1058         if (good)
1059                 return;
1060
1061         /* create these arrays if we haven't yet */
1062         if (live_through_regs == NULL) {
1063                 rbitset_alloca(live_through_regs, n_regs);
1064         }
1065
1066         /* at this point we have to construct a bipartite matching problem to see
1067          * which values should go to which registers
1068          * Note: We're building the matrix in "reverse" - source registers are
1069          *       right, destinations left because this will produce the solution
1070          *       in the format required for permute_values.
1071          */
1072         bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT);
1073
1074         /* add all combinations, then remove not allowed ones */
1075         for (l = 0; l < n_regs; ++l) {
1076                 if (!rbitset_is_set(normal_regs, l)) {
1077                         hungarian_add(bp, l, l, 1);
1078                         continue;
1079                 }
1080
1081                 for (r = 0; r < n_regs; ++r) {
1082                         if (!rbitset_is_set(normal_regs, r))
1083                                 continue;
1084                         /* livethrough values may not use constrainted output registers */
1085                         if (rbitset_is_set(live_through_regs, l)
1086                                         && rbitset_is_set(output_regs, r))
1087                                 continue;
1088
1089                         hungarian_add(bp, r, l, l == r ? 9 : 8);
1090                 }
1091         }
1092
1093         for (i = 0; i < arity; ++i) {
1094                 ir_node                   *op = get_irn_n(node, i);
1095                 const arch_register_t     *reg;
1096                 const arch_register_req_t *req;
1097                 const unsigned            *limited;
1098                 unsigned                   current_reg;
1099
1100                 if (!arch_irn_consider_in_reg_alloc(cls, op))
1101                         continue;
1102
1103                 req = arch_get_register_req(node, i);
1104                 if (!(req->type & arch_register_req_type_limited))
1105                         continue;
1106
1107                 limited     = req->limited;
1108                 reg         = arch_get_irn_register(op);
1109                 current_reg = arch_register_get_index(reg);
1110                 for (r = 0; r < n_regs; ++r) {
1111                         if (rbitset_is_set(limited, r))
1112                                 continue;
1113                         hungarian_remv(bp, r, current_reg);
1114                 }
1115         }
1116
1117         //hungarian_print_costmatrix(bp, 1);
1118         hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);
1119
1120         assignment = ALLOCAN(unsigned, n_regs);
1121         res = hungarian_solve(bp, (int*) assignment, &dummy, 0);
1122         assert(res == 0);
1123
1124 #if 0
1125         fprintf(stderr, "Swap result:");
1126         for (i = 0; i < (int) n_regs; ++i) {
1127                 fprintf(stderr, " %d", assignment[i]);
1128         }
1129         fprintf(stderr, "\n");
1130 #endif
1131
1132         hungarian_free(bp);
1133
1134         permute_values(live_nodes, node, assignment);
1135 }
1136
1137 /** test wether a node @p n is a copy of the value of node @p of */
1138 static bool is_copy_of(ir_node *value, ir_node *test_value)
1139 {
1140         allocation_info_t *test_info;
1141         allocation_info_t *info;
1142
1143         if (value == test_value)
1144                 return true;
1145
1146         info      = get_allocation_info(value);
1147         test_info = get_allocation_info(test_value);
1148         return test_info->original_value == info->original_value;
1149 }
1150
1151 /**
1152  * find a value in the end-assignment of a basic block
1153  * @returns the index into the assignment array if found
1154  *          -1 if not found
1155  */
1156 static int find_value_in_block_info(block_info_t *info, ir_node *value)
1157 {
1158         unsigned   r;
1159         ir_node  **assignments = info->assignments;
1160         for (r = 0; r < n_regs; ++r) {
1161                 ir_node *a_value = assignments[r];
1162
1163                 if (a_value == NULL)
1164                         continue;
1165                 if (is_copy_of(a_value, value))
1166                         return (int) r;
1167         }
1168
1169         return -1;
1170 }
1171
1172 /**
1173  * Create the necessary permutations at the end of a basic block to fullfill
1174  * the register assignment for phi-nodes in the next block
1175  */
1176 static void add_phi_permutations(ir_node *block, int p)
1177 {
1178         unsigned   r;
1179         unsigned  *permutation;
1180         ir_node  **old_assignments;
1181         bool       need_permutation;
1182         ir_node   *node;
1183         ir_node   *pred = get_Block_cfgpred_block(block, p);
1184
1185         block_info_t *pred_info = get_block_info(pred);
1186
1187         /* predecessor not processed yet? nothing to do */
1188         if (!pred_info->processed)
1189                 return;
1190
1191         permutation = ALLOCAN(unsigned, n_regs);
1192         for (r = 0; r < n_regs; ++r) {
1193                 permutation[r] = r;
1194         }
1195
1196         /* check phi nodes */
1197         need_permutation = false;
1198         node = sched_first(block);
1199         for ( ; is_Phi(node); node = sched_next(node)) {
1200                 const arch_register_t *reg;
1201                 int                    regn;
1202                 int                    a;
1203                 ir_node               *op;
1204
1205                 if (!arch_irn_consider_in_reg_alloc(cls, node))
1206                         continue;
1207
1208                 op = get_Phi_pred(node, p);
1209                 if (!arch_irn_consider_in_reg_alloc(cls, op))
1210                         continue;
1211
1212                 a  = find_value_in_block_info(pred_info, op);
1213                 assert(a >= 0);
1214
1215                 reg  = arch_get_irn_register(node);
1216                 regn = arch_register_get_index(reg);
1217                 if (regn != a) {
1218                         permutation[regn] = a;
1219                         need_permutation  = true;
1220                 }
1221         }
1222
1223         if (need_permutation) {
1224                 /* permute values at end of predecessor */
1225                 old_assignments = assignments;
1226                 assignments     = pred_info->assignments;
1227                 permute_values(NULL, be_get_end_of_block_insertion_point(pred),
1228                                                  permutation);
1229                 assignments = old_assignments;
1230         }
1231
1232         /* change phi nodes to use the copied values */
1233         node = sched_first(block);
1234         for ( ; is_Phi(node); node = sched_next(node)) {
1235                 int      a;
1236                 ir_node *op;
1237
1238                 if (!arch_irn_consider_in_reg_alloc(cls, node))
1239                         continue;
1240
1241                 op = get_Phi_pred(node, p);
1242                 /* no need to do anything for Unknown inputs */
1243                 if (!arch_irn_consider_in_reg_alloc(cls, op))
1244                         continue;
1245
1246                 /* we have permuted all values into the correct registers so we can
1247                    simply query which value occupies the phis register in the
1248                    predecessor */
1249                 a  = arch_register_get_index(arch_get_irn_register(node));
1250                 op = pred_info->assignments[a];
1251                 set_Phi_pred(node, p, op);
1252         }
1253 }
1254
1255 /**
1256  * Set preferences for a phis register based on the registers used on the
1257  * phi inputs.
1258  */
1259 static void adapt_phi_prefs(ir_node *phi)
1260 {
1261         int i;
1262         int arity = get_irn_arity(phi);
1263         ir_node           *block = get_nodes_block(phi);
1264         allocation_info_t *info  = get_allocation_info(phi);
1265
1266         for (i = 0; i < arity; ++i) {
1267                 ir_node               *op  = get_irn_n(phi, i);
1268                 const arch_register_t *reg = arch_get_irn_register(op);
1269                 ir_node               *pred_block;
1270                 block_info_t          *pred_block_info;
1271                 float                  weight;
1272                 unsigned               r;
1273
1274                 if (reg == NULL)
1275                         continue;
1276                 /* we only give the bonus if the predecessor already has registers
1277                  * assigned, otherwise we only see a dummy value
1278                  * and any conclusions about its register are useless */
1279                 pred_block = get_Block_cfgpred_block(block, i);
1280                 pred_block_info = get_block_info(pred_block);
1281                 if (!pred_block_info->processed)
1282                         continue;
1283
1284                 /* give bonus for already assigned register */
1285                 weight = get_block_execfreq(execfreqs, pred_block);
1286                 r      = arch_register_get_index(reg);
1287                 info->prefs[r] += weight * AFF_PHI;
1288         }
1289 }
1290
1291 /**
1292  * After a phi has been assigned a register propagate preference inputs
1293  * to the phi inputs.
1294  */
1295 static void propagate_phi_register(ir_node *phi, unsigned r)
1296 {
1297         int                    i;
1298         ir_node               *block = get_nodes_block(phi);
1299         int                    arity = get_irn_arity(phi);
1300
1301         for (i = 0; i < arity; ++i) {
1302                 ir_node           *op   = get_Phi_pred(phi, i);
1303                 allocation_info_t *info = get_allocation_info(op);
1304                 ir_node           *pred;
1305                 float              weight;
1306
1307                 pred   = get_Block_cfgpred_block(block, i);
1308                 weight = get_block_execfreq(execfreqs, pred);
1309                 weight *= AFF_PHI;
1310
1311                 if (info->prefs[r] >= weight)
1312                         continue;
1313
1314                 /* promote the prefered register */
1315                 info->prefs[r] += AFF_PHI * weight;
1316                 if (is_Phi(op))
1317                         propagate_phi_register(op, r);
1318         }
1319 }
1320
1321 /**
1322  * Walker: assign registers to all nodes of a block that
1323  * need registers from the currently considered register class.
1324  */
1325 static void allocate_coalesce_block(ir_node *block, void *data)
1326 {
1327         int                    i;
1328         ir_nodeset_t           live_nodes;
1329         ir_nodeset_iterator_t  iter;
1330         ir_node               *node, *start;
1331         int                    n_preds;
1332         block_info_t          *block_info;
1333         block_info_t         **pred_block_infos;
1334         ir_node              **phi_ins;
1335         unsigned              *output_regs; /**< collects registers which must not
1336                                                                                      be used for optimistic splits */
1337
1338         (void) data;
1339         DB((dbg, LEVEL_2, "* Block %+F\n", block));
1340
1341         /* clear assignments */
1342         block_info  = get_block_info(block);
1343         assignments = block_info->assignments;
1344
1345         ir_nodeset_init(&live_nodes);
1346
1347         /* gather regalloc infos of predecessor blocks */
1348         n_preds             = get_Block_n_cfgpreds(block);
1349         pred_block_infos    = ALLOCAN(block_info_t*, n_preds);
1350         for (i = 0; i < n_preds; ++i) {
1351                 ir_node      *pred      = get_Block_cfgpred_block(block, i);
1352                 block_info_t *pred_info = get_block_info(pred);
1353                 pred_block_infos[i]     = pred_info;
1354         }
1355
1356         phi_ins = ALLOCAN(ir_node*, n_preds);
1357
1358         /* collect live-in nodes and preassigned values */
1359         be_lv_foreach(lv, block, be_lv_state_in, i) {
1360                 const arch_register_t *reg;
1361                 int                    p;
1362                 bool                   need_phi = false;
1363
1364                 node = be_lv_get_irn(lv, block, i);
1365                 if (!arch_irn_consider_in_reg_alloc(cls, node))
1366                         continue;
1367
1368                 /* check all predecessors for this value, if it is not everywhere the
1369                    same or unknown then we have to construct a phi
1370                    (we collect the potential phi inputs here) */
1371                 for (p = 0; p < n_preds; ++p) {
1372                         block_info_t *pred_info = pred_block_infos[p];
1373
1374                         if (!pred_info->processed) {
1375                                 /* use node for now, it will get fixed later */
1376                                 phi_ins[p] = node;
1377                                 need_phi   = true;
1378                         } else {
1379                                 int a = find_value_in_block_info(pred_info, node);
1380
1381                                 /* must live out of predecessor */
1382                                 assert(a >= 0);
1383                                 phi_ins[p] = pred_info->assignments[a];
1384                                 /* different value from last time? then we need a phi */
1385                                 if (p > 0 && phi_ins[p-1] != phi_ins[p]) {
1386                                         need_phi = true;
1387                                 }
1388                         }
1389                 }
1390
1391                 if (need_phi) {
1392                         ir_mode                   *mode = get_irn_mode(node);
1393                         const arch_register_req_t *req  = get_default_req_current_cls();
1394                         ir_node                   *phi;
1395                         int                        i;
1396
1397                         phi = new_r_Phi(block, n_preds, phi_ins, mode);
1398                         be_set_phi_reg_req(phi, req);
1399
1400                         DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node));
1401 #ifdef DEBUG_libfirm
1402                         for (i = 0; i < n_preds; ++i) {
1403                                 DB((dbg, LEVEL_3, " %+F", phi_ins[i]));
1404                         }
1405                         DB((dbg, LEVEL_3, "\n"));
1406 #endif
1407                         mark_as_copy_of(phi, node);
1408                         sched_add_after(block, phi);
1409
1410                         node = phi;
1411                 } else {
1412                         allocation_info_t *info = get_allocation_info(node);
1413                         info->current_value = phi_ins[0];
1414
1415                         /* Grab 1 of the inputs we constructed (might not be the same as
1416                          * "node" as we could see the same copy of the value in all
1417                          * predecessors */
1418                         node = phi_ins[0];
1419                 }
1420
1421                 /* if the node already has a register assigned use it */
1422                 reg = arch_get_irn_register(node);
1423                 if (reg != NULL) {
1424                         /* TODO: consult pred-block infos here. The value could be copied
1425                            away in some/all predecessor blocks. We need to construct
1426                            phi-nodes in this case.
1427                            We even need to construct some Phi_0 like constructs in cases
1428                            where the predecessor allocation is not determined yet. */
1429                         use_reg(node, reg);
1430                 }
1431
1432                 /* remember that this node is live at the beginning of the block */
1433                 ir_nodeset_insert(&live_nodes, node);
1434         }
1435
1436         rbitset_alloca(output_regs, n_regs);
1437
1438         /* handle phis... */
1439         node = sched_first(block);
1440         for ( ; is_Phi(node); node = sched_next(node)) {
1441                 const arch_register_t *reg;
1442
1443                 if (!arch_irn_consider_in_reg_alloc(cls, node))
1444                         continue;
1445
1446                 /* fill in regs already assigned */
1447                 reg = arch_get_irn_register(node);
1448                 if (reg != NULL) {
1449                         use_reg(node, reg);
1450                 } else {
1451                         adapt_phi_prefs(node);
1452                         assign_reg(block, node, output_regs);
1453
1454                         reg = arch_get_irn_register(node);
1455                         propagate_phi_register(node, arch_register_get_index(reg));
1456                 }
1457         }
1458         start = node;
1459
1460         /* assign regs for live-in values */
1461         foreach_ir_nodeset(&live_nodes, node, iter) {
1462                 const arch_register_t *reg = arch_get_irn_register(node);
1463                 if (reg != NULL)
1464                         continue;
1465
1466                 assign_reg(block, node, output_regs);
1467                 /* shouldn't happen if we color in dominance order */
1468                 assert (!is_Phi(node));
1469         }
1470
1471         /* assign instructions in the block */
1472         for (node = start; !sched_is_end(node); node = sched_next(node)) {
1473                 unsigned r;
1474
1475                 rewire_inputs(node);
1476
1477                 /* enforce use constraints */
1478                 rbitset_clear_all(output_regs, n_regs);
1479                 enforce_constraints(&live_nodes, node, output_regs);
1480                 /* we may not use registers occupied here for optimistic splits */
1481                 for (r = 0; r < n_regs; ++r) {
1482                         if (assignments[r] != NULL)
1483                                 rbitset_set(output_regs, r);
1484                 }
1485
1486                 rewire_inputs(node);
1487
1488                 /* free registers of values last used at this instruction */
1489                 free_last_uses(&live_nodes, node);
1490
1491                 /* assign output registers */
1492                 /* TODO: 2 phases: first: pre-assigned ones, 2nd real regs */
1493                 if (get_irn_mode(node) == mode_T) {
1494                         const ir_edge_t *edge;
1495                         foreach_out_edge(node, edge) {
1496                                 ir_node *proj = get_edge_src_irn(edge);
1497                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
1498                                         continue;
1499                                 assign_reg(block, proj, output_regs);
1500                         }
1501                 } else if (arch_irn_consider_in_reg_alloc(cls, node)) {
1502                         assign_reg(block, node, output_regs);
1503                 }
1504         }
1505
1506         ir_nodeset_destroy(&live_nodes);
1507         assignments = NULL;
1508
1509         block_info->processed = true;
1510
1511         /* permute values at end of predecessor blocks in case of phi-nodes */
1512         if (n_preds > 1) {
1513                 int p;
1514                 for (p = 0; p < n_preds; ++p) {
1515                         add_phi_permutations(block, p);
1516                 }
1517         }
1518
1519         /* if we have exactly 1 successor then we might be able to produce phi
1520            copies now */
1521         if (get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) == 1) {
1522                 const ir_edge_t *edge
1523                         = get_irn_out_edge_first_kind(block, EDGE_KIND_BLOCK);
1524                 ir_node      *succ      = get_edge_src_irn(edge);
1525                 int           p         = get_edge_src_pos(edge);
1526                 block_info_t *succ_info = get_block_info(succ);
1527
1528                 if (succ_info->processed) {
1529                         add_phi_permutations(succ, p);
1530                 }
1531         }
1532 }
1533
1534 /**
1535  * Run the register allocator for the current register class.
1536  */
1537 static void be_straight_alloc_cls(void)
1538 {
1539         lv = be_assure_liveness(birg);
1540         be_liveness_assure_sets(lv);
1541         be_liveness_assure_chk(lv);
1542
1543         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1544
1545         DB((dbg, LEVEL_2, "=== Allocating registers of %s ===\n", cls->name));
1546
1547         be_clear_links(irg);
1548         irg_block_walk_graph(irg, NULL, analyze_block, NULL);
1549         combine_congruence_classes();
1550         /* we need some dominance pre-order walk to ensure we see all
1551          *  definitions/create copies before we encounter their users */
1552         dom_tree_walk_irg(irg, allocate_coalesce_block, NULL, NULL);
1553
1554         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1555 }
1556
1557 static void dump(int mask, ir_graph *irg, const char *suffix,
1558                  void (*dumper)(ir_graph *, const char *))
1559 {
1560         if(birg->main_env->options->dump_flags & mask)
1561                 be_dump(irg, suffix, dumper);
1562 }
1563
1564 /**
1565  * Run the spiller on the current graph.
1566  */
1567 static void spill(void)
1568 {
1569         /* make sure all nodes show their real register pressure */
1570         BE_TIMER_PUSH(t_ra_constr);
1571         be_pre_spill_prepare_constr(birg, cls);
1572         BE_TIMER_POP(t_ra_constr);
1573
1574         dump(DUMP_RA, irg, "-spillprepare", dump_ir_block_graph_sched);
1575
1576         /* spill */
1577         BE_TIMER_PUSH(t_ra_spill);
1578         be_do_spill(birg, cls);
1579         BE_TIMER_POP(t_ra_spill);
1580
1581         BE_TIMER_PUSH(t_ra_spill_apply);
1582         check_for_memory_operands(irg);
1583         BE_TIMER_POP(t_ra_spill_apply);
1584
1585         dump(DUMP_RA, irg, "-spill", dump_ir_block_graph_sched);
1586 }
1587
1588 /**
1589  * The straight register allocator for a whole procedure.
1590  */
1591 static void be_straight_alloc(be_irg_t *new_birg)
1592 {
1593         const arch_env_t *arch_env = new_birg->main_env->arch_env;
1594         int   n_cls                = arch_env_get_n_reg_class(arch_env);
1595         int   c;
1596
1597         obstack_init(&obst);
1598
1599         birg      = new_birg;
1600         irg       = be_get_birg_irg(birg);
1601         execfreqs = birg->exec_freq;
1602
1603         /* TODO: extract some of the stuff from bechordal allocator, like
1604          * statistics, time measurements, etc. and use them here too */
1605
1606         for (c = 0; c < n_cls; ++c) {
1607                 cls             = arch_env_get_reg_class(arch_env, c);
1608                 default_cls_req = NULL;
1609                 if (arch_register_class_flags(cls) & arch_register_class_flag_manual_ra)
1610                         continue;
1611
1612                 stat_ev_ctx_push_str("regcls", cls->name);
1613
1614                 n_regs      = arch_register_class_n_regs(cls);
1615                 normal_regs = rbitset_malloc(n_regs);
1616                 be_abi_set_non_ignore_regs(birg->abi, cls, normal_regs);
1617
1618                 spill();
1619
1620                 /* verify schedule and register pressure */
1621                 BE_TIMER_PUSH(t_verify);
1622                 if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) {
1623                         be_verify_schedule(birg);
1624                         be_verify_register_pressure(birg, cls, irg);
1625                 } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) {
1626                         assert(be_verify_schedule(birg) && "Schedule verification failed");
1627                         assert(be_verify_register_pressure(birg, cls, irg)
1628                                 && "Register pressure verification failed");
1629                 }
1630                 BE_TIMER_POP(t_verify);
1631
1632                 BE_TIMER_PUSH(t_ra_color);
1633                 be_straight_alloc_cls();
1634                 BE_TIMER_POP(t_ra_color);
1635
1636                 /* we most probably constructed new Phis so liveness info is invalid
1637                  * now */
1638                 /* TODO: test liveness_introduce */
1639                 be_liveness_invalidate(lv);
1640                 free(normal_regs);
1641
1642                 stat_ev_ctx_pop("regcls");
1643         }
1644
1645         BE_TIMER_PUSH(t_ra_spill_apply);
1646         be_abi_fix_stack_nodes(birg->abi);
1647         BE_TIMER_POP(t_ra_spill_apply);
1648
1649         BE_TIMER_PUSH(t_verify);
1650         if (birg->main_env->options->vrfy_option == BE_VRFY_WARN) {
1651                 be_verify_register_allocation(birg);
1652         } else if (birg->main_env->options->vrfy_option == BE_VRFY_ASSERT) {
1653                 assert(be_verify_register_allocation(birg)
1654                                 && "Register allocation invalid");
1655         }
1656         BE_TIMER_POP(t_verify);
1657
1658         obstack_free(&obst, NULL);
1659 }
1660
1661 /**
1662  * Initializes this module.
1663  */
1664 void be_init_straight_alloc(void)
1665 {
1666         static be_ra_t be_ra_straight = {
1667                 be_straight_alloc,
1668         };
1669
1670         FIRM_DBG_REGISTER(dbg, "firm.be.straightalloc");
1671
1672         be_register_allocator("straight", &be_ra_straight);
1673 }
1674
1675 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_straight_alloc);