/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */
/**
 * @brief   Belady's spill algorithm version 2.
 * @author  Sebastian Hack, Matthias Braun, Daniel Grund
 *
 * The main differences to the original Belady are:
 * - The workset is empty at the start of a block.
 *   There is no attempt to fill it with variables which
 *   are not used in the block.
 * - There is a global pass which tries to use the remaining
 *   capacity of the blocks to let global variables live through
 *   them.
 */
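/*
 * Illustrative sketch (not part of the algorithm itself): with n_regs = 2
 * and a block using the values a, b, a, c, a in that order, the workset
 * holds {a, b} until c is used; at that point the value with the furthest
 * next use (b, which is never used again) is evicted rather than a, whose
 * next use is nearest. This is the classic Belady eviction rule, applied
 * per block below.
 */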
#include "irnodeset.h"
#include "irprintf_t.h"
#include "iredges_t.h"
#include "irphase_t.h"

#include "bechordal_t.h"
#include "beloopana.h"
#include "bespillutil.h"

#include "lc_opts_enum.h"
#define DBG_SPILL     1
#define DBG_WSETS     2
#define DBG_DECIDE    8
#define DBG_WORKSET 128
#define DBG_GLOBAL  256

#define ALREADY_SPILLED_FACTOR 2

#define DEAD       UINT_MAX
#define LIVE_END   (DEAD-1)
#define REMAT_DIST (DEAD-2)
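/*
 * Note on the sentinel distances above (derived from the defines and their
 * use in displace()/get_curr_distance() below): values are evicted in order
 * of decreasing next-use distance, so DEAD values go first, then values
 * live only at the block end (LIVE_END), then rematerializable values
 * (REMAT_DIST), before any value with a real in-block use.
 */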
static int already_spilled_factor = 2;
static int remat_live_range_ext   = 1;
static int global_pass_enabled    = 1;

static const lc_opt_table_entry_t options[] = {
	LC_OPT_ENT_INT ("asf",    "already spilled factor",                               &already_spilled_factor),
	LC_OPT_ENT_BOOL("remat",  "rematerializable ops get infinitely long live ranges", &remat_live_range_ext),
	LC_OPT_ENT_BOOL("global", "enable/disable the global pass",                       &global_pass_enabled),
	LC_OPT_LAST
};

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/**
 * An association between a node and a point in time.
 */
typedef struct _loc_t {
	ir_node *irn;  /**< A node. */
	unsigned time; /**< A use time.
	                    In the global pass this is used
	                    as the version number and not as a time.
	                    Only to save space... */
} loc_t;
typedef struct _workset_t {
	int len;       /**< current length */
	loc_t vals[0]; /**< inlined array of the values/distances in this working set */
} workset_t;
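/*
 * vals[] is an inlined (flexible) array: a workset is created by a single
 * obstack allocation that covers the header plus n_regs slots, e.g.
 * (sketch, cf. new_workset() below):
 *
 *   workset_t *ws = OALLOCFZ(ob, workset_t, vals, env->n_regs);
 */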
typedef struct _belady_env_t {
	struct obstack ob;
	ir_graph *irg;
	const dfs_t *dfs;
	const arch_env_t *arch;
	const arch_register_class_t *cls;
	be_lv_t *lv;
	ir_exec_freq *ef;

	ir_node **blocks;   /**< Array of all blocks. */
	int n_blocks;       /**< Number of blocks in the graph. */
	int n_regs;         /**< number of regs in this reg-class */
	workset_t *ws;      /**< the main workset used while processing a block. ob-allocated */
	ir_node *instr;     /**< current instruction */
	int instr_nr;       /**< current instruction number (relative to block start) */

	spill_env_t *senv;  /**< see bespill.h */
	bitset_t *spilled;  /**< bitset to keep all the irns which have already been spilled. */
	ir_nodeset_t *extra_spilled; /**< All nodes for which a special spill location has been computed. */
} belady_env_t;
static int loc_compare(const void *a, const void *b)
{
	const loc_t *p = a;
	const loc_t *q = b;
	return (p->time > q->time) - (p->time < q->time);
}
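/* The (x > y) - (x < y) idiom above yields -1/0/+1 and avoids the wrap-around
 * that a plain 'p->time - q->time' would produce for unsigned times. */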
static inline void workset_print(const workset_t *w)
{
	int i;

	for (i = 0; i < w->len; ++i) {
		ir_fprintf(stderr, "%+F %d\n", w->vals[i].irn, w->vals[i].time);
	}
}
/**
 * Alloc a new workset on obstack @p ob with maximum size @p max
 */
static inline workset_t *new_workset(belady_env_t *env, struct obstack *ob)
{
	return OALLOCFZ(ob, workset_t, vals, env->n_regs);
}
/**
 * Alloc a new instance on obstack and make it equal to @param ws
 */
static inline workset_t *workset_clone(belady_env_t *env, struct obstack *ob, workset_t *ws)
{
	workset_t *res = OALLOCF(ob, workset_t, vals, env->n_regs);
	memcpy(res, ws, sizeof(*res) + (env->n_regs)*sizeof(res->vals[0]));
	return res;
}
/**
 * Do NOT alloc anything. Make @param tgt equal to @param src.
 * Returns @param tgt for convenience.
 */
static inline workset_t *workset_copy(belady_env_t *env, workset_t *tgt, workset_t *src)
{
	size_t size = sizeof(*src) + (env->n_regs)*sizeof(src->vals[0]);
	memcpy(tgt, src, size);
	return tgt;
}
/**
 * Overwrites the current content array of @param ws with the
 * @param count locations given at memory @param locs.
 * Set the length of @param ws to count.
 */
static inline void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
{
	workset->len = count;
	memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
}
/**
 * Inserts the value @p val into the workset, iff it is not
 * already contained. The workset must not be full.
 */
static inline void workset_insert(belady_env_t *env, workset_t *ws, ir_node *val)
{
	int i;

	/* check for current regclass */
	if (!arch_irn_consider_in_reg_alloc(env->cls, val)) {
		// DBG((dbg, DBG_WORKSET, "Skipped %+F\n", val));
		return;
	}

	/* check if val is already contained */
	for (i = 0; i < ws->len; ++i)
		if (ws->vals[i].irn == val)
			return;

	/* insert val */
	assert(ws->len < env->n_regs && "Workset already full!");
	ws->vals[ws->len++].irn = val;
}
/**
 * Removes all entries from this workset
 */
static inline void workset_clear(workset_t *ws)
{
	ws->len = 0;
}
/**
 * Removes the value @p val from the workset if present.
 */
static inline void workset_remove(workset_t *ws, ir_node *val)
{
	int i;

	for (i = 0; i < ws->len; ++i) {
		if (ws->vals[i].irn == val) {
			ws->vals[i] = ws->vals[--ws->len];
			return;
		}
	}
}
static inline int workset_get_index(const workset_t *ws, const ir_node *val)
{
	int i;

	for (i = 0; i < ws->len; ++i) {
		if (ws->vals[i].irn == val)
			return i;
	}

	return -1;
}
/**
 * Iterates over all values in the working set.
 * @p ws The workset to iterate
 * @p v  A variable to put the current value in
 * @p i  An integer for internal use
 */
#define workset_foreach(ws, v, i) \
	for (i = 0; \
	     v = (i < ws->len) ? ws->vals[i].irn : NULL, i < ws->len; \
	     ++i)

#define workset_set_time(ws, i, t)     (ws)->vals[i].time = t
#define workset_get_time(ws, i)        (ws)->vals[i].time
#define workset_set_length(ws, length) (ws)->len = length
#define workset_get_length(ws)         ((ws)->len)
#define workset_get_val(ws, i)         ((ws)->vals[i].irn)
#define workset_sort(ws) qsort((ws)->vals, (ws)->len, sizeof((ws)->vals[0]), loc_compare);
#define workset_contains(ws, n)        (workset_get_index(ws, n) >= 0)
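/*
 * Usage sketch for workset_foreach (hypothetical ws/val/iter variables):
 *
 *   ir_node *val;
 *   int     iter;
 *   workset_foreach(ws, val, iter)
 *       ir_fprintf(stderr, "%+F (%u)\n", val, workset_get_time(ws, iter));
 *
 * val becomes NULL once the iteration is exhausted, mirroring the macro's
 * loop condition.
 */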
typedef struct _bring_in_t bring_in_t;

typedef struct _block_info_t {
	belady_env_t *bel;     /**< The belady environment. */
	ir_node *bl;           /**< The block this info belongs to. */
	int id;                /**< The block's index into bel->blocks. */
	ir_phase next_uses;    /**< Per-block next-use information. */
	workset_t *ws_end;     /**< The end set after the local belady pass. */
	double exec_freq;      /**< The execution frequency of this block. */

	double reload_cost;    /**< Cost of a reload in this block. */
	ir_node *first_non_in; /**< First node in block which is not a phi. */
	ir_node *last_ins;     /**< The instruction before which end of
	                            block reloads will be inserted. */

	int pressure;          /**< The maximal register pressure in this block.
	                            The remaining capacity can be used to let
	                            global variables, transported into other blocks,
	                            live through this block. */

	int front_pressure;    /**< The pressure right before the first
	                            real (non-phi) node. At the beginning
	                            of the global pass, this is 0. */
	struct list_head br_head; /**< List head for all bring_in variables. */
	int free_at_jump;      /**< registers free at jump. */
} block_info_t;
static inline void *new_block_info(belady_env_t *bel, int id)
{
	ir_node      *bl  = bel->blocks[id];
	block_info_t *res = OALLOCZ(&bel->ob, block_info_t);
	res->first_non_in = NULL;
	res->last_ins     = NULL;
	res->bel          = bel;
	res->bl           = bl;
	res->id           = id;
	res->exec_freq    = get_block_execfreq(bel->ef, bl);
	res->reload_cost  = bel->arch->reload_cost * res->exec_freq;
	res->free_at_jump = bel->n_regs;
	INIT_LIST_HEAD(&res->br_head);
	set_irn_link(bl, res);
	return res;
}
#define get_block_info(block)       ((block_info_t *)get_irn_link(block))
#define set_block_info(block, info) set_irn_link(block, info)
static inline ir_node *block_info_get_last_ins(block_info_t *bi)
{
	if (!bi->last_ins)
		bi->last_ins = be_get_end_of_block_insertion_point(bi->bl);

	return bi->last_ins;
}
typedef struct _next_use_t {
	unsigned is_first_use : 1; /**< Indicate that this use is the first
	                                in the block. Needed to identify
	                                transport in values for the global
	                                pass. */
	sched_timestep_t step;     /**< The time step of the use. */
	ir_node *irn;              /**< The node at which the use happens. */
	struct _next_use_t *next;  /**< The next use in this block
	                                or NULL. */
} next_use_t;
static void *next_use_init(ir_phase *phase, const ir_node *irn, void *old)
{
	(void) phase;
	(void) irn;
	(void) old;
	return NULL;
}
static void build_next_uses(block_info_t *bi)
{
	ir_node *irn;

	sched_renumber(bi->bl);

	phase_init(&bi->next_uses, "next uses", bi->bel->irg, PHASE_DEFAULT_GROWTH, next_use_init, NULL);
	sched_foreach_reverse(bi->bl, irn) {
		int i;

		if (is_Phi(irn))
			break;

		for (i = get_irn_arity(irn) - 1; i >= 0; --i) {
			ir_node *op = get_irn_n(irn, i);
			next_use_t *curr = phase_get_irn_data(&bi->next_uses, op);
			next_use_t *use  = phase_alloc(&bi->next_uses, sizeof(use[0]));

			use->is_first_use = 1;
			use->step         = sched_get_time_step(irn);
			use->next         = curr;
			use->irn          = irn;

			if (curr) {
				curr->is_first_use = 0;
				assert(curr->step >= use->step);
			}

			phase_set_irn_data(&bi->next_uses, op, use);
		}
	}
}
#define get_current_use(bi, irn) phase_get_irn_data(&(bi)->next_uses, (irn))

static inline void advance_current_use(block_info_t *bi, const ir_node *irn)
{
	next_use_t *use = get_current_use(bi, irn);

	assert(use);
	phase_set_irn_data(&bi->next_uses, irn, use->next);
}
static __attribute__((unused)) int block_freq_gt(const void *a, const void *b)
{
	const ir_node * const *p = a;
	const ir_node * const *q = b;
	block_info_t *pi = get_block_info(*p);
	block_info_t *qi = get_block_info(*q);
	double diff = qi->exec_freq - pi->exec_freq;
	return (diff > 0) - (diff < 0);
}
static int block_freq_dfs_gt(const void *a, const void *b)
{
	const ir_node * const *p = a;
	const ir_node * const *q = b;
	block_info_t *pi = get_block_info(*p);
	block_info_t *qi = get_block_info(*q);
	double diff;

	if ((pi->exec_freq > 1.0 && qi->exec_freq > 1.0)
			|| (pi->exec_freq <= 1.0 && qi->exec_freq <= 1.0)) {
		const dfs_t *dfs = pi->bel->dfs;
		int pp = dfs_get_post_num(dfs, pi->bl);
		int pq = dfs_get_post_num(dfs, qi->bl);
		return pq - pp;
	}

	diff = qi->exec_freq - pi->exec_freq;
	return (diff > 0) - (diff < 0);
}
/*
 * Bring In
 *
 * Data structures to represent bring in variables.
 */
struct _bring_in_t {
	ir_node *irn;              /**< The node to bring in. */
	block_info_t *bi;          /**< The block to which bring in should happen. */
	int pressure_so_far;       /**< The maximal pressure till the first use of irn in bl. */
	ir_node *first_use;        /**< The first user of irn in bl. */
	sched_timestep_t use_step; /**< Schedule step of the first use. */

	int is_remat : 1;          /**< Is rematerializable. */
	int sect_pressure;         /**< Offset to maximum pressure in block. */
	struct list_head list;
	struct list_head sect_list;
	bring_in_t *sect_head;
};
static inline bring_in_t *new_bring_in(block_info_t *bi, ir_node *irn, const next_use_t *use)
{
	bring_in_t *br      = OALLOC(&bi->bel->ob, bring_in_t);
	br->irn             = irn;
	br->bi              = bi;
	br->first_use       = use->irn;
	br->use_step        = use->step;
	br->is_remat        = be_is_rematerializable(bi->bel->senv, irn, use->irn);
	br->pressure_so_far = bi->pressure;
	br->sect_pressure   = bi->front_pressure;
	br->sect_head       = br;

	INIT_LIST_HEAD(&br->list);
	INIT_LIST_HEAD(&br->sect_list);
	list_add_tail(&br->list, &bi->br_head);
	return br;
}
static int bring_in_cmp(const void *a, const void *b)
{
	const bring_in_t *p = *(const bring_in_t * const *) a;
	const bring_in_t *q = *(const bring_in_t * const *) b;
	double fp, fq;

	/* if one of both is a remat node, it will be done after the other. */
	if (p->is_remat != q->is_remat)
		return p->is_remat - q->is_remat;

	/* in the same block, the one nearer to the front has to be processed first!
	 * Otherwise the front_pressure 'trick' is not exact. */
	if (p->bi == q->bi)
		return p->use_step - q->use_step;

	fp = p->bi->exec_freq;
	fq = q->bi->exec_freq;

	/* if both have the same frequency, inspect the frequency of the definition */
	if (fp == fq) {
		double fdp = get_block_info(get_nodes_block(p->irn))->exec_freq;
		double fdq = get_block_info(get_nodes_block(q->irn))->exec_freq;

		/* if the defs of both have the same freq, we go for reverse dfs post order. */
		if (fdp == fdq) {
			const dfs_t *dfs = p->bi->bel->dfs;
			int pp = dfs_get_post_num(dfs, p->bi->bl);
			int pq = dfs_get_post_num(dfs, q->bi->bl);
			return pq - pp;
		}

		return (fdq > fdp) - (fdq < fdp);
	}

	return (fq > fp) - (fq < fp);
}
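/*
 * Summary of the order established above (descriptive only): remat entries
 * are processed last; entries of the same block are ordered by their first
 * use; across blocks, the higher block frequency wins, ties are broken by
 * the frequency of the defining block and finally by reverse DFS postorder.
 */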
static inline unsigned get_curr_distance(block_info_t *bi, const ir_node *irn, int is_usage)
{
	belady_env_t *env          = bi->bel;
	sched_timestep_t curr_step = sched_get_time_step(env->instr);
	next_use_t *use            = get_current_use(bi, irn);
	int flags                  = arch_irn_get_flags(irn);

	assert(!arch_irn_is_ignore(irn));

	/* We have to keep non-spillable nodes in the working set */
	if (flags & arch_irn_flags_dont_spill)
		return 0;

	/* at a definition, a use exactly at the current step has already
	 * happened; look at the next one */
	if (!is_usage && use && use->step == curr_step)
		use = use->next;

	if (use) {
		unsigned res = use->step - curr_step;

		assert(use->step >= curr_step);

		if (res != 0) {
			if (remat_live_range_ext && be_is_rematerializable(env->senv, irn, use->irn))
				res = REMAT_DIST;
			else if (bitset_contains_irn(env->spilled, irn))
				res *= already_spilled_factor;
		}

		return res;
	}

	return be_is_live_end(env->lv, bi->bl, irn) ? LIVE_END : DEAD;
}
static inline int is_local_phi(const ir_node *bl, const ir_node *irn)
{
	return is_Phi(irn) && get_nodes_block(irn) == bl;
}
/**
 * Check, if the value is something that is transported into a block.
 * That is, the value is defined elsewhere or defined by a Phi in the block.
 * @param env The belady environment.
 * @param bl  The block in question.
 * @param irn The node in question.
 * @return    1, if node is something transported into @p bl, 0 if not.
 * @note      The function will only give correct answers in the case
 *            where @p irn is unused in the block @p bl which is always
 *            the case in our usage scenario.
 */
static inline int is_transport_in(const ir_node *bl, const ir_node *irn)
{
	return get_nodes_block(irn) != bl || is_Phi(irn);
}
/**
 * Performs the actions necessary to grant the request that:
 * - new_vals can be held in registers
 * - as few as possible other values are disposed
 * - the worst values get disposed
 *
 * @p is_usage indicates that the values in new_vals are used (not defined).
 * In this case reloads must be performed.
 */
static void displace(block_info_t *bi, workset_t *new_vals, int is_usage)
{
	belady_env_t *env       = bi->bel;
	workset_t    *ws        = env->ws;
	ir_node     **to_insert = ALLOCAN(ir_node*, env->n_regs);

	int i, len, max_allowed, demand, iter;
	ir_node *val;

	/*
		1. Identify the number of needed slots and the values to reload
	*/
	demand = 0;
	workset_foreach(new_vals, val, iter) {
		/* mark value as used */

		if (! workset_contains(ws, val)) {
			DBG((dbg, DBG_DECIDE, "\t\tinsert %+F\n", val));
			to_insert[demand++] = val;
			if (is_usage) {
				next_use_t *use = get_current_use(bi, val);

				/*
				 * if we use a value which is transported in this block, i.e. a
				 * phi defined here or a live in, for the first time, we check
				 * if there is room for that guy to survive from the block's
				 * entrance to here or not.
				 */
				assert(use);
				assert(sched_get_time_step(env->instr) == (int) use->step);
				if (is_transport_in(bi->bl, val) && use->is_first_use) {
					bring_in_t *bri = new_bring_in(bi, val, use);
					bri->first_use = env->instr;

					/* reset the section pressure, since a new section starts. */
					bi->front_pressure = 0;

					DBG((dbg, DBG_DECIDE, "\t\tbring in node %+F, pressure %d:\n", val, bi->pressure));
					DBG((dbg, DBG_DECIDE, "\t\tno reload. must be considered at block start\n"));
				} else {
					bitset_add_irn(env->spilled, val);
					DBG((dbg, DBG_SPILL, "\t\tReload %+F before %+F\n", val, env->instr));
					be_add_reload(env->senv, val, env->instr, env->cls, 1);
				}
			}
		} else {
			assert(is_usage && "Defined value already in workset?!?");
			DBG((dbg, DBG_DECIDE, "\t\tskip %+F\n", val));
		}
	}
	DBG((dbg, DBG_DECIDE, "\t\tdemand = %d\n", demand));

	/*
		2. Make room for at least 'demand' slots
	*/
	len         = workset_get_length(ws);
	max_allowed = env->n_regs - demand;

	/* Only make more free room if we do not have enough */
	if (len > max_allowed) {
		DBG((dbg, DBG_DECIDE, "\t\tdisposing %d values\n", len - max_allowed));

		/* get current next-use distance */
		for (i = 0; i < ws->len; ++i) {
			ir_node *val  = workset_get_val(ws, i);
			unsigned dist = get_curr_distance(bi, val, is_usage);
			workset_set_time(ws, i, dist);
		}

		/* sort entries by increasing nextuse-distance */
		workset_sort(ws);

		/* kill the last 'demand' entries in the array */
		workset_set_length(ws, max_allowed);
	}

	/*
		3. Insert the new values into the workset
		Also, we update the pressure in the block info.
		That is important for the global pass to decide
		how many values can live through the block.
	*/
	for (i = 0; i < demand; ++i)
		workset_insert(env, env->ws, to_insert[i]);

	/* TODO: simplify this expression? */
	bi->pressure       = MAX(bi->pressure, workset_get_length(env->ws));
	bi->front_pressure = MAX(bi->front_pressure, workset_get_length(env->ws));
}
/**
 * For the given block @p block, decide for each value
 * whether it is used from a register or is reloaded.
 */
static void belady(belady_env_t *env, int id)
{
	block_info_t *block_info = new_block_info(env, id);
	const ir_node *block     = block_info->bl;

	workset_t *new_vals;
	ir_node *irn;
	int i, arity, iter;

	DBG((dbg, DBG_WSETS, "Belady on %+F\n", block_info->bl));
	new_vals = new_workset(env, &env->ob);
	workset_clear(env->ws);

	/* build the next use information for this block. */
	build_next_uses(block_info);

	env->instr_nr = 0;
	block_info->first_non_in = NULL;

	/* process the block from start to end */
	sched_foreach(block, irn) {
		assert(workset_get_length(env->ws) <= env->n_regs && "Too many values in workset!");

		/* Projs are handled with the tuple value.
		 * Phis are no real instr (see insert_starters())
		 * instr_nr does not increase */
		if (is_Proj(irn) || is_Phi(irn))
			continue;
		DBG((dbg, DBG_DECIDE, "\t%+F\n", irn));

		if (!block_info->first_non_in)
			block_info->first_non_in = irn;

		/* set instruction in the workset */
		env->instr = irn;

		/* allocate all values _used_ by this instruction */
		workset_clear(new_vals);
		for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
			workset_insert(env, new_vals, get_irn_n(irn, i));
		}
		DBG((dbg, DBG_DECIDE, "\t* uses\n"));
		displace(block_info, new_vals, 1);

		/*
		 * set all used variables to the next use in their next_use_t list
		 * Also, kill all dead variables from the workset. They are only
		 * augmenting the pressure. Note, that a variable is dead
		 * if it has no further use in this block and is *not* live end
		 */
		for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
			ir_node *op = get_irn_n(irn, i);
			next_use_t *use = get_current_use(block_info, op);

			assert(use);
			if (!use->next && !be_is_live_end(env->lv, block, op))
				workset_remove(env->ws, op);

			advance_current_use(block_info, op);
		}

		/* allocate all values _defined_ by this instruction */
		workset_clear(new_vals);
		if (get_irn_mode(irn) == mode_T) { /* special handling for Tuples and Projs */
			const ir_edge_t *edge;

			foreach_out_edge(irn, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				workset_insert(env, new_vals, proj);
			}
		} else {
			workset_insert(env, new_vals, irn);
		}
		DBG((dbg, DBG_DECIDE, "\t* defs\n"));
		displace(block_info, new_vals, 0);

		if (is_op_forking(get_irn_op(env->instr))) {
			for (i = get_irn_arity(env->instr) - 1; i >= 0; --i) {
				ir_node *op = get_irn_n(env->instr, i);
				block_info->free_at_jump -= arch_irn_consider_in_reg_alloc(env->cls, op);
			}
		}

		env->instr_nr++;
	}

	phase_free(&block_info->next_uses);

	/* Remember end-workset for this block */
	block_info->ws_end = workset_clone(env, &env->ob, env->ws);
	DBG((dbg, DBG_WSETS, "End workset for %+F:\n", block));
	workset_foreach(block_info->ws_end, irn, iter)
		DBG((dbg, DBG_WSETS, "  %+F (%u)\n", irn, workset_get_time(block_info->ws_end, iter)));
	DBG((dbg, DBG_WSETS, "Max pressure in block: %d\n", block_info->pressure));

	/* now, initialize the front pressure to 0. */
	block_info->front_pressure = 0;
}
/*
 * The global part
 */
#define workset_set_version(ws, i, t) ((ws)->vals[(i)].time = (t))
#define workset_get_version(ws, i)    ((ws)->vals[(i)].time)

#define ver_oldest           (0)
#define ver_youngest         ((unsigned) -1)
#define ver_make_newer(v)    ((v) + 1)
#define ver_is_older(v, w)   ((v) < (w))
#define ver_is_younger(v, w) ((v) > (w))
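/*
 * Sketch of the versioning protocol built on these macros: ges->version is
 * advanced (ver_make_newer) after every optimize_variable() transaction.
 * A workset slot stamped with an older version is fixed, i.e. committed by
 * an earlier transaction; a slot stamped ver_youngest may still be reused.
 * See can_make_available_at_end() below.
 */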
enum {
	irn_act_none = 0,
	irn_act_reload,
	irn_act_live_through
};

typedef struct _block_state_t {
	struct _block_state_t *next;
	struct _block_state_t *next_intern;
	block_info_t *bi;
	int pressure;
	workset_t *end_state;
} block_state_t;
typedef struct _irn_action_t {
	struct _irn_action_t *next;
	ir_node *irn;
	const ir_node *bl;
	int act;
} irn_action_t;
typedef struct _global_end_state_t {
	belady_env_t *env;
	bitset_t *succ_phis;
	bitset_t *committed;
	struct obstack obst;
	unsigned version;

	unsigned *bs_tops_vers;
	block_state_t **bs_tops;
	block_state_t *bs_top;
	irn_action_t *ia_top;
} global_end_state_t;
typedef struct _rollback_info_t {
	void *obst_level;
	block_state_t *bs_top;
	irn_action_t *ia_top;
} rollback_info_t;
static inline block_state_t *get_block_state(global_end_state_t *ges, const block_info_t *bi)
{
	int id = bi->id;
	assert(!ver_is_younger(ges->bs_tops_vers[id], ges->version));
	return ver_is_older(ges->bs_tops_vers[id], ges->version) ? NULL : ges->bs_tops[bi->id];
}
static inline const workset_t *get_end_state(global_end_state_t *ges, block_info_t *bi)
{
	block_state_t *bs = get_block_state(ges, bi);
	return bs ? bs->end_state : bi->ws_end;
}
static block_state_t *new_block_state(global_end_state_t *ges, block_info_t *bi)
{
	block_state_t *bs = get_block_state(ges, bi);
	block_state_t *nw = OALLOC(&ges->obst, block_state_t);

	nw->next_intern = bs;
	nw->next        = ges->bs_top;
	nw->bi          = bi;

	if (bs) {
		nw->pressure  = bs->pressure;
		nw->end_state = workset_clone(ges->env, &ges->obst, bs->end_state);
	} else {
		nw->pressure  = bi->pressure;
		nw->end_state = workset_clone(ges->env, &ges->obst, bi->ws_end);
	}

	ges->bs_top               = nw;
	ges->bs_tops[bi->id]      = nw;
	ges->bs_tops_vers[bi->id] = ges->version;
	return nw;
}
static irn_action_t *new_irn_action(global_end_state_t *ges, ir_node *irn, const ir_node *bl)
{
	irn_action_t *ia = OALLOC(&ges->obst, irn_action_t);

	ia->irn  = irn;
	ia->bl   = bl;
	ia->act  = irn_act_none;
	ia->next = ges->ia_top;
	ges->ia_top = ia;
	return ia;
}
static inline rollback_info_t trans_begin(global_end_state_t *ges)
{
	rollback_info_t rb;
	rb.obst_level = obstack_base(&ges->obst);
	rb.bs_top     = ges->bs_top;
	rb.ia_top     = ges->ia_top;
	return rb;
}
static inline void trans_rollback(global_end_state_t *ges, rollback_info_t *rb)
{
	block_state_t *bs;

	/* unwind all the stacks indexed with the block number */
	for (bs = ges->bs_top; bs != rb->bs_top; bs = bs->next) {
		unsigned id = bs->bi->id;
		ges->bs_tops[id] = bs->next_intern;
	}

	ges->ia_top = rb->ia_top;
	ges->bs_top = rb->bs_top;
	obstack_free(&ges->obst, rb->obst_level);
}
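/*
 * Typical transaction pattern (a sketch; cf. can_make_available_at_end()
 * and optimize_variable() below):
 *
 *   rollback_info_t rb = trans_begin(ges);
 *   ... speculatively push block states and irn actions ...
 *   if (too_expensive)
 *       trans_rollback(ges, &rb);   -- undo everything since trans_begin
 */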
static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level);
static double can_make_available_at_end(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
{
	block_info_t *bi     = get_block_info(bl);
	const workset_t *end = get_end_state(ges, bi);
	double res;
	int index;

	DBG((dbg, DBG_GLOBAL, "\t%2Dcan make avail %+F at end of %+F\n", level, irn, bl));

	/*
	 * to make the value available at end,
	 * we have several cases here.
	 *
	 * - we already visited that block.
	 * - If the value is in the final end set, return 0.
	 *   somebody else already allocated it there.
	 * - If not and the final end set is already full,
	 *   we cannot make the value available at the end
	 *   of this block. return INFINITY.
	 * - Else (value not in final end set and there is room):
	 *   1) The value is in a register at the end of the local Belady pass.
	 *      Allocate a slot in the final end set and return 0.
	 *   2) The value is not in the Belady end set:
	 *      If the block's capacity is < k then check what it costs
	 *      to transport the value from upper blocks to this block.
	 *      Compare that against the reload cost in this block. If
	 *      cheaper, do the other thing. If not, reload it here.
	 */

	/* if the end set contains it already, it is in a reg and it costs nothing
	 * to load it to one. */
	index = workset_get_index(end, irn);
	if (index >= 0) {
		unsigned ver = workset_get_version(end, index);
		DBG((dbg, DBG_GLOBAL, "\t%2Dnode is in the end set and is %s fixed\n",
					level, ver_is_older(ver, ges->version) ? "already" : "not yet"));

		/*
		 * if the version is older, the value is already fixed
		 * and cannot be removed from the end set.
		 *
		 * If not, we will create a new block state for that block since
		 * we modify it by giving the end state a new version.
		 */
		if (ver_is_younger(ver, ges->version)) {
			block_state_t *bs = new_block_state(ges, bi);
			workset_set_version(bs->end_state, index, ges->version);
		}

		res = 0.0;
		goto end;
	}

	/*
	 * Now we have two options:
	 * 1) Reload the value at the end of the block.
	 *    Therefore, perhaps, we have to erase another one from the workset.
	 *    This may only be done if it has not been fixed.
	 *    Since fixed means that a previous pass has decided that that value
	 *    *has* to stay in the end set.
	 * 2) we can try, if the capacity of the block allows it, to let
	 *    the value live through the block and make it available at
	 *    the entrance.
	 *
	 * First, we test the local (reload in this block) alternative
	 * and compare against the other alternative.
	 * Of course, we choose the cheaper one.
	 */
	{
		int n_regs = bi->free_at_jump;
		int len    = workset_get_length(end);
		int slot   = -1;
		int i;

		res = HUGE_VAL;

		/*
		 * look if there is room in the end array
		 * for the variable. Note that this does not
		 * mean that the var can live through the block.
		 * There is just room at the *end*.
		 */
		if (len < n_regs) {
			DBG((dbg, DBG_GLOBAL, "\t%2Dthe end set has %d free slots\n", level, n_regs - len));
			slot = len;
		} else {
			for (i = 0; i < len; ++i) {
				unsigned ver = workset_get_version(end, i);
				if (ver_is_younger(ver, ges->version))
					break;
			}

			if (i < len) {
				DBG((dbg, DBG_GLOBAL, "\t%2D%+F (slot %d) can be erased from the end set\n",
							level, end->vals[i].irn, i));
				slot = i;
			}
		}

		/*
		 * finally there is some room. we can at least reload the value.
		 * but we will try to let it live through anyhow.
		 */
		if (slot >= 0) {
			irn_action_t *vs    = new_irn_action(ges, irn, bi->bl);
			block_state_t *bs   = new_block_state(ges, bi);
			workset_t *end      = bs->end_state;
			ir_node *ins_before = block_info_get_last_ins(bi);
			double reload_here  = be_get_reload_costs(bi->bel->senv, irn, ins_before);
			int pressure_ok     = bs->pressure < n_regs;

			if (reload_here < bi->reload_cost)
				reload_here = 0.0;

			/*
			 * No matter what we do, the value will be in the end set
			 * of the block from now on (of course only regarding the
			 * current state). Enter it and set the new length
			 * appropriately.
			 */
			end->vals[slot].irn = irn;
			workset_set_version(end, slot, ges->version);
			workset_set_length(end, MAX(workset_get_length(end), slot + 1));

			vs->act = irn_act_reload;
			res     = reload_here;

			DBG((dbg, DBG_GLOBAL, "\t%2Dthere is a free slot. capacity=%d, reload here=%f, pressure %s\n",
						level, n_regs - bs->pressure, reload_here, pressure_ok ? "ok" : "insufficient"));

			/* look if we can bring the value in. */
			if (pressure_ok && reload_here > 0.0) {
				rollback_info_t rb = trans_begin(ges);
				double new_limit   = MIN(reload_here, limit);

				vs->act = irn_act_live_through;
				bs->pressure += 1;
				res = can_bring_in(ges, bl, irn, new_limit, level + 1);

				/*
				 * if bring in is too expensive re-adjust the pressure
				 * and roll back the state
				 */
				if (res >= reload_here) {
					bs->pressure -= 1;
					vs->act = irn_act_reload;
					trans_rollback(ges, &rb);
					res = reload_here;
				}
			}

			DBG((dbg, DBG_GLOBAL, "\t%2D%s\n", level,
						vs->act == irn_act_reload ? "reloading" : "bringing in"));
		}
	}

end:
	DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, res));
	return res;
}
static double can_bring_in(global_end_state_t *ges, ir_node *bl, ir_node *irn, double limit, int level)
{
	belady_env_t *env = ges->env;
	double glob_costs = HUGE_VAL;

	DBG((dbg, DBG_GLOBAL, "\t%2Dcan bring in (max %f) for %+F at block %+F\n", level, limit, irn, bl));

	if (is_transport_in(bl, irn)) {
		int i, n           = get_irn_arity(bl);
		rollback_info_t rb = trans_begin(ges);

		glob_costs = 0.0;
		for (i = 0; i < n; ++i) {
			ir_node *pr = get_Block_cfgpred_block(bl, i);
			ir_node *op = is_local_phi(bl, irn) ? get_irn_n(irn, i) : irn;
			double c;

			/*
			 * there might be Unknowns as operands of Phis in that case
			 * we set the costs to zero, since they won't get spilled.
			 */
			if (arch_irn_consider_in_reg_alloc(env->cls, op))
				c = can_make_available_at_end(ges, pr, op, limit - glob_costs, level + 1);
			else
				c = 0.0;

			glob_costs += c;

			if (glob_costs >= limit) {
				glob_costs = HUGE_VAL;
				trans_rollback(ges, &rb);
				goto end;
			}
		}
	}

end:
	DBG((dbg, DBG_GLOBAL, "\t%2D-> %f\n", level, glob_costs));
	return glob_costs;
}
static void materialize_and_commit_end_state(global_end_state_t *ges)
{
	belady_env_t *env = ges->env;
	irn_action_t *ia;
	block_state_t *bs;

	DBG((dbg, DBG_GLOBAL, "\tmaterializing\n"));

	/*
	 * Perform all the variable actions.
	 */
	for (ia = ges->ia_top; ia != NULL; ia = ia->next) {
		switch (ia->act) {
		case irn_act_live_through:
			{
				block_info_t *bi = get_block_info(ia->bl);
				bring_in_t *iter;

				if (is_local_phi(ia->bl, ia->irn)) {
					bitset_add_irn(ges->succ_phis, ia->irn);
					DBG((dbg, DBG_GLOBAL, "\t\tlive through phi kept alive: %+F\n", ia->irn));
				}

				list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list)
					++iter->sect_pressure;
				++bi->front_pressure;
			}
			break;
		case irn_act_reload:
			be_add_reload_at_end(env->senv, ia->irn, ia->bl, env->cls, 1);
			DBG((dbg, DBG_GLOBAL, "\t\tadding reload of %+F at end of %+F\n", ia->irn, ia->bl));
			break;
		default:
			DBG((dbg, DBG_GLOBAL, "\t\t%+F is in the end set of %+F\n", ia->irn, ia->bl));
		}
	}

	/*
	 * Commit the block end states
	 */
	for (bs = ges->bs_top; bs != NULL; bs = bs->next) {
		block_info_t *bi = bs->bi;

		if (!bitset_is_set(ges->committed, bi->id)) {
			DBG((dbg, DBG_GLOBAL, "\t\tcommitting workset of %+F with version %x\n", bi->bl, ges->version));
			// bes->bs->end_state->vals[idx].version = ges->version;
			workset_copy(env, bi->ws_end, bs->end_state);
			DBG((dbg, DBG_GLOBAL, "\t\told pressure: %d, new pressure: %d, end length: %d\n",
						bi->pressure, bs->pressure, workset_get_length(bs->end_state)));
			bi->pressure = bs->pressure;
			bitset_set(ges->committed, bi->id);
		}
	}

	/* clear the committed bitset. the next call is expecting it. */
	bitset_clear_all(ges->committed);
}
static ir_node *better_spilled_here(const bring_in_t *br)
{
	const block_info_t *bi = br->bi;
	double spill_ef        = get_block_info(get_nodes_block(br->irn))->exec_freq;

	/*
	 * If the bring in node is a phi in the bring in block,
	 * we look at all definitions and sum up their execution frequencies,
	 * since spills will be placed there.
	 * (except for the case where an operand is also a phi which is spilled :-( )
	 * If that cost is higher than spilling the phi in that block, we opt for
	 * bringing the phi into the block and spill it there.
	 */
	if (is_local_phi(bi->bl, br->irn)) {
		ir_node *bl = bi->bl;
		int i;

		spill_ef = 0.0;
		for (i = get_Block_n_cfgpreds(bl) - 1; i >= 0; --i)
			spill_ef += get_block_info(get_Block_cfgpred_block(bl, i))->exec_freq;
	}

	return bi->exec_freq < spill_ef ? sched_prev(bi->first_non_in) : NULL;
}
static int get_max_pressure_so_far(const block_info_t *bi, const bring_in_t *br)
{
	const struct list_head *l;
	int res = INT_MIN;

	assert(br->bi == bi);
	for (l = &br->list; l != &bi->br_head; l = l->prev) {
		br  = list_entry(l, bring_in_t, list);
		res = MAX(res, br->sect_pressure);
	}

	/* finally consider the front pressure distance and add the reference line (the global block pressure) */
	return MAX(res, bi->front_pressure);
}
#define block_last_bring_in(bi) list_entry((bi)->br_head.prev, bring_in_t, list)

static int get_block_max_pressure(const block_info_t *bi)
{
	int max = get_max_pressure_so_far(bi, block_last_bring_in(bi));
	return MAX(bi->pressure, max);
}
/**
 * Try to bring a variable into a block.
 * @param ges The state of all end sets.
 * @param br  The bring-in variable (block and node) to optimize.
 */
static void optimize_variable(global_end_state_t *ges, bring_in_t *br)
{
	block_info_t *bi          = br->bi;
	ir_node *irn              = br->irn;
	ir_node *bl               = bi->bl;
	belady_env_t *env         = ges->env;
	void *reset_level         = obstack_base(&ges->obst);
	int k                     = env->n_regs;
	int pressure_upto_use     = get_max_pressure_so_far(bi, br);
	int front_pressure        = bi->front_pressure;
	ir_node *better_spill_loc = NULL;

	assert(front_pressure <= k);
	assert(pressure_upto_use <= k);

	DBG((dbg, DBG_GLOBAL, "fixing %+F at %+F (%f), front pr: %d, pr to use: %d, first use: %x\n",
				irn, bl, bi->exec_freq, front_pressure, pressure_upto_use, br->first_use));

	// assert(!is_local_phi(bl, irn) || !bitset_contains_irn(ges->succ_phis, irn));

	/*
	 * if we cannot bring the value to the use, let's see if it would be worthwhile
	 * to bring the value to the beginning of the block to have a better spill
	 * location.
	 *
	 * better_spilled_here will return a node after which the value can be spilled,
	 * or NULL if this block does not provide a better spill location.
	 */
	if (pressure_upto_use >= k && front_pressure < k && !bitset_contains_irn(env->spilled, irn))
		better_spill_loc = better_spilled_here(br);

	/*
	 * If either we can bring the value to the use or we should try
	 * to bring it here to do the spill here, let's try to bring it in.
	 */
	if (better_spill_loc || pressure_upto_use < k) {
		block_state_t *bs;
		double bring_in_costs, local_costs;
		rollback_info_t trans;
		int pressure_inc;

		/* process all variables which shall be in a reg at
		 * the beginning of the block in the order of the next use. */
		local_costs = be_get_reload_costs(env->senv, irn, br->first_use);

		/* reset the lists */
		ges->bs_top = NULL;
		ges->ia_top = NULL;

		/* if the variable will live into this block, we must adapt the pressure.
		 * The new pressure is the MAX of:
		 * 1) the total block pressure
		 * 2) the pressure so far + the front pressure increase + 1
		 *
		 * If the second is larger than the first,
		 * we have to increment the total block pressure and hence
		 * save the old pressure to restore it in case of failing to
		 * bring the variable into the block in a register.
		 */
		trans = trans_begin(ges);
		bs    = new_block_state(ges, bi);
		pressure_inc = MAX(bs->pressure, better_spill_loc ? front_pressure : pressure_upto_use + 1);
		bs->pressure = pressure_inc;

		assert(bi->pressure <= k);
		DBG((dbg, DBG_GLOBAL, "\ttrans in var %+F, version %x\n", irn, ges->version));
		bring_in_costs = can_bring_in(ges, bl, irn, local_costs, 1);
		DBG((dbg, DBG_GLOBAL, "\tbring in: %f, local: %f\n", bring_in_costs, local_costs));

		/*
		 * Following cases can now occur:
		 * 1) There is room and costs are ok
		 * 2) Cannot bring to use but can spill at begin and costs are ok
		 * 3) neither of the two worked.
		 *
		 * following actions can be taken:
		 * a) commit the changes
		 * b) mark phi as succeeded if node was phi
		 * c) insert reload at use location
		 * d) give a spill location hint
		 *
		 * this is the case/action matrix
		 * 1) a, b
		 * 2) a, c, d
		 * 3) c
		 */

		/* the costs were acceptable... */
		if (bring_in_costs < local_costs) {
			int check = 0;
			bring_in_t *iter;

			/*
			 * case 1 and first part of case 2:
			 * commit all the changes done. this manifests the bring-in action.
			 * if the transport-in was a phi (that is actually used in block)
			 * mark it in the succ_phis set to *not* phi spill it.
			 */
			materialize_and_commit_end_state(ges);
			if (is_local_phi(bl, irn))
				bitset_add_irn(ges->succ_phis, irn);

			DBG((dbg, DBG_GLOBAL, "\t-> bring it in (pressure inc: %d)\n", pressure_inc));

			/* second half of case 2 */
			if (pressure_upto_use >= k) {
				DBG((dbg, DBG_GLOBAL, "\t-> use blocked. local reload: %+F, try spill at: %+F\n",
							br->first_use, better_spill_loc));
				be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
				be_add_spill(env->senv, irn, better_spill_loc);
				ir_nodeset_insert(env->extra_spilled, irn);
			}

			/*
			 * go from the last bring in use to the first and add all the variables
			 * which additionally live through the block to their pressure.
			 * at the point where the actually treated use is, we have to increase
			 * the pressure by one more as the brought in value starts to count.
			 * Finally, adjust the front pressure as well.
			 */
			pressure_inc = 0;
			list_for_each_entry_reverse(bring_in_t, iter, &bi->br_head, list) {
				if (iter == br)
					pressure_inc += pressure_upto_use < k;
				iter->sect_pressure += pressure_inc;
				check = MAX(check, iter->sect_pressure);
				DBG((dbg, DBG_GLOBAL, "\tinc section pressure of %+F by %d to %d\n", iter->first_use, pressure_inc, iter->sect_pressure));
			}
			bi->front_pressure += pressure_inc;
			assert(MAX(check, bi->front_pressure) <= bi->pressure);
			DBG((dbg, DBG_GLOBAL, "\t-> result: p: %d, fp: %d\n", bi->pressure, bi->front_pressure));
		} else {
			/* case 3: nothing worked. insert normal reload and rollback. */
			DBG((dbg, DBG_GLOBAL, "\t-> bring in was too expensive. local reload: %+F\n", br->first_use));
			be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
			bitset_add_irn(env->spilled, irn);
			trans_rollback(ges, &trans);
		}
	} else {
		/* there was no opportunity for optimization at all. reload and be sad ... */
		DBG((dbg, DBG_GLOBAL, "\t-> can't do anything but reload before %+F\n", br->first_use));
		be_add_reload(env->senv, irn, br->first_use, env->cls, 1);
		bitset_add_irn(env->spilled, irn);
	}

	DBG((dbg, DBG_GLOBAL, "\n"));

	/* reset the obstack and create a new version. */
	obstack_free(&ges->obst, reset_level);
	ges->version = ver_make_newer(ges->version);
}
static bring_in_t **determine_global_order(belady_env_t *env)
{
	bring_in_t **res;
	bring_in_t *elm;
	int i, n = 0;

	for (i = env->n_blocks - 1; i >= 0; --i) {
		block_info_t *bi = get_block_info(env->blocks[i]);
		list_for_each_entry(bring_in_t, elm, &bi->br_head, list) {
			obstack_ptr_grow(&env->ob, elm);
			n += 1;
		}
	}

	obstack_ptr_grow(&env->ob, NULL);
	res = obstack_finish(&env->ob);
	qsort(res, n, sizeof(res[0]), bring_in_cmp);
	return res;
}
static void global_assign(belady_env_t *env)
{
	ir_nodeset_iterator_t iter;
	global_end_state_t ges;
	bring_in_t **br;
	ir_node *irn;
	int i, j;

	/*
	 * sort the blocks according to execution frequency.
	 * That's not necessary for belady() but for the global pass later on.
	 */
	qsort(env->blocks, env->n_blocks, sizeof(env->blocks[0]), block_freq_dfs_gt);

	memset(&ges, 0, sizeof(ges));
	obstack_init(&ges.obst);
	ges.env          = env;
	ges.version      = ver_make_newer(ver_oldest);
	ges.succ_phis    = bitset_irg_obstack_alloc(&ges.obst, env->irg);
	ges.committed    = bitset_obstack_alloc(&ges.obst, env->n_blocks);
	ges.bs_tops      = OALLOCN(&ges.obst, block_state_t*, env->n_blocks);
	ges.bs_tops_vers = OALLOCN(&ges.obst, unsigned,       env->n_blocks);

	/* invalidate all state stack pointer versions */
	for (i = 0; i < env->n_blocks; ++i) {
		block_info_t *bi = get_block_info(env->blocks[i]);
		ges.bs_tops_vers[i] = ver_oldest;

		/* Set all block end set entries to the youngest version */
		for (j = workset_get_length(bi->ws_end) - 1; j >= 0; --j)
			workset_set_version(bi->ws_end, j, ver_youngest);
	}

	/* determine order and optimize them */
	for (br = determine_global_order(env); *br; ++br)
		optimize_variable(&ges, *br);

	/*
	 * Now we spill phis which cannot be kept since they were replaced
	 * by reloads at the block entrances.
	 */
	for (i = 0; i < env->n_blocks; ++i) {
		ir_node *bl = env->blocks[i];

		sched_foreach(bl, irn) {
			if (!is_Phi(irn))
				break;

			if (arch_irn_consider_in_reg_alloc(env->cls, irn)
					&& !bitset_contains_irn(ges.succ_phis, irn))
				be_spill_phi(env->senv, irn);
		}
	}

	/* check dominance for specially spilled nodes. */
	foreach_ir_nodeset (env->extra_spilled, irn, iter)
		make_spill_locations_dominate_irn(env->senv, irn);
}
static void collect_blocks(ir_node *bl, void *data)
{
	belady_env_t *env = data;
	++env->n_blocks;
	obstack_ptr_grow(&env->ob, bl);
}
/**
 * Do spilling for a register class on a graph using the belady heuristic.
 * In the transformed graph, the register pressure never exceeds the number
 * of available registers.
 *
 * @param birg The backend graph
 * @param cls  The register class to spill
 */
void be_spill_belady(be_irg_t *birg, const arch_register_class_t *cls)
{
	ir_graph *irg = be_get_birg_irg(birg);
	belady_env_t env;
	int i, n_regs;

	/* some special classes contain only ignore regs, nothing to do then */
	n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
	if (n_regs == 0)
		return;

	be_clear_links(irg);

	/* init belady env */
	obstack_init(&env.ob);
	env.irg           = irg;
	env.arch          = be_get_birg_arch_env(birg);
	env.cls           = cls;
	env.lv            = be_get_birg_liveness(birg);
	env.dfs           = env.lv->dfs;
	env.n_regs        = n_regs;
	env.ws            = new_workset(&env, &env.ob);
	env.senv          = be_new_spill_env(birg);
	env.ef            = be_get_birg_exec_freq(birg);
	env.spilled       = bitset_irg_obstack_alloc(&env.ob, irg);
	env.extra_spilled = ir_nodeset_new(64);
	env.n_blocks      = 0;

	irg_block_walk_graph(irg, NULL, collect_blocks, &env);
	obstack_ptr_grow(&env.ob, NULL);
	env.blocks = obstack_finish(&env.ob);

	/* renumbering in the blocks gives nicer debug output as numbers are smaller. */
#ifdef DEBUG_libfirm
	for (i = 0; i < env.n_blocks; ++i)
		sched_renumber(env.blocks[i]);
#endif

	/* Fix high register pressure in blocks with belady algorithm */
	for (i = 0; i < env.n_blocks; ++i)
		belady(&env, i);

	global_assign(&env);

	/* check dominance for specially spilled nodes. */
	{
		ir_nodeset_iterator_t iter;
		ir_node *irn;

		foreach_ir_nodeset (env.extra_spilled, irn, iter)
			make_spill_locations_dominate_irn(env.senv, irn);
	}

	/* Insert spill/reload nodes into the graph and fix usages */
	be_insert_spills_reloads(env.senv);

	/* clean up */
	be_delete_spill_env(env.senv);
	ir_nodeset_del(env.extra_spilled);

	obstack_free(&env.ob, NULL);
}
void be_init_spillbelady2(void)
{
	lc_opt_entry_t *be_grp    = lc_opt_get_grp(firm_opt_get_root(), "be");
	lc_opt_entry_t *spill_grp = lc_opt_get_grp(be_grp, "spill");
	lc_opt_entry_t *bel2_grp  = lc_opt_get_grp(spill_grp, "belady2");

	static be_spiller_t belady_spiller = {
		be_spill_belady
	};

	lc_opt_add_table(bel2_grp, options);
	be_register_spiller("belady2", &belady_spiller);
	FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady2");
}

BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady2);