- fixed comment: bs cannot be NULL anymore (and was never NULL previously)
[libfirm] / ir / be / bespillbelady3.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       MIN-algorithm with some twists (aka belady spiller v3)
23  * @author      Matthias Braun
24  * @date        15.01.2008
25  * @version     $Id$
26  *
27  * TODO:
28  *   - handle phis correctly, decide whether we should spill them ~ok
29  *   - merge multiple start worksets of blocks ~ok
30  *   - how and when to do the tentative phase...
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include <stdbool.h>
37
38 #include "debug.h"
39 #include "list.h"
40 #include "pdeq.h"
41
42 #include "irnode_t.h"
43 #include "irprintf.h"
44 #include "iredges_t.h"
45 #include "irloop_t.h"
46 #include "irgwalk.h"
47 #include "execfreq.h"
48
49 #include "bemodule.h"
50 #include "bespill.h"
51 #include "beutil.h"
52 #include "bespilloptions.h"
53 #include "besched_t.h"
54 #include "be_t.h"
55
56 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
57
58 typedef struct loop_edge_t loop_edge_t;
59 struct loop_edge_t {
60         ir_node     *block;
61         int          pos;
62         loop_edge_t *next;
63 };
64
65 typedef struct loop_info_t loop_info_t;
66 struct loop_info_t {
67         ir_loop     *loop;
68         loop_edge_t *entry_edges;
69         loop_edge_t *exit_edges;
70         size_t       max_register_pressure;
71 };
72
73 typedef struct worklist_entry_t worklist_entry_t;
74 struct worklist_entry_t {
75         struct list_head  head;
76         ir_node          *value;
77         ir_node          *reload_point;
78         ir_loop          *unused_livethrough_loop;
79 };
80
81 typedef struct worklist_t worklist_t;
82 struct worklist_t {
83         struct list_head  live_values;
84         size_t            n_live_values;
85 };
86
87 typedef struct block_info_t block_info_t;
88 struct block_info_t {
89         worklist_t *start_worklist;
90         worklist_t *end_worklist;
91 };
92
93 static const arch_env_t            *arch_env;
94 static const arch_register_class_t *cls;
95 static struct obstack               obst;
96 static spill_env_t                 *senv;
97 static size_t                       n_regs;
98 static size_t                       max_register_pressure;
99 static bool                         tentative_mode;
100 static bool                         should_have_reached_fixpoint;
101 static ir_exec_freq                *exec_freq;
102
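/**
 * Allocates a new, empty worklist on the spiller's obstack.
 */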
103 static worklist_t *new_worklist(void)
104 {
105         worklist_t *worklist = obstack_alloc(&obst, sizeof(worklist[0]));
106         memset(worklist, 0, sizeof(worklist[0]));
107
108         INIT_LIST_HEAD(&worklist->live_values);
109         worklist->n_live_values    = 0;
110
111         return worklist;
112 }
113
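/**
 * Clears the visited mark of a node. The visited flag is used here to encode
 * membership in the currently activated worklist.
 */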
114 static void mark_irn_not_visited(ir_node *node)
115 {
116         set_irn_visited(node, get_irg_visited(current_ir_graph) - 1);
117 }
118
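/**
 * Returns true if the value is part of the currently activated worklist
 * (encoded via the visited flag).
 */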
119 static bool worklist_contains(const ir_node *node)
120 {
121         return irn_visited(node);
122 }
123
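/**
 * Returns the block_info_t of a block, allocating and zero-initializing it on
 * first use. The info is stored in the block's link field.
 */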
124 static block_info_t *get_block_info(ir_node *block)
125 {
126         block_info_t *info = get_irn_link(block);
127         if (info != NULL)
128                 return info;
129
130         info = obstack_alloc(&obst, sizeof(info[0]));
131         memset(info, 0, sizeof(info[0]));
132         set_irn_link(block, info);
133         return info;
134 }
135
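/**
 * Clears the visited marks and link fields of all values in the worklist,
 * undoing a previous activate_worklist()/duplicate_activate_worklist().
 */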
136 static void deactivate_worklist(const worklist_t *worklist)
137 {
138         struct list_head *entry;
139
140         list_for_each(entry, &worklist->live_values) {
141                 worklist_entry_t *wl_entry
142                         = list_entry(entry, worklist_entry_t, head);
143                 assert(worklist_contains(wl_entry->value));
144                 mark_irn_not_visited(wl_entry->value);
145                 set_irn_link(wl_entry->value, NULL);
146         }
147 }
148
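/**
 * Marks all values of the worklist as visited and lets their link fields
 * point to the corresponding worklist entries, so worklist_contains() and
 * lookups via get_irn_link() work.
 */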
149 static void activate_worklist(const worklist_t *worklist)
150 {
151         struct list_head *entry;
152
153         list_for_each(entry, &worklist->live_values) {
154                 worklist_entry_t *wl_entry
155                         = list_entry(entry, worklist_entry_t, head);
156                 assert(!worklist_contains(wl_entry->value));
157                 mark_irn_visited(wl_entry->value);
158                 set_irn_link(wl_entry->value, wl_entry);
159         }
160 }
161
162 /**
163  * duplicates a worklist and directly activates it
164  */
165 static worklist_t *duplicate_activate_worklist(const worklist_t *worklist,
166                 ir_node *block, ir_node *succ_block, int succ_pos)
167 {
168         ir_node          *reload_point  = NULL;
169         size_t            n_live_values = 0;
170         worklist_t       *new_worklist;
171         struct list_head *entry;
172
173         if (succ_block != NULL &&
174                         (get_Block_n_cfgpreds(succ_block) > 1
175                          || get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) > 1)) {
176                 reload_point = be_get_end_of_block_insertion_point(block);
177         }
178
179         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
180         INIT_LIST_HEAD(&new_worklist->live_values);
181
182         list_for_each(entry, &worklist->live_values) {
183                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
184                 ir_node          *value     = wl_entry->value;
185                 worklist_entry_t *new_entry;
186
187                 if (is_Phi(value) && get_nodes_block(value) == succ_block) {
188                         value = get_Phi_pred(value, succ_pos);
189
190                         /* can happen for unknown phi preds */
191                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
192                                 continue;
193                 }
194
195                 new_entry = obstack_alloc(&obst, sizeof(new_entry[0]));
196                 memset(new_entry, 0, sizeof(new_entry[0]));
197
198                 new_entry->value = value;
199                 if (reload_point != NULL) {
200                         new_entry->reload_point = reload_point;
201                 } else {
202                         new_entry->reload_point = wl_entry->reload_point;
203                 }
204
205                 if (irn_not_visited(value)) {
206                         list_add_tail(&new_entry->head, &new_worklist->live_values);
207                         ++n_live_values;
208
209                         mark_irn_visited(value);
210                         set_irn_link(value, new_entry);
211                 }
212         }
213         new_worklist->n_live_values = n_live_values;
214
215         return new_worklist;
216 }
217
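/**
 * Duplicates a worklist without activating it; entries are copied verbatim.
 */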
218 static worklist_t *duplicate_worklist(const worklist_t *worklist)
219 {
220         worklist_t       *new_worklist;
221         struct list_head *entry;
222
223         new_worklist = obstack_alloc(&obst, sizeof(new_worklist[0]));
224         INIT_LIST_HEAD(&new_worklist->live_values);
225         new_worklist->n_live_values = worklist->n_live_values;
226
227         list_for_each(entry, &worklist->live_values) {
228                 worklist_entry_t *wl_entry  = list_entry(entry, worklist_entry_t, head);
229                 worklist_entry_t *new_entry
230                         = obstack_alloc(&obst, sizeof(new_entry[0]));
231
232                 memcpy(new_entry, wl_entry, sizeof(new_entry[0]));
233                 list_add_tail(&new_entry->head, &new_worklist->live_values);
234         }
235
236         return new_worklist;
237 }
238
239 #ifdef DEBUG_libfirm
240 static void print_worklist(const worklist_t *worklist, int level)
241 {
242         struct list_head *entry;
243
244         DB((dbg, level, "%d values: ", worklist->n_live_values));
245         list_for_each(entry, &worklist->live_values) {
246                 worklist_entry_t *wl_entry
247                         = list_entry(entry, worklist_entry_t, head);
248
249                 DB((dbg, level, "%+F ", wl_entry->value));
250         }
251 }
252 #endif
253
254
255
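/**
 * Recursively clears the link fields of a loop and all of its child loops,
 * so get_loop_info() starts from a clean state.
 */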
256 static void clear_loop_info(ir_loop *loop)
257 {
258         int n_elements = get_loop_n_elements(loop);
259         int i;
260
261         loop->link = NULL;
262         for (i = 0; i < n_elements; ++i) {
263                 loop_element element = get_loop_element(loop, i);
264                 if (*element.kind != k_ir_loop)
265                         continue;
266
267                 clear_loop_info(element.son);
268         }
269 }
270
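/**
 * Returns the loop_info_t of a loop, allocating it on first use. The info is
 * stored in the loop's link field.
 */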
271 static loop_info_t *get_loop_info(ir_loop *loop)
272 {
273         loop_info_t *info = loop->link;
274         if (info != NULL)
275                 return info;
276
277         info = obstack_alloc(&obst, sizeof(info[0]));
278         memset(info, 0, sizeof(info[0]));
279         info->loop = loop;
280         loop->link = info;
281         return info;
282 }
283
284 /**
285  * constructs loop in+out edges when called in a block walker
286  */
287 static void construct_loop_edges(ir_node *block, void *data)
288 {
289         int      n_cfgpreds = get_Block_n_cfgpreds(block);
290         ir_loop *loop       = get_irn_loop(block);
291         int      i;
292
293         (void) data;
294
295         for (i = 0; i < n_cfgpreds; ++i) {
296                 ir_node     *cfgpred_block = get_Block_cfgpred_block(block, i);
297                 ir_loop     *cfgpred_loop  = get_irn_loop(cfgpred_block);
298                 loop_edge_t *edge;
299                 bool        is_exit_edge;
300                 ir_loop     *l, *goal;
301
302                 if (cfgpred_loop == loop)
303                         continue;
304
305                 /* critical edges are split, so we can't jump from one loop
306                  * directly into another without leaving it first */
307                 assert(get_loop_depth(cfgpred_loop) != get_loop_depth(loop));
308
309                 /* edge out of loop */
310                 if (get_loop_depth(cfgpred_loop) > get_loop_depth(loop)) {
311                         is_exit_edge = true;
312                         l            = cfgpred_loop;
313                         goal         = loop;
314                 } else {
315                         is_exit_edge = false;
316                         l            = loop;
317                         goal         = cfgpred_loop;
318                 }
319
320                 /* this might be a jump out/in multiple loops */
321                 do {
322                         loop_info_t *l_info = get_loop_info(l);
323
324                         edge = obstack_alloc(&obst, sizeof(edge[0]));
325                         memset(edge, 0, sizeof(edge[0]));
326                         edge->block = block;
327                         edge->pos   = i;
328
329                         if (is_exit_edge) {
330                                 DB((dbg, LEVEL_2, "Loop %p exit edge %+F:%d\n",
331                                     l, block, i));
332                                 edge->next         = l_info->exit_edges;
333                                 l_info->exit_edges = edge;
334                                 assert(get_loop_depth(loop) < get_loop_depth(l));
335                         } else {
336                                 DB((dbg, LEVEL_2, "Loop %p entry edge %+F:%d\n",
337                                     l, block, i));
338                                 edge->next          = l_info->entry_edges;
339                                 l_info->entry_edges = edge;
340                                 assert(get_loop_depth(cfgpred_loop) < get_loop_depth(l));
341                         }
342                         l = get_loop_outer_loop(l);
343                 } while (l != goal);
344         }
345 }
346
347
348
349
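/**
 * Requests a reload of the given value before its recorded reload point
 * (does nothing during the tentative phases).
 */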
350 static void place_reload(worklist_entry_t *entry)
351 {
352         if (tentative_mode)
353                 return;
354
355         DB((dbg, LEVEL_1, "reload of %+F before %+F\n", entry->value,
356                 entry->reload_point));
357         assert(entry->reload_point != NULL);
358         be_add_reload(senv, entry->value, entry->reload_point, cls, 1);
359         entry->reload_point = NULL;
360 }
361
362 /**
363  * makes sure the worklist contains no more than n_regs - room_needed entries
364  */
365 static void make_room(worklist_t *worklist, size_t room_needed)
366 {
367         int               i;
368         int               spills_needed;
369         struct list_head *entry;
370
371         spills_needed = (int)worklist->n_live_values + (int)room_needed - (int)n_regs;
372         if (spills_needed <= 0)
373                 return;
374
375         entry = worklist->live_values.next;
376         for (i = spills_needed; i > 0; --i) {
377                 struct list_head *next = entry->next;
378                 worklist_entry_t *wl_entry
379                         = list_entry(entry, worklist_entry_t, head);
380
381                 assert(worklist_contains(wl_entry->value));
382                 mark_irn_not_visited(wl_entry->value);
383                 place_reload(wl_entry);
384                 list_del(entry);
385
386                 entry = next;
387         }
388         worklist->n_live_values -= spills_needed;
389 }
390
391 /**
392  * a value was used, so bring it to the back of the worklist (which might
393  * result in a spill of another value).
394  */
395 static void val_used(worklist_t *worklist, ir_node *value, ir_node *sched_point)
396 {
397         /* already in the worklist? Then move it to the back, otherwise add it at the back */
398         worklist_entry_t *entry = get_irn_link(value);
399
400         assert(arch_irn_consider_in_reg_alloc(arch_env, cls, value));
401
402         if (worklist_contains(value)) {
403                 assert(entry != NULL);
404
405                 list_del(&entry->head);
406         } else {
407                 if (entry == NULL) {
408                         entry        = obstack_alloc(&obst, sizeof(entry[0]));
409                         memset(entry, 0, sizeof(entry[0]));
410
411                         entry->value = value;
412                         set_irn_link(value, entry);
413                 }
414
415                 ++worklist->n_live_values;
416                 mark_irn_visited(value);
417         }
418
419         entry->reload_point = sched_point;
420         list_add_tail(&entry->head, &worklist->live_values);
421 }
422
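/**
 * Removes a value from the worklist and clears its visited mark.
 */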
423 static void worklist_remove(worklist_t *worklist, ir_node *value)
424 {
425         worklist_entry_t *entry = get_irn_link(value);
426         DB((dbg, LEVEL_2, "removing %+F\n", value));
427         assert(entry != NULL);
428         list_del(&entry->head);
429         --worklist->n_live_values;
430
431         assert(worklist_contains(value));
432         mark_irn_not_visited(value);
433 }
434
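/**
 * Remembers the maximum register pressure seen so far.
 */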
435 static void update_max_pressure(worklist_t *worklist)
436 {
437         if (worklist->n_live_values > max_register_pressure)
438                 max_register_pressure = worklist->n_live_values;
439 }
440
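/**
 * Walks the schedule of a block backwards and simulates which values can be
 * kept in registers (the worklist). Values that do not fit are removed from
 * the worklist and a reload is requested before their recorded reload point.
 */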
441 static void do_spilling(ir_node *block, worklist_t *worklist)
442 {
443         ir_node *node;
444
445         assert(worklist != NULL);
446
447         sched_foreach_reverse(block, node) {
448                 int    i, arity;
449                 size_t n_defs = 0;
450
451                 DB((dbg, LEVEL_2, "\t%+F... ", node));
452                 update_max_pressure(worklist);
453
454                 if (is_Phi(node)) {
455                         ir_node *node2;
456                         /* TODO: if we have some free registers, then we could decide not
457                          * to spill some phis (but not for phis where at least one input
458                          * is the phi itself) */
459
460                         /* we have to spill all phis that are not live */
461                         sched_foreach_reverse_from(node, node2) {
462                                 assert(is_Phi(node2));
463
464                                 if (worklist_contains(node2))
465                                         continue;
466                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, node2))
467                                         continue;
468
469                                 if (!tentative_mode)
470                                         be_spill_phi(senv, node2);
471                         }
472                         DB((dbg, LEVEL_2, "\n"));
473                         break;
474                 }
475
476                 /* remove values defined by this instruction from the worklist. Values
477                  * defined but not in the worklist need free registers */
478                 if (get_irn_mode(node) == mode_T) {
479                         const ir_edge_t *edge;
480
481                         foreach_out_edge(node, edge) {
482                                 ir_node *proj = get_edge_src_irn(edge);
483                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, proj))
484                                         continue;
485                                 if (worklist_contains(proj)) {
486                                         worklist_remove(worklist, proj);
487                                 } else {
488                                         ++n_defs;
489                                 }
490                         }
491                 } else if (arch_irn_consider_in_reg_alloc(arch_env, cls, node)) {
492                         if (worklist_contains(node)) {
493                                 worklist_remove(worklist, node);
494                         } else {
495                                 n_defs = 1;
496                         }
497                 }
498
499                 /* make sure we have enough free registers for the spills */
500                 make_room(worklist, n_defs);
501
502                 /* put all values used by the instruction into the worklist */
503                 arity = get_irn_arity(node);
504                 for (i = 0; i < arity; ++i) {
505                         ir_node *use = get_irn_n(node, i);
506
507                         if (!arch_irn_consider_in_reg_alloc(arch_env, cls, use))
508                                 continue;
509
510                         val_used(worklist, use, node);
511                 }
512
513                 /* we might have too many values in the worklist now and need to spill
514                  * some */
515                 make_room(worklist, 0);
516
517 #ifdef DEBUG_libfirm
518                 print_worklist(worklist, LEVEL_2);
519                 DB((dbg, LEVEL_2, "\n"));
520 #endif
521         }
522
523         update_max_pressure(worklist);
524
525 #ifdef DEBUG_libfirm
526         DB((dbg, LEVEL_1, "worklist at begin of %+F:", block));
527         print_worklist(worklist, LEVEL_1);
528         DB((dbg, LEVEL_1, "\n"));
529 #endif
530 }
531
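/**
 * Returns true if both worklists contain the same values in the same order.
 */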
532 static bool worklists_equal(const worklist_t *wl1, const worklist_t *wl2)
533 {
534         const struct list_head *i1 = wl1->live_values.next;
535         const struct list_head *i2 = wl2->live_values.next;
536
537         for ( ; i1 != &wl1->live_values && i2 != &wl2->live_values;
538                         i1 = i1->next, i2 = i2->next) {
539                 worklist_entry_t *entry1 = list_entry(i1, worklist_entry_t, head);
540                 worklist_entry_t *entry2 = list_entry(i2, worklist_entry_t, head);
541
542                 if (entry1->value != entry2->value)
543                         return false;
544         }
545         /* both lists at the end */
546         if (i1 != &wl1->live_values || i2 != &wl2->live_values)
547                 return false;
548
549         return true;
550 }
551
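/**
 * Processes a single block: the end worklist is taken from the most
 * frequently executed successor that already has a start worklist (or starts
 * out empty if none exists yet), do_spilling() is run over the block and the
 * resulting start/end worklists are stored in the block info.
 */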
552 static void process_block(ir_node *block, void *env)
553 {
554         block_info_t    *block_info      = NULL;
555         worklist_t      *worklist        = NULL;
556         double           best_execfreq   = -1;
557         ir_node         *best_succ_block = NULL;
558         int              best_pos        = -1;
559         int              n_preds;
560         const ir_edge_t *edge;
561
562         (void) env;
563         DB((dbg, LEVEL_1, "Processing %+F\n", block));
564
565         /* construct worklist */
566         foreach_block_succ(block, edge) {
567                 ir_node *succ_block = get_edge_src_irn(edge);
568                 double   execfreq   = get_block_execfreq(exec_freq, succ_block);
569
570                 if (execfreq > best_execfreq) {
571                         block_info_t *succ_info     = get_block_info(succ_block);
572                         worklist_t   *succ_worklist = succ_info->start_worklist;
573                         if (succ_worklist != NULL) {
574                                 best_execfreq   = execfreq;
575                                 worklist        = succ_worklist;
576                                 best_succ_block = succ_block;
577                                 best_pos        = get_edge_src_pos(edge);
578                         }
579                 }
580         }
581         if (worklist == NULL) {
582                 /* at least one successor block must have been processed unless this is
583                  * the end block */
584                 assert(tentative_mode || block == get_irg_end_block(get_irn_irg(block)));
585
586                 worklist = new_worklist();
587         } else {
588                 worklist = duplicate_activate_worklist(worklist, block, best_succ_block,
589                                                        best_pos);
590         }
591
592         block_info = get_block_info(block);
593
594 #ifdef DEBUG_libfirm
595         DB((dbg, LEVEL_1, "worklist at end of %+F:", block));
596         print_worklist(worklist, LEVEL_1);
597         DB((dbg, LEVEL_1, "\n"));
598
599         if (should_have_reached_fixpoint) {
600                 assert(worklists_equal(block_info->end_worklist, worklist));
601         }
602 #endif
603         block_info->end_worklist = duplicate_worklist(worklist);
604
605         do_spilling(block, worklist);
606         deactivate_worklist(worklist);
607
608 #ifdef DEBUG_libfirm
609         if (should_have_reached_fixpoint) {
610                 assert(worklists_equal(block_info->start_worklist, worklist));
611         }
612 #endif
613         block_info->start_worklist = worklist;
614
615         /* we shouldn't have any live values left at the start block */
616         n_preds = get_Block_n_cfgpreds(block);
617         assert(n_preds != 0 || worklist->n_live_values == 0);
618 }
619
620 typedef struct block_or_loop_t block_or_loop_t;
621 struct block_or_loop_t {
622         union {
623                 ir_node *block;
624                 ir_loop *loop;
625         } v;
626         bool is_loop;
627 };
628
629 static block_or_loop_t *loop_blocks;
630 static ir_loop         *current_loop;
631
632 static void find_blocks(ir_node *block);
633
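/**
 * Handles a block that belongs to a nested loop: the whole loop is appended
 * to the loop_blocks list as a single entry and the search continues at the
 * loop's exit edges.
 */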
634 static void find_in_loop(ir_loop *loop, ir_node *entry)
635 {
636         loop_info_t     *loop_info = get_loop_info(loop);
637         block_or_loop_t block_or_loop;
638         loop_edge_t     *edge;
639
640         /* simply mark one block of the loop to indicate that the loop was
641          * already processed */
642         ir_node *some_block = loop_info->entry_edges->block;
643         if (Block_block_visited(some_block))
644                 return;
        mark_Block_block_visited(some_block);
645
646         block_or_loop.v.loop  = loop;
647         block_or_loop.is_loop = true;
648         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
649
650 #ifndef NDEBUG
651         {
652                 /* the block we entered through must be one of the loop's entry blocks */
653                 loop_edge_t *edge  = loop_info->entry_edges;
654                 bool         found = false;
655                 for ( ; edge != NULL; edge = edge->next) {
656                         if (edge->block == entry)
657                                 found = true;
658                 }
659                 assert(found);
660         }
661 #endif
662         /* check all loop successors */
663         for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
664                 ir_node *succ      = edge->block;
665                 ir_loop *succ_loop = get_irn_loop(succ);
666
667                 if (succ_loop == current_loop) {
668                         find_blocks(succ);
669                 } else {
670                         assert(get_loop_depth(succ_loop) < get_loop_depth(current_loop));
671                 }
672         }
673 }
674
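/**
 * Collects all blocks of the current loop into the loop_blocks list by
 * following control-flow successors; nested loops are added as single
 * entries via find_in_loop().
 */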
675 static void find_blocks(ir_node *block)
676 {
677         const ir_edge_t *edge;
678         block_or_loop_t block_or_loop;
679
680         if (Block_block_visited(block))
681                 return;
682
683         block_or_loop.v.block = block;
684         block_or_loop.is_loop = false;
685
686         ARR_APP1(block_or_loop_t, loop_blocks, block_or_loop);
687         mark_Block_block_visited(block);
688
689         foreach_block_succ(block, edge) {
690                 ir_node *succ = get_edge_src_irn(edge);
691
692                 /* is it part of the current loop? */
693                 ir_loop *loop = get_irn_loop(succ);
694                 if (loop != current_loop) {
695                         /* a sub-loop? */
696                         if (get_loop_depth(loop) > get_loop_depth(current_loop)) {
697                                 find_in_loop(loop, succ);
698                         } else {
699                                 /* parent loop: we're not interested in the block */
700                         }
701                 } else {
702                         find_blocks(succ);
703                 }
704         }
705 }
706
707 /**
708  * append an entry to a worklist. WARNING: The entry must not already be in the
709  * worklist.
710  */
711 static void worklist_append(worklist_t *worklist, ir_node *value,
712                             ir_node *reload_point,
713                             ir_loop *unused_livethrough_loop)
714 {
715         worklist_entry_t *entry     = obstack_alloc(&obst, sizeof(entry[0]));
716         memset(entry, 0, sizeof(entry[0]));
717
718         entry->value                   = value;
719         entry->reload_point            = reload_point;
720         entry->unused_livethrough_loop = unused_livethrough_loop;
721         list_add_tail(&entry->head, &worklist->live_values);
722         ++worklist->n_live_values;
723         assert(worklist->n_live_values <= n_regs);
724 }
725
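/**
 * Adds an unused live-through value to the end worklists of all loop exit
 * predecessors and to the start worklists of all loop entry blocks, so the
 * value stays in a register across the loop.
 */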
726 static void push_unused_livethrough(loop_info_t *loop_info, ir_node *value)
727 {
728         /* add the value to all loop exit and entry blocks */
729         loop_edge_t *edge = loop_info->exit_edges;
730         for ( ; edge != NULL; edge = edge->next) {
731                 ir_node            *block
732                         = get_Block_cfgpred_block(edge->block, edge->pos);
733                 const block_info_t *info     = get_block_info(block);
734                 worklist_t         *worklist = info->end_worklist;
735                 /* TODO: we need a smarter mechanism here, that makes the reloader place
736                  * reload nodes on all loop exits... */
737                 ir_node *reload_point = NULL;
738
739                 worklist_append(worklist, value, reload_point, loop_info->loop);
740         }
741         edge = loop_info->entry_edges;
742         for ( ; edge != NULL; edge = edge->next) {
743                 ir_node            *entry_block = edge->block;
744                 const block_info_t *info        = get_block_info(entry_block);
745                 worklist_t         *worklist    = info->start_worklist;
746
747                 ir_node *pred_block
748                         = get_Block_cfgpred_block(entry_block, edge->pos);
749                 ir_node *reload_point = be_get_end_of_block_insertion_point(pred_block);
750
751                 worklist_append(worklist, value, reload_point, loop_info->loop);
752         }
753
754         set_irn_link(value, NULL);
755         ++loop_info->max_register_pressure;
756 }
757
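/**
 * Finds values that are needed after the loop but do not appear in the
 * worklists inside it (unused live-throughs) and, as long as the loop's
 * maximum register pressure permits, keeps them in registers across the loop
 * via push_unused_livethrough().
 */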
758 static void push_unused_livethroughs(loop_info_t *loop_info)
759 {
760         loop_edge_t *edge;
761
762         /* we can only push unused livethroughs if register pressure inside the loop
763          * was low enough */
764         if (loop_info->max_register_pressure >= n_regs)
765                 return;
766
767         /* find unused livethroughs: register pressure in the loop was low enough,
768          * which means that we had no spills, which in turn implies that at every
769          * point in the loop all live values fit into the available registers */
770         for (edge = loop_info->exit_edges; edge != NULL; edge = edge->next) {
771                 ir_node            *block          = edge->block;
772                 const block_info_t *info           = get_block_info(block);
773                 worklist_t         *start_worklist = info->start_worklist;
774                 ir_node            *exit_block;
775                 const block_info_t *exit_info;
776                 worklist_t         *end_worklist;
777                 struct list_head   *entry;
778
779                 if (start_worklist == NULL)
780                         continue;
781
782                 exit_block   = get_Block_cfgpred_block(edge->block, edge->pos);
783                 exit_info    = get_block_info(exit_block);
784                 end_worklist = exit_info->end_worklist;
785
786                 activate_worklist(end_worklist);
787                 /* all values contained in the start_worklist that are not available
788                  * in the end_worklist must be unused livethroughs */
789
790                 list_for_each(entry, &start_worklist->live_values) {
791                         worklist_entry_t *wl_entry
792                                 = list_entry(entry, worklist_entry_t, head);
793                         ir_node *value = wl_entry->value;
794
795                         if (!worklist_contains(value)) {
796                                 /* add it to all loop exits */
797                                 DB((dbg, LEVEL_2, "Found unused livethrough: %+F (regpressure in loop %u)\n", value, (unsigned) loop_info->max_register_pressure));
798                                 push_unused_livethrough(loop_info, value);
799                                 mark_irn_visited(value);
800                         }
801
802                         if (loop_info->max_register_pressure >= n_regs)
803                                 break;
804                 }
805                 deactivate_worklist(end_worklist);
806         }
807 }
808
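/**
 * Processes one element of the loop_blocks list: blocks are handled by
 * process_block(), already processed nested loops only contribute their
 * maximum register pressure and their unused live-throughs.
 */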
809 static void process_block_or_loop(const block_or_loop_t *block_or_loop)
810 {
811         if (block_or_loop->is_loop) {
812                 loop_info_t *loop_info = get_loop_info(block_or_loop->v.loop);
813
814                 if (loop_info->max_register_pressure > max_register_pressure)
815                         max_register_pressure = loop_info->max_register_pressure;
816
817                 push_unused_livethroughs(loop_info);
818                 return;
819         }
820         process_block(block_or_loop->v.block, NULL);
821 }
822
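/**
 * Spills a single loop: all nested loops are processed first, then a
 * post-order list of the loop's blocks is built and traversed several times
 * in tentative mode (presumably to let the block worklists stabilize) before
 * the final pass inserts the actual spills and reloads.
 */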
823 static void process_loop(ir_loop *loop)
824 {
825         int         n_elements = get_loop_n_elements(loop);
826         int         i, len;
827         loop_info_t *loop_info;
828         loop_edge_t *edge;
829         ir_node     *some_block;
830
831         /* first handle all sub-loops */
832         for (i = 0; i < n_elements; ++i) {
833                 loop_element element = get_loop_element(loop, i);
834                 if (*element.kind != k_ir_loop)
835                         continue;
836
837                 process_loop(element.son);
838         }
839
840         /* create a postorder of the blocks */
841         loop_info = get_loop_info(loop);
842         edge      = loop_info->entry_edges;
843         if (edge != NULL) {
844                 some_block = edge->block;
845         } else {
846                 assert(loop == get_irg_loop(current_ir_graph));
847                 some_block = get_irg_start_block(current_ir_graph);
848         }
849
850         loop_blocks  = NEW_ARR_F(block_or_loop_t,0);
851         current_loop = loop;
852
853         ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
854         inc_irg_block_visited(current_ir_graph);
855         find_blocks(some_block);
856         /* for endless loops the end-block might be unreachable */
857         if (loop == get_irg_loop(current_ir_graph)) {
858                 find_blocks(get_irg_end_block(current_ir_graph));
859         }
860         ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_VISITED);
861
862         DB((dbg, LEVEL_2, "Block List for loop %p\n", loop));
863         len = ARR_LEN(loop_blocks);
864         for (i = 0; i < len; ++i) {
865                 block_or_loop_t *block_or_loop = &loop_blocks[i];
866                 if (block_or_loop->is_loop) {
867                         DB((dbg, LEVEL_2, " L-%p", block_or_loop->v.loop));
868                 } else {
869                         DB((dbg, LEVEL_2, " B-%+F", block_or_loop->v.block));
870                 }
871         }
872         DB((dbg, LEVEL_2, "\n"));
873
874         max_register_pressure = 0;
875
876         /* run1: tentative phase */
877         tentative_mode = true;
878         for (i = len-1; i >= 0; --i) {
879                 process_block_or_loop(&loop_blocks[i]);
880         }
881
882         /* run2: tentative phase - should reach fixpoint */
883         tentative_mode = true;
884         for (i = len-1; i >= 0; --i) {
885                 process_block_or_loop(&loop_blocks[i]);
886         }
887
888 #ifndef NDEBUG
889         /* run3: tentative phase - check fixpoint */
890         tentative_mode               = true;
891         should_have_reached_fixpoint = true;
892         for (i = len-1; i >= 0; --i) {
893                 process_block_or_loop(&loop_blocks[i]);
894         }
895         should_have_reached_fixpoint = false;
896 #endif
897
898         /* run4: add spills/reloads */
899         tentative_mode = false;
900         for (i = len-1; i >= 0; --i) {
901                 process_block_or_loop(&loop_blocks[i]);
902         }
903
904         loop_info->max_register_pressure = max_register_pressure;
905         DB((dbg, LEVEL_1, "Regpressure in loop %p: %u\n", loop,
906             (unsigned) max_register_pressure));
907
908         DEL_ARR_F(loop_blocks);
909 }
910
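/**
 * Block walker: for every control-flow predecessor, requests reloads on the
 * incoming edge for all values that are live at the start of the block but
 * not available at the end of that predecessor; unused live-through values
 * only get reloads on loop entry edges.
 */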
911 static void fix_block_borders(ir_node *block, void *data)
912 {
913         block_info_t *block_info     = get_block_info(block);
914         worklist_t   *start_worklist = block_info->start_worklist;
915         int           n_cfgpreds     = get_Block_n_cfgpreds(block);
916         ir_loop      *loop           = get_irn_loop(block);
917         int           i;
918
919         (void) data;
920         assert(start_worklist != NULL);
921
922         for (i = 0; i < n_cfgpreds; ++i) {
923                 ir_node      *pred_block      = get_Block_cfgpred_block(block, i);
924                 block_info_t *pred_block_info = get_block_info(pred_block);
925                 worklist_t   *end_worklist    = pred_block_info->end_worklist;
926                 ir_loop      *pred_loop       = get_irn_loop(pred_block);
927                 bool          is_loop_entry   = false;
928                 struct list_head *entry;
929
930                 assert(end_worklist != NULL);
931
932                 if (get_loop_depth(pred_loop) < get_loop_depth(loop)) {
933                         is_loop_entry = true;
934                 }
935
936                 /* reload missing values */
937                 activate_worklist(end_worklist);
938
939                 list_for_each(entry, &start_worklist->live_values) {
940                         worklist_entry_t *wl_entry
941                                 = list_entry(entry, worklist_entry_t, head);
942                         ir_node          *value = wl_entry->value;
943
944                         if (is_Phi(value) && get_nodes_block(value) == block) {
945                                 value = get_irn_n(value, i);
946
947                         /* we might have Unknown nodes as arguments for the phi */
948                                 if (!arch_irn_consider_in_reg_alloc(arch_env, cls, value))
949                                         continue;
950                         }
951
952                         if (worklist_contains(value))
953                                 continue;
954                         if (wl_entry->unused_livethrough_loop != NULL && !is_loop_entry)
955                                 continue;
956
957                         be_add_reload_on_edge(senv, value, block, i, cls, 1);
958                 }
959
960                 deactivate_worklist(end_worklist);
961         }
962 }
963
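/**
 * Main entry point: runs the belady3 spiller for one register class of a
 * graph.
 */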
964 static void be_spill_belady3(be_irg_t *birg, const arch_register_class_t *ncls)
965 {
966         ir_graph *irg = be_get_birg_irg(birg);
967
968         cls    = ncls;
969         n_regs = cls->n_regs - be_put_ignore_regs(birg, cls, NULL);
970
971         /* shortcut for register classes with ignore regs only */
972         if (n_regs == 0)
973                 return;
974
975         arch_env  = be_get_birg_arch_env(birg);
976         exec_freq = be_get_birg_exec_freq(birg);
977
978         be_clear_links(irg);
979         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
980                         | IR_RESOURCE_LOOP_LINK);
981         inc_irg_visited(irg);
982
983         obstack_init(&obst);
984         senv = be_new_spill_env(birg);
985
986         assure_cf_loop(irg);
987         clear_loop_info(get_irg_loop(irg));
988         irg_block_walk_graph(irg, construct_loop_edges, NULL, NULL);
989
990         process_loop(get_irg_loop(current_ir_graph));
991
992         irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
993
994         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED | IR_RESOURCE_IRN_LINK
995                         | IR_RESOURCE_LOOP_LINK);
996
997         be_insert_spills_reloads(senv);
998
999         obstack_free(&obst, NULL);
1000
1001         /* clean up */
1002         be_delete_spill_env(senv);
1003 }
1004
1005 void be_init_spillbelady3(void)
1006 {
1007         static be_spiller_t belady3_spiller = {
1008                 be_spill_belady3
1009         };
1010
1011         be_register_spiller("belady3", &belady3_spiller);
1012         FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady3");
1013 }
1014
1015 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady3);