nsz Git - libfirm/blob - ir/opt/loop.c

   1 /*
   2  * Copyright (C) 1995-2010 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @author   Christian Helmer
  23  * @brief    loop inversion and loop unrolling
  24  *
  25  * @version  $Id$
  26  */
  27
  28 #include "config.h"
  29
  30 #include "iroptimize.h"
  31 #include "opt_init.h"
  32 #include "irnode.h"
  33 #include "debug.h"
  34 #include "error.h"
  35
  36 #include "ircons.h"
  37 #include "irgopt.h"
  38 #include "irgmod.h"
  39 #include "irgwalk.h"
  40 #include "irouts.h"
  41 #include "iredges.h"
  42 #include "irtools.h"
  43 #include "array_t.h"
  44 #include "beutil.h"
  45 #include "irpass.h"
  46 #include "irdom.h"
  47
  48 #include "irbackedge_t.h"
  49 #include "irphase_t.h"
  50 #include "irloop_t.h"
  51
  52
  53 DEBUG_ONLY(static firm_dbg_module_t *dbg);
  54
  55 /* DBG print stats for every procedure.  */
  56 #define LOOP_OPT_STATS
  57 /* DBG: Ignore node limits and process every possible loop. */
  58 #define LOOP_IGNORE_NODE_LIMITS 1
  59
/**
 * Convenience macro for iterating over every phi node of the given block.
 * Starts at get_Block_phis(block) and follows get_Phi_next() until NULL,
 * so it requires an up-to-date phi list per block.
 */
#define for_each_phi(block, phi) \
        for ((phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next((phi)))

/**
 * Variant of for_each_phi that starts at an explicit list head and fetches
 * the successor into next before the loop body runs, so the body does not
 * need get_Phi_next(phi) to stay valid.
 * A NULL head yields zero iterations.
 */
#define for_each_phi_safe(head, phi, next) \
        for ((phi) = (head), (next) = (head) ? get_Phi_next((head)) : NULL; \
                        (phi) ; (phi) = (next), (next) = (next) ? get_Phi_next((next)) : NULL)
  70
  71 /* Currently processed loop. */
  72 static ir_loop *cur_loop;
  73
  74 /* Flag for kind of unrolling. */
  75 typedef enum {
  76         constant,
  77         invariant
  78 } unrolling_kind_flag;
  79
   80 /* Predicate deciding whether a node is visited (and copied) during copy_walk. */
  81 typedef unsigned walker_condition(ir_node *);
  82
/* An edge described by a node, the position of one of its predecessors,
 * and that predecessor itself. */
typedef struct entry_edge {
        ir_node *node;  /* the using node */
        int pos;        /* index of pred among the ins of node */
        ir_node *pred;  /* predecessor found at pos when the edge was recorded */
} entry_edge;
  89
  90 /* Node info for unrolling. */
  91 typedef struct unrolling_node_info {
  92         ir_node **copies;
  93         /*ir_node **ins;*/
  94 } unrolling_node_info;
  95
  96 /* Outs of the nodes head. */
  97 static entry_edge *cur_head_outs;
  98
  99 /* Information about the loop head */
 100 static ir_node *loop_head = NULL;
 101 static unsigned loop_head_valid = 1;
 102
 103 /* List of all inner loops, that are processed. */
 104 static ir_loop **loops;
 105
 106 #ifdef LOOP_OPT_STATS
 107
 108 #define count_stats(val) (++val)
 109 #define print_stats() (do_print_stats())
 110 #define reset_stats() (do_reset_stats())
 111
 112 /* Stats */
 113 typedef struct loop_stats_t {
 114         unsigned loops;
 115         unsigned inverted;
 116         unsigned too_large;
 117         unsigned too_large_adapted;
 118         unsigned cc_limit_reached;
 119         unsigned calls_limit;
 120
 121         unsigned u_simple_counting_loop;
 122         unsigned constant_unroll;
 123         unsigned invariant_unroll;
 124
 125         unsigned unhandled;
 126 } loop_stats_t;
 127
 128 static loop_stats_t stats;
 129
 130 /* Set stats to sero */
 131 static void do_reset_stats(void)
 132 {
 133         memset(&stats, 0, sizeof(loop_stats_t));
 134 }
 135
 136 /* Print stats */
 137 static void do_print_stats(void)
 138 {
 139         DB((dbg, LEVEL_2, "---------------------------------------\n"));
 140         DB((dbg, LEVEL_2, "loops             :   %d\n",stats.loops));
 141         DB((dbg, LEVEL_2, "inverted          :   %d\n",stats.inverted));
 142         DB((dbg, LEVEL_2, "too_large         :   %d\n",stats.too_large));
 143         DB((dbg, LEVEL_2, "too_large_adapted :   %d\n",stats.too_large_adapted));
 144         DB((dbg, LEVEL_2, "cc_limit_reached  :   %d\n",stats.cc_limit_reached));
 145         DB((dbg, LEVEL_2, "calls_limit       :   %d\n",stats.calls_limit));
 146         DB((dbg, LEVEL_2, "u_simple_counting :   %d\n",stats.u_simple_counting_loop));
 147         DB((dbg, LEVEL_2, "constant_unroll   :   %d\n",stats.constant_unroll));
 148         DB((dbg, LEVEL_2, "invariant_unroll  :   %d\n",stats.invariant_unroll));
 149         DB((dbg, LEVEL_2, "=======================================\n"));
 150 }
 151 #else
 152 /* No stats */
 153 #define count_stats(val) ((void)0)
 154 #define print_stats() ((void)0)
 155 #define reset_stats() ((void)0)
 156
 157 #endif
 158
/* Command line parameters controlling the loop optimizations. */
typedef struct loop_opt_params_t {
        unsigned max_loop_size;         /* Maximum number of nodes */
        int      depth_adaption;        /* Loop nest depth adaption, in permil per nesting level
                                         * (see get_max_nodes_adapted) */
        unsigned allowed_calls;         /* Number of calls allowed */
        unsigned count_phi:1;           /* Count phi nodes */
        unsigned count_proj:1;          /* Count projections */

        unsigned max_cc_size;           /* Maximum condition chain size */

        /* NOTE(review): presumably enable the 'constant' and 'invariant'
         * kinds of unrolling_kind_flag respectively - confirm at the users. */
        unsigned allow_const_unrolling:1;
        unsigned allow_invar_unrolling:1;

} loop_opt_params_t;
 173
 174 static loop_opt_params_t opt_params;
 175
/* Loop analysis information collected for the loop currently processed. */
typedef struct loop_info_t {
        unsigned nodes;                 /* node count */
        unsigned ld_st;                 /* number of Load and Store nodes */
        unsigned calls;                 /* number of calls */
        unsigned cf_outs;               /* number of cf edges which leave the loop */
        entry_edge cf_out;              /* single loop leaving cf edge
                                         * (the last one recorded if there are several) */
        int be_src_pos;                 /* position of the single own backedge in the head */

        /* for inversion */
        unsigned cc_size;               /* nodes in the condition chain */

        /* for unrolling */
        unsigned max_unroll;            /* Number of unrolls satisfying max_loop_size */
        unsigned exit_cond;                     /* 1 if condition==true exits the loop.  */
        unsigned latest_value:1;        /* 1 if condition is checked against latest counter value */
        unsigned needs_backedge:1;      /* 0 if loop is completely unrolled */
        unsigned decreasing:1;          /* Step operation is_Sub, or step is<0 */

        /* Induction variable (IV) information of a simple loop */
        ir_node *start_val;
        ir_node *step;
        ir_node *end_val;
        ir_node *iteration_phi;
        ir_node *add;

        tarval *count_tar;                                      /* Number of loop iterations */

        ir_node *duff_cond;                                     /* Duff mod */
        unrolling_kind_flag unroll_kind;        /* constant or invariant unrolling */
} loop_info_t;
 207
 208 /* Information about the current loop */
 209 static loop_info_t loop_info;
 210
 211 /* Outs of the condition chain (loop inversion). */
 212 static ir_node **cc_blocks;
 213 /* df/cf edges with def in the condition chain */
 214 static entry_edge *cond_chain_entries;
 215 /* Array of df loops found in the condition chain. */
 216 static entry_edge *head_df_loop;
 217 /* Number of blocks in cc */
 218 static unsigned inversion_blocks_in_cc;
 219
 220
 221 /* Cf/df edges leaving the loop.
 222  * Called entries here, as they are used to enter the loop with walkers. */
 223 static entry_edge *loop_entries;
 224 /* Number of unrolls to perform */
 225 static int unroll_nr;
 226 /* Phase is used to keep copies of nodes. */
 227 static ir_phase *phase;
 228
 229 /* Loop operations.  */
 230 typedef enum loop_op_t {
 231         loop_op_inversion,
 232         loop_op_unrolling,
 233         loop_op_peeling
 234 } loop_op_t;
 235
 236 /* Saves which loop operation to do until after basic tests. */
 237 static loop_op_t loop_op;
 238
 239 /************************************************************************/
 240
 241 /* Returns the maximum nodes for the given nest depth */
 242 static unsigned get_max_nodes_adapted(unsigned depth)
 243 {
 244         int adapt_permil = opt_params.depth_adaption * depth;
 245         unsigned permil_change;
 246
 247         if (adapt_permil < -1000)
 248                 return 0;
 249
 250         permil_change = 1000 + adapt_permil;
 251         return (opt_params.max_loop_size * permil_change) / 1000;
 252 }
 253
 254 /* Reset nodes link. For use with a walker. */
 255 static void reset_link(ir_node *node, void *env)
 256 {
 257         (void)env;
 258         set_irn_link(node, NULL);
 259 }
 260
 261 /* Returns 0 if the node or block is not in cur_loop. */
 262 static unsigned is_in_loop(ir_node *node)
 263 {
 264         return (get_irn_loop(get_block(node)) == cur_loop);
 265 }
 266
 267 /* Returns 0 if the given edge is not a backedge
 268  * with its pred in the cur_loop. */
 269 static unsigned is_own_backedge(ir_node *n, int pos)
 270 {
 271         return (is_backedge(n, pos) && is_in_loop(get_irn_n(n, pos)));
 272 }
 273
 274 /* Finds loop head and some loop_info as calls or else if necessary. */
 275 static void get_loop_info(ir_node *node, void *env)
 276 {
 277         unsigned node_in_loop, pred_in_loop;
 278         int i, arity;
 279         (void)env;
 280
 281         arity = get_irn_arity(node);
 282         for (i = 0; i < arity; i++) {
 283                 ir_node *pred = get_irn_n(node, i);
 284
 285                 pred_in_loop = is_in_loop(pred);
 286                 node_in_loop = is_in_loop(node);
 287
 288                 /* collect some loop information */
 289                 if (node_in_loop) {
 290                         if (is_Phi(node) && opt_params.count_phi)
 291                                 ++loop_info.nodes;
 292                         else if (is_Proj(node) && opt_params.count_proj)
 293                                 ++loop_info.nodes;
 294                         else if (!is_Confirm(node) && !is_Const(node) && !is_SymConst(node))
 295                                 ++loop_info.nodes;
 296
 297                         if (is_Load(node) || is_Store(node))
 298                                 ++loop_info.ld_st;
 299
 300                         if (is_Call(node))
 301                                 ++loop_info.calls;
 302                 }
 303
 304                 /* Find the loops head/the blocks with cfpred outside of the loop */
 305                 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_head_valid) {
 306                         ir_node *cfgpred = get_Block_cfgpred(node, i);
 307
 308                         if (!is_in_loop(cfgpred)) {
 309                                 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n",
 310                                                         node, pred));
 311                                 /* another head? We do not touch this. */
 312                                 if (loop_head && loop_head != node) {
 313                                         loop_head_valid = 0;
 314                                 } else {
 315                                         loop_head = node;
 316                                 }
 317                         }
 318                 }
 319         }
 320 }
 321
 322 /* Finds all edges with users outside of the loop
 323  * and definition inside the loop. */
 324 static void get_loop_entries(ir_node *node, void *env)
 325 {
 326         unsigned node_in_loop, pred_in_loop;
 327         int i, arity;
 328         (void) env;
 329
 330         arity = get_irn_arity(node);
 331         for (i = 0; i < arity; ++i) {
 332                 ir_node *pred = get_irn_n(node, i);
 333
 334                 pred_in_loop = is_in_loop(pred);
 335                 node_in_loop = is_in_loop(node);
 336
 337                 if (pred_in_loop && !node_in_loop) {
 338                         entry_edge entry;
 339                         entry.node = node;
 340                         entry.pos = i;
 341                         entry.pred = pred;
 342                         ARR_APP1(entry_edge, loop_entries, entry);
 343                         /* Count cf outs */
 344                         if (is_Block(node)) {
 345                                 ++loop_info.cf_outs;
 346                                 loop_info.cf_out = entry;
 347                         }
 348                 }
 349         }
 350 }
 351
 352 /* ssa */
 353 static ir_node *ssa_second_def;
 354 static ir_node *ssa_second_def_block;
 355
 356 /**
 357  * Walks the graph bottom up, searching for definitions and creates phis.
 358  */
 359 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int first)
 360 {
 361         int i;
 362         int n_cfgpreds;
 363         ir_graph *irg;
 364         ir_node *phi;
 365         ir_node **in;
 366
 367         DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block));
 368
 369         /* Prevents creation of phi that would be bad anyway.
 370          * Dead and bad blocks. */
 371         if (get_irn_arity(block) < 1 || is_Bad(block)) {
 372                 DB((dbg, LEVEL_5, "ssa bad %N\n", block));
 373                 return new_Bad();
 374         }
 375
 376         if (block == ssa_second_def_block && !first) {
 377                 DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def));
 378                 return ssa_second_def;
 379         }
 380
 381         /* already processed this block? */
 382         if (irn_visited(block)) {
 383                 ir_node *value = (ir_node *) get_irn_link(block);
 384                 DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value));
 385                 return value;
 386         }
 387
 388         irg = get_irn_irg(block);
 389         assert(block != get_irg_start_block(irg));
 390
 391         /* a Block with only 1 predecessor needs no Phi */
 392         n_cfgpreds = get_Block_n_cfgpreds(block);
 393         if (n_cfgpreds == 1) {
 394                 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
 395                 ir_node *value;
 396
 397                 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block));
 398
 399                 value = search_def_and_create_phis(pred_block, mode, 0);
 400                 set_irn_link(block, value);
 401                 mark_irn_visited(block);
 402
 403                 return value;
 404         }
 405
 406         /* create a new Phi */
 407         NEW_ARR_A(ir_node*, in, n_cfgpreds);
 408         for (i = 0; i < n_cfgpreds; ++i)
 409                 in[i] = new_Unknown(mode);
 410
 411         phi = new_r_Phi(block, n_cfgpreds, in, mode);
 412         /* Important: always keep block phi list up to date. */
 413         add_Block_phi(block, phi);
 414         DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block));
 415         set_irn_link(block, phi);
 416         mark_irn_visited(block);
 417
 418         /* set Phi predecessors */
 419         for (i = 0; i < n_cfgpreds; ++i) {
 420                 ir_node *pred_val;
 421                 ir_node *pred_block = get_Block_cfgpred_block(block, i);
 422                 assert(pred_block != NULL);
 423                 pred_val = search_def_and_create_phis(pred_block, mode, 0);
 424
 425                 assert(pred_val != NULL);
 426
 427                 DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val));
 428                 set_irn_n(phi, i, pred_val);
 429         }
 430
 431         return phi;
 432 }
 433
 434
 435 /**
 436  * Given a set of values this function constructs SSA-form for the users of the
 437  * first value (the users are determined through the out-edges of the value).
 438  * Works without using the dominance tree.
 439  */
 440 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
 441                 ir_node *second_block, ir_node *second_val)
 442 {
 443         ir_graph *irg;
 444         ir_mode *mode;
 445         const ir_edge_t *edge;
 446         const ir_edge_t *next;
 447
 448         assert(orig_block && orig_val && second_block && second_val &&
 449                         "no parameter of construct_ssa may be NULL");
 450
 451         if (orig_val == second_val)
 452                 return;
 453
 454         irg = get_irn_irg(orig_val);
 455
 456         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
 457         inc_irg_visited(irg);
 458
 459         mode = get_irn_mode(orig_val);
 460         set_irn_link(orig_block, orig_val);
 461         mark_irn_visited(orig_block);
 462
 463         ssa_second_def_block = second_block;
 464         ssa_second_def       = second_val;
 465
 466         /* Only fix the users of the first, i.e. the original node */
 467         foreach_out_edge_safe(orig_val, edge, next) {
 468                 ir_node *user = get_edge_src_irn(edge);
 469                 int j = get_edge_src_pos(edge);
 470                 ir_node *user_block = get_nodes_block(user);
 471                 ir_node *newval;
 472
 473                 /* ignore keeps */
 474                 if (is_End(user))
 475                         continue;
 476
 477                 DB((dbg, LEVEL_5, "original user %N\n", user));
 478
 479                 if (is_Phi(user)) {
 480                         ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
 481                         newval = search_def_and_create_phis(pred_block, mode, 1);
 482                 } else {
 483                         newval = search_def_and_create_phis(user_block, mode, 1);
 484                 }
 485                 if (newval != user && !is_Bad(newval))
 486                         set_irn_n(user, j, newval);
 487         }
 488
 489         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
 490 }
 491
 492
 493 /***** Unrolling Helper Functions *****/
 494
 495 /* Assign the copy with index nr to node n */
 496 static void set_unroll_copy(ir_node *n, int nr, ir_node *cp)
 497 {
 498         assert(nr != 0 && "0 reserved");
 499
 500         unrolling_node_info *info = (unrolling_node_info *)phase_get_irn_data(phase, n);
 501         if (! info) {
 502                 ir_node **arr;
 503
 504                 info = XMALLOCZ(unrolling_node_info);
 505                 arr = NEW_ARR_F(ir_node *, unroll_nr);
 506                 info->copies = arr;
 507                 memset(info->copies, 0, (unroll_nr) * sizeof(ir_node *));
 508
 509                 phase_set_irn_data(phase, n, info);
 510         }
 511         /* Original node */
 512         info->copies[0] = n;
 513
 514         info->copies[nr] = cp;
 515 }
 516
 517 /* Returns a nodes copy if it exists, else NULL. */
 518 static ir_node *get_unroll_copy(ir_node *n, int nr)
 519 {
 520         unrolling_node_info *info = (unrolling_node_info *)phase_get_irn_data(phase, n);
 521         if (! info)
 522                 return NULL;
 523
 524         ir_node *cp = info->copies[nr];
 525         return cp;
 526 }
 527
 528
 529 /***** Inversion Helper Functions *****/
 530
 531 /* Sets copy cp of node n. */
 532 static void set_inversion_copy(ir_node *n, ir_node *cp)
 533 {
 534         phase_set_irn_data(phase, n, cp);
 535 }
 536
 537 /* Getter of copy of n for inversion */
 538 static ir_node *get_inversion_copy(ir_node *n)
 539 {
 540         ir_node *cp = (ir_node *)phase_get_irn_data(phase, n);
 541         return cp;
 542 }
 543
 544 /* Resets block mark for given node. For use with walker */
 545 static void reset_block_mark(ir_node *node, void * env)
 546 {
 547         (void) env;
 548
 549         if (is_Block(node))
 550                 set_Block_mark(node, 0);
 551 }
 552
 553 /* Returns mark of node, or its block if node is not a block.
 554  * Used in this context to determine if node is in the condition chain. */
 555 static unsigned is_nodes_block_marked(ir_node* node)
 556 {
 557         if (is_Block(node))
 558                 return get_Block_mark(node);
 559         else
 560                 return get_Block_mark(get_block(node));
 561 }
 562
 563 /* Extends a nodes ins by node new.
 564  * NOTE: This is slow if a node n needs to be extended more than once. */
 565 static int extend_irn(ir_node *n, ir_node *new)
 566 {
 567         ir_node **ins;
 568         int i;
 569         int arity = get_irn_arity(n);
 570         int new_arity = arity + 1;
 571
 572         NEW_ARR_A(ir_node *, ins, new_arity);
 573
 574         for(i = 0; i < arity; ++i) {
 575                 ins[i] = get_irn_n(n, i);
 576         }
 577         ins[i] = new;
 578
 579         set_irn_in(n, new_arity, ins);
 580         return arity;
 581 }
 582
 583 /* Extends a block by a copy of its pred at pos,
 584  * fixing also the phis in the same way. */
 585 static void extend_ins_by_copy(ir_node *block, int pos)
 586 {
 587         ir_node *new_in;
 588         ir_node *phi;
 589         ir_node *pred;
 590         assert(is_Block(block));
 591
 592         /* Extend block by copy of definition at pos */
 593         pred = get_irn_n(block, pos);
 594         new_in = get_inversion_copy(pred);
 595         DB((dbg, LEVEL_5, "Extend block %N by %N cp of %N\n", block, new_in, pred));
 596         extend_irn(block, new_in);
 597
 598         /* Extend block phis by copy of definition at pos */
 599         for_each_phi(block, phi) {
 600                 ir_node *pred, *cp;
 601
 602                 pred = get_irn_n(phi, pos);
 603                 cp = get_inversion_copy(pred);
 604                 /* If the phis in is not in the condition chain (eg. a constant),
 605                  * there is no copy. */
 606                 if (cp == NULL)
 607                         new_in = pred;
 608                 else
 609                         new_in = cp;
 610
 611                 DB((dbg, LEVEL_5, "Extend phi %N by %N cp of %N\n", phi, new_in, pred));
 612                 extend_irn(phi, new_in);
 613         }
 614 }
 615
 616 /* Returns the number of blocks backedges. With or without alien bes. */
 617 static int get_backedge_n(ir_node *block, unsigned with_alien)
 618 {
 619         int i;
 620         int be_n = 0;
 621         int arity = get_irn_arity(block);
 622
 623         assert(is_Block(block) && "We only required backedges of blocks.");
 624
 625         for (i = 0; i < arity; ++i) {
 626                 ir_node *pred = get_irn_n(block, i);
 627                 if (is_backedge(block, i) && (with_alien || is_in_loop(pred)))
 628                         ++be_n;
 629         }
 630         return be_n;
 631 }
 632
 633 /* Returns a raw copy of the given node.
 634  * Attributes are kept/set according to the needs of loop inversion. */
 635 static ir_node *copy_node(ir_node *node)
 636 {
 637         int i, arity;
 638         ir_node *cp;
 639
 640         cp = exact_copy(node);
 641         arity = get_irn_arity(node);
 642
 643         /* Keep backedge info */
 644         for (i = 0; i < arity; ++i) {
 645                 if (is_backedge(node, i))
 646                         set_backedge(cp, i);
 647         }
 648
 649         if (is_Block(cp)) {
 650                 /* We may not keep the old macroblock. */
 651                 set_Block_MacroBlock(cp, cp);
 652                 set_Block_mark(cp, 0);
 653         }
 654
 655         return cp;
 656 }
 657
 658
 659 /**
 660  * This walker copies all walked nodes.
 661  * If the walk_condition is true for a node, it is copied.
 662  * All nodes node_info->copy have to be NULL prior to every walk.
 663  * Order of ins is important for later usage.
 664  */
 665 static void copy_walk(ir_node *node, walker_condition *walk_condition,
 666                 ir_loop *set_loop)
 667 {
 668         int i;
 669         int arity;
 670         ir_node *cp;
 671         ir_node **cpin;
 672         ir_graph *irg = current_ir_graph;
 673
 674         /**
 675          * break condition and cycle resolver, creating temporary node copies
 676          */
 677         if (get_irn_visited(node) >= get_irg_visited(irg)) {
 678                 /* Here we rely on nodestate's copy being initialized with NULL */
 679                 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
 680                 if (get_inversion_copy(node) == NULL) {
 681                         cp = copy_node(node);
 682                         set_inversion_copy(node, cp);
 683
 684                         DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp));
 685                 }
 686                 return;
 687         }
 688
 689         /* Walk */
 690         mark_irn_visited(node);
 691
 692         if (!is_Block(node)) {
 693                 ir_node *pred = get_nodes_block(node);
 694                 if (walk_condition(pred))
 695                         DB((dbg, LEVEL_5, "walk block %N\n", pred));
 696                 copy_walk(pred, walk_condition, set_loop);
 697         }
 698
 699         arity = get_irn_arity(node);
 700
 701         NEW_ARR_A(ir_node *, cpin, arity);
 702
 703         for (i = 0; i < arity; ++i) {
 704                 ir_node *pred = get_irn_n(node, i);
 705
 706                 if (walk_condition(pred)) {
 707                         DB((dbg, LEVEL_5, "walk node %N\n", pred));
 708                         copy_walk(pred, walk_condition, set_loop);
 709                         cpin[i] = get_inversion_copy(pred);
 710                         DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n",
 711                                                 node, get_inversion_copy(pred), pred));
 712                 } else {
 713                         cpin[i] = pred;
 714                 }
 715         }
 716
 717         /* copy node / finalize temp node */
 718         if (get_inversion_copy(node) == NULL) {
 719                 /* No temporary copy existent */
 720                 cp = copy_node(node);
 721                 set_inversion_copy(node, cp);
 722                 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
 723         } else {
 724                 /* temporary copy is existent but without correct ins */
 725                 cp = get_inversion_copy(node);
 726                 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
 727         }
 728
 729         if (!is_Block(node)) {
 730                 ir_node *cpblock = get_inversion_copy(get_nodes_block(node));
 731
 732                 set_nodes_block(cp, cpblock );
 733                 if (is_Phi(cp))
 734                         add_Block_phi(cpblock, cp);
 735         }
 736
 737         /* Keeps phi list of temporary node. */
 738         set_irn_in(cp, ARR_LEN(cpin), cpin);
 739 }
 740
 741 /**
 742  * This walker copies all walked nodes.
 743  * If the walk_condition is true for a node, it is copied.
 744  * All nodes node_info->copy have to be NULL prior to every walk.
 745  * Order of ins is important for later usage.
 746  * Takes copy_index, to phase-link copy at specific index.
 747  */
 748 static void copy_walk_n(ir_node *node,
 749                 walker_condition *walk_condition, int copy_index)
 750 {
 751         int i;
 752         int arity;
 753         ir_node *cp;
 754         ir_node **cpin;
 755
 756         /**
 757          * break condition and cycle resolver, creating temporary node copies
 758          */
 759         if (irn_visited(node)) {
 760                 /* Here we rely on nodestate's copy being initialized with NULL */
 761                 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
 762                 if (get_unroll_copy(node, copy_index) == NULL) {
 763                         ir_node *u;
 764                         u = copy_node(node);
 765                         set_unroll_copy(node, copy_index, u);
 766                         DB((dbg, LEVEL_5, "The TEMP unknown of %N is created %N\n", node, u));
 767                 }
 768                 return;
 769         }
 770
 771         /* Walk */
 772         mark_irn_visited(node);
 773
 774         if (!is_Block(node)) {
 775                 ir_node *block = get_nodes_block(node);
 776                 if (walk_condition(block))
 777                         DB((dbg, LEVEL_5, "walk block %N\n", block));
 778                 copy_walk_n(block, walk_condition, copy_index);
 779         }
 780
 781         arity = get_irn_arity(node);
 782         NEW_ARR_A(ir_node *, cpin, arity);
 783
 784         for (i = 0; i < arity; ++i) {
 785                 ir_node *pred = get_irn_n(node, i);
 786
 787                 if (walk_condition(pred)) {
 788                         DB((dbg, LEVEL_5, "walk node %N\n", pred));
 789                         copy_walk_n(pred, walk_condition, copy_index);
 790                         cpin[i] = get_unroll_copy(pred, copy_index);
 791                 } else {
 792                         cpin[i] = pred;
 793                 }
 794         }
 795
 796         /* copy node / finalize temp node */
 797         cp = get_unroll_copy(node, copy_index);
 798         if (cp == NULL || is_Unknown(cp)) {
 799                 cp = copy_node(node);
 800                 set_unroll_copy(node, copy_index, cp);
 801                 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
 802         } else {
 803                 /* temporary copy is existent but without correct ins */
 804                 cp = get_unroll_copy(node, copy_index);
 805                 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
 806         }
 807
 808         if (!is_Block(node)) {
 809                 ir_node *cpblock = get_unroll_copy(get_nodes_block(node), copy_index);
 810
 811                 set_nodes_block(cp, cpblock );
 812                 if (is_Phi(cp))
 813                         add_Block_phi(cpblock, cp);
 814         }
 815
 816         /* Keeps phi list of temporary node. */
 817         set_irn_in(cp, ARR_LEN(cpin), cpin);
 818 }
 819
 820 /* Removes alle Blocks with non marked predecessors from the condition chain. */
 821 static void unmark_not_allowed_cc_blocks(void)
 822 {
 823         int blocks = ARR_LEN(cc_blocks);
 824         int i;
 825
 826         for(i = 0; i < blocks; ++i) {
 827                 ir_node *block = cc_blocks[i];
 828                 int a;
 829                 int arity = get_irn_arity(block);
 830
 831                 /* Head is an exception. */
 832                 if (block == loop_head)
 833                         continue;
 834
 835                 for(a = 0; a < arity; ++a) {
 836                         if (! is_nodes_block_marked(get_irn_n(block, a))) {
 837                                 set_Block_mark(block, 0);
 838                                 --inversion_blocks_in_cc;
 839                                 DB((dbg, LEVEL_5, "Removed %N from cc (blocks in cc %d)\n",
 840                                                 block, inversion_blocks_in_cc));
 841
 842                                 break;
 843                         }
 844                 }
 845         }
 846 }
 847
 848 /* Unmarks all cc blocks using cc_blocks except head. */
 849 static void unmark_cc_blocks(void)
 850 {
 851         int blocks = ARR_LEN(cc_blocks);
 852         int i;
 853
 854         for(i = 0; i < blocks; ++i) {
 855                 ir_node *block = cc_blocks[i];
 856
 857                 /* Head is an exception. */
 858                 if (block != loop_head)
 859                         set_Block_mark(block, 0);
 860         }
 861         inversion_blocks_in_cc = 1;
 862
 863         /* invalidate */
 864         loop_info.cc_size = 0;
 865 }
 866
 867 /**
 868  * Populates head_entries with (node, pred_pos) tuple
 869  * whereas the node's pred at pred_pos is in the cc but not the node itself.
 870  * Also finds df loops inside the cc.
 871  * Head and condition chain blocks have been marked previously.
 872  */
 873 static void get_head_outs(ir_node *node, void *env)
 874 {
 875         int i;
 876         int arity = get_irn_arity(node);
 877         (void) env;
 878
 879         for (i = 0; i < arity; ++i) {
 880                 if (!is_nodes_block_marked(node) && is_nodes_block_marked(get_irn_n(node, i))) {
 881                         entry_edge entry;
 882                         entry.node = node;
 883                         entry.pos = i;
 884                         /* Saving also predecessor seems redundant, but becomes
 885                          * necessary when changing position of it, before
 886                          * dereferencing it.*/
 887                         entry.pred = get_irn_n(node, i);
 888                         ARR_APP1(entry_edge, cur_head_outs, entry);
 889                 }
 890         }
 891
 892         arity = get_irn_arity(loop_head);
 893
 894         /* Find df loops inside the cc */
 895         if (is_Phi(node) && get_nodes_block(node) == loop_head) {
 896                 for (i = 0; i < arity; ++i) {
 897                         if (is_own_backedge(loop_head, i)) {
 898                                 if (is_nodes_block_marked(get_irn_n(node, i))) {
 899                                         entry_edge entry;
 900                                         entry.node = node;
 901                                         entry.pos = i;
 902                                         entry.pred = get_irn_n(node, i);
 903                                         ARR_APP1(entry_edge, head_df_loop, entry);
 904                                         DB((dbg, LEVEL_5, "Found incc assignment node %N @%d is pred %N, graph %N %N\n",
 905                                                         node, i, entry.pred, current_ir_graph, get_irg_start_block(current_ir_graph)));
 906                                 }
 907                         }
 908                 }
 909         }
 910 }
 911
 912 /**
 913  * Find condition chains, and add them to be inverted
 914  * A block belongs to the chain if a condition branches out of the loop.
 915  * (Some blocks need to be removed once again.)
 916  * Returns 1 if the given block belongs to the condition chain.
 917  */
 918 static unsigned find_condition_chain(ir_node *block)
 919 {
 920         const    ir_edge_t *edge;
 921         unsigned mark = 0;
 922         unsigned has_be = 0;
 923         unsigned jmp_only;
 924         unsigned nodes_n;
 925
 926         mark_irn_visited(block);
 927
 928         DB((dbg, LEVEL_5, "condition_chains for block %N\n", block));
 929
 930         /* Get node count */
 931         foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
 932                 ++nodes_n;
 933         }
 934
 935         /* Check if node count would exceed maximum cc size.
 936          * TODO
 937          * This is not optimal, as we search depth-first and break here,
 938          * continuing with another subtree. */
 939         if (loop_info.cc_size + nodes_n > opt_params.max_cc_size) {
 940                 set_Block_mark(block, 0);
 941                 return 0;
 942         }
 943
 944         /* Check if block only has a jmp instruction. */
 945         jmp_only = 1;
 946         foreach_out_edge(block, edge) {
 947                 ir_node *src = get_edge_src_irn(edge);
 948
 949                 if (! is_Block(src) && ! is_Jmp(src)) {
 950                         jmp_only = 0;
 951                 }
 952         }
 953
 954         /* Check cf outs if one is leaving the loop,
 955          * or if this node has a backedge. */
 956         foreach_block_succ(block, edge) {
 957                 ir_node *src = get_edge_src_irn(edge);
 958                 int pos = get_edge_src_pos(edge);
 959
 960                 if (! is_in_loop(src))
 961                         mark = 1;
 962
 963                 /* Inverting blocks with backedge outs leads to a cf edge
 964                  * from the inverted head, into the inverted head (skipping the body).
 965                  * As the body becomes the new loop head,
 966                  * this would introduce another loop in the existing loop.
 967                  * This loop inversion cannot cope with this case. */
 968                 if (is_backedge(src, pos)) {
 969                         has_be = 1;
 970                         break;
 971                 }
 972         }
 973
 974         /* We need all predecessors to already belong to the condition chain.
 975          * Example of wrong case:  * == in cc
 976          *
 977          *     Head*             ,--.
 978          *    /|   \            B   |
 979          *   / A*  B           /    |
 980          *  / /\   /          ?     |
 981          *   /   C*      =>      D  |
 982          *          /  D               Head |
 983          *     /               A  \_|
 984          *                      C
 985          */
 986         /* Collect blocks containing only a Jmp.
 987          * Do not collect blocks with backedge outs. */
 988         if ((jmp_only == 1 || mark == 1) && has_be == 0) {
 989                 set_Block_mark(block, 1);
 990                 ++inversion_blocks_in_cc;
 991                 loop_info.cc_size += nodes_n;
 992                 DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block));
 993                 ARR_APP1(ir_node *, cc_blocks, block);
 994         } else {
 995                 set_Block_mark(block, 0);
 996         }
 997
 998         foreach_block_succ(block, edge) {
 999                 ir_node *src = get_edge_src_irn( edge );
1000
1001                 if (is_in_loop(src) && ! irn_visited(src))
1002                         find_condition_chain(src);
1003         }
1004
1005         return mark;
1006 }
1007
1008 /**
1009  * Rewires the copied condition chain. Removes backedges.
1010  * as this condition chain is prior to the loop.
1011  * Copy of loop_head must have phi list and old (unfixed) backedge info of the loop head.
1012  * (loop_head is already fixed, we cannot rely on it.)
1013  */
1014 static void fix_copy_inversion(void)
1015 {
1016         ir_node *new_head;
1017         ir_node **ins;
1018         ir_node **phis;
1019         ir_node *phi, *next;
1020         ir_node *head_cp        = get_inversion_copy(loop_head);
1021         int arity                       = get_irn_arity(head_cp);
1022         int backedges           = get_backedge_n(head_cp, 0);
1023         int new_arity           = arity - backedges;
1024         int pos;
1025         int i;
1026
1027         NEW_ARR_A(ir_node *, ins, new_arity);
1028
1029         pos = 0;
1030         /* Remove block backedges */
1031         for(i = 0; i < arity; ++i) {
1032                 if (!is_backedge(head_cp, i))
1033                         ins[pos++] = get_irn_n(head_cp, i);
1034         }
1035
1036         new_head = new_Block(new_arity, ins);
1037
1038         phis = NEW_ARR_F(ir_node *, 0);
1039
1040         for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1041                 ir_node *new_phi;
1042                 NEW_ARR_A(ir_node *, ins, new_arity);
1043                 pos = 0;
1044                 for(i = 0; i < arity; ++i) {
1045                         if (!is_backedge(head_cp, i))
1046                                 ins[pos++] = get_irn_n(phi, i);
1047                 }
1048                 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1049                                 new_head, new_arity, ins,
1050                                 get_irn_mode(phi));
1051                 ARR_APP1(ir_node *, phis, new_phi);
1052         }
1053
1054         pos = 0;
1055         for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1056                 exchange(phi, phis[pos++]);
1057         }
1058
1059         exchange(head_cp, new_head);
1060
1061         DEL_ARR_F(phis);
1062 }
1063
1064 /* Puts the original condition chain at the end of the loop,
1065  * subsequently to the body.
1066  * Relies on block phi list and correct backedges.
1067  */
1068 static void fix_head_inversion(void)
1069 {
1070         ir_node *new_head;
1071         ir_node **ins;
1072         ir_node *phi, *next;
1073         ir_node **phis;
1074         int arity                       = get_irn_arity(loop_head);
1075         int backedges           = get_backedge_n(loop_head, 0);
1076         int new_arity           = backedges;
1077         int pos;
1078         int i;
1079
1080         NEW_ARR_A(ir_node *, ins, new_arity);
1081
1082         pos = 0;
1083         /* Keep only backedges */
1084         for(i = 0; i < arity; ++i) {
1085                 if (is_own_backedge(loop_head, i))
1086                         ins[pos++] = get_irn_n(loop_head, i);
1087         }
1088
1089         new_head = new_Block(new_arity, ins);
1090
1091         phis = NEW_ARR_F(ir_node *, 0);
1092
1093         for_each_phi(loop_head, phi) {
1094                 ir_node *new_phi;
1095                 DB((dbg, LEVEL_5, "Fixing phi %N of loop head\n", phi));
1096
1097                 NEW_ARR_A(ir_node *, ins, new_arity);
1098
1099                 pos = 0;
1100                 for (i = 0; i < arity; ++i) {
1101                         ir_node *pred = get_irn_n(phi, i);
1102
1103                         if (is_own_backedge(loop_head, i)) {
1104                                 /* If assignment is in the condition chain,
1105                                  * we need to create a phi in the new loop head.
1106                                  * This can only happen for df, not cf. See find_condition_chains. */
1107                                 if (is_nodes_block_marked(pred)) {
1108                                         /* Cannot do this here. */
1109                                         ins[pos++] = pred; /*fix_inner_cc_definitions(phi, pred);*/
1110                                 } else {
1111                                         ins[pos++] = pred;
1112                                 }
1113                         }
1114                 }
1115
1116                 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1117                         new_head, new_arity, ins,
1118                         get_irn_mode(phi));
1119
1120                 ARR_APP1(ir_node *, phis, new_phi);
1121
1122                 DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N (arity %d)\n", phi, new_phi, pos ));
1123         }
1124
1125         pos = 0;
1126         for_each_phi_safe(get_Block_phis(loop_head), phi, next) {
1127                 DB((dbg, LEVEL_5, "fix inverted exch phi %N by %N\n", phi, phis[pos]));
1128                 if (phis[pos] != phi)
1129                         exchange(phi, phis[pos++]);
1130         }
1131
1132         DEL_ARR_F(phis);
1133
1134         DB((dbg, LEVEL_5, "fix inverted head exch head block %N by %N\n", loop_head, new_head));
1135         exchange(loop_head, new_head);
1136 }
1137
1138 /* Does the loop inversion.  */
1139 static void inversion_walk(entry_edge *head_entries)
1140 {
1141         int i;
1142
1143         /*
1144          * The order of rewiring bottom-up is crucial.
1145          * Any change of the order leads to lost information that would be needed later.
1146          */
1147
1148         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1149
1150         /* 1. clone condition chain */
1151         inc_irg_visited(current_ir_graph);
1152
1153         for (i = 0; i < ARR_LEN(head_entries); ++i) {
1154                 entry_edge entry = head_entries[i];
1155                 ir_node *pred = get_irn_n(entry.node, entry.pos);
1156
1157                 DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred));
1158
1159                 copy_walk(pred, is_nodes_block_marked, cur_loop);
1160         }
1161
1162         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1163
1164         /* 2. Extends the head control flow successors ins
1165          *    with the definitions of the copied head node. */
1166         for (i = 0; i < ARR_LEN(head_entries); ++i) {
1167                 entry_edge head_out = head_entries[i];
1168
1169                 if (is_Block(head_out.node))
1170                         extend_ins_by_copy(head_out.node, head_out.pos);
1171         }
1172
1173         /* 3. construct_ssa for users of definitions in the condition chain,
1174          *    as there is now a second definition. */
1175         for (i = 0; i < ARR_LEN(head_entries); ++i) {
1176                 entry_edge head_out = head_entries[i];
1177
1178                 /* Ignore keepalives */
1179                 if (is_End(head_out.node))
1180                         continue;
1181
1182                 /* Construct ssa for assignments in the condition chain. */
1183                 if (!is_Block(head_out.node)) {
1184                         ir_node *pred, *cppred, *block, *cpblock;
1185
1186                         pred = head_out.pred;
1187                         cppred = get_inversion_copy(pred);
1188                         block = get_nodes_block(pred);
1189                         cpblock = get_nodes_block(cppred);
1190                         construct_ssa(block, pred, cpblock, cppred);
1191                 }
1192         }
1193
1194         /*
1195          * If there is an assignment in the condition chain
1196          * with a user also in the condition chain,
1197          * the dominance frontier is in the new loop head.
1198          * The dataflow loop is completely in the condition chain.
1199          * Goal:
1200          *  To be wired: >|
1201          *
1202          *  | ,--.   |
1203          * Phi_cp |  | copied condition chain
1204          * >| |   |  |
1205          * >| ?__/   |
1206          * >| ,-.
1207          *  Phi* |   | new loop head with newly created phi.
1208          *   |   |
1209          *  Phi  |   | original, inverted condition chain
1210          *   |   |   |
1211          *   ?__/    |
1212          *
1213          */
1214         for (i = 0; i < ARR_LEN(head_df_loop); ++i) {
1215                 entry_edge head_out = head_df_loop[i];
1216
1217                 /* Construct ssa for assignments in the condition chain. */
1218                 ir_node *pred, *cppred, *block, *cpblock;
1219
1220                 pred = head_out.pred;
1221                 cppred = get_inversion_copy(pred);
1222                 assert(cppred && pred);
1223                 block = get_nodes_block(pred);
1224                 cpblock = get_nodes_block(cppred);
1225                 construct_ssa(block, pred, cpblock, cppred);
1226         }
1227
1228         /* 4. Remove the ins which are no backedges from the original condition chain
1229          *    as the cc is now subsequent to the body. */
1230         fix_head_inversion();
1231
1232         /* 5. Remove the backedges of the copied condition chain,
1233          *    because it is going to be the new 'head' in advance to the loop. */
1234         fix_copy_inversion();
1235 }
1236
1237 /* Performs loop inversion of cur_loop if possible and reasonable. */
1238 static void loop_inversion(void)
1239 {
1240         unsigned do_inversion = 1;
1241         unsigned has_cc = 0;
1242
1243         /*inversion_head_node_limit = INT_MAX;*/
1244         ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1245
1246         /* Reset block marks.
1247          * We use block marks to flag blocks of the original condition chain. */
1248         irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1249
1250         /*loop_info.blocks = get_loop_n_blocks(cur_loop);*/
1251         cond_chain_entries = NEW_ARR_F(entry_edge, 0);
1252         head_df_loop = NEW_ARR_F(entry_edge, 0);
1253
1254         /*head_inversion_node_count = 0;*/
1255         inversion_blocks_in_cc = 0;
1256
1257         /* Use phase to keep copy of nodes from the condition chain. */
1258         phase = new_phase(current_ir_graph, phase_irn_init_default);
1259
1260         /* Search for condition chains and temporarily save the blocks in an array. */
1261         cc_blocks = NEW_ARR_F(ir_node *, 0);
1262         inc_irg_visited(current_ir_graph);
1263         has_cc = find_condition_chain(loop_head);
1264
1265         unmark_not_allowed_cc_blocks();
1266         DEL_ARR_F(cc_blocks);
1267
1268 #if IGNORE_NODE_LIMITS
1269         (void) unmark_cc_blocks;
1270 #else
1271         /* Condition chain too large.
1272          * Loop should better be small enough to fit into the cache. */
1273         /* FIXME Of course, we should take a small enough cc in the first place,
1274          * which is not that simple. (bin packing)  */
1275         if (loop_info.cc_size > opt_params.max_cc_size) {
1276                 count_stats(stats.cc_limit_reached);
1277
1278                 /* Only head taken? */
1279                 if (inversion_blocks_in_cc == 1)
1280                         do_inversion = 0;
1281                 else
1282                         /* Unmark cc blocks except the head.
1283                          * Invert head only for possible unrolling. */
1284                         unmark_cc_blocks();
1285         }
1286 #endif
1287
1288         /* We also catch endless loops here,
1289          * because they do not have a condition chain. */
1290         if (inversion_blocks_in_cc < 1) {
1291                 do_inversion = 0;
1292                 DB((dbg, LEVEL_3,
1293                         "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n",
1294                         inversion_blocks_in_cc));
1295         }
1296
1297         if (do_inversion) {
1298                 cur_head_outs = NEW_ARR_F(entry_edge, 0);
1299
1300                 /* Get all edges pointing into the condition chain. */
1301                 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1302
1303                 /* Do the inversion */
1304                 inversion_walk(cur_head_outs);
1305
1306                 DEL_ARR_F(cur_head_outs);
1307
1308                 /* Duplicated blocks changed doms */
1309                 set_irg_doms_inconsistent(current_ir_graph);
1310                 /* Loop content changed */
1311                 set_irg_loopinfo_inconsistent(current_ir_graph);
1312                 /* TODO are they? Depends on set_irn_in and set_irn_n exchange and new_node. */
1313                 set_irg_outs_inconsistent(current_ir_graph);
1314
1315                 count_stats(stats.inverted);
1316         }
1317
1318         /* free */
1319         phase_free(phase);
1320         DEL_ARR_F(cond_chain_entries);
1321         DEL_ARR_F(head_df_loop);
1322
1323         ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1324 }
1325
1326 /* Fix the original loop_heads ins for invariant unrolling case. */
1327 static void unrolling_fix_loop_head_inv(void)
1328 {
1329         ir_node *ins[2];
1330         ir_node *phi;
1331         ir_node *proj = new_Proj(loop_info.duff_cond, mode_X, 0);
1332         ir_node *head_pred = get_irn_n(loop_head, loop_info.be_src_pos);
1333         ir_node *loop_condition = get_unroll_copy(head_pred, unroll_nr - 1);
1334
1335         /* Original loop_heads ins are:
1336          * duff block and the own backedge */
1337
1338         ins[0] = loop_condition;
1339         ins[1] = proj;
1340
1341         set_irn_in(loop_head, 2, ins);
1342
1343         for_each_phi(loop_head, phi) {
1344                 ir_node *pred = get_irn_n(phi, loop_info.be_src_pos);
1345                 ir_node *last_pred = get_unroll_copy(pred, unroll_nr - 1);
1346
1347                 ins[0] = last_pred;
1348                 ins[1] = get_irn_link(phi);
1349
1350                 set_irn_in(phi, 2, ins);
1351         }
1352 }
1353
1354 /* Removes previously created phis with only 1 in. */
1355 static void correct_phis(ir_node *node, void *env)
1356 {
1357         (void)env;
1358         if (is_Phi(node) && get_irn_arity(node) == 1) {
1359                 ir_node *exch;
1360                 ir_node *in[1];
1361
1362                 in[0] = get_irn_n(node, 0);
1363
1364                 exch = new_rd_Phi(get_irn_dbg_info(node),
1365                     get_nodes_block(node), 1, in,
1366                 get_irn_mode(node));
1367
1368                 exchange(node, exch);
1369         }
1370 }
1371
1372 /* Unrolling: Rewire floating copies. */
1373 static void place_copies(int copies)
1374 {
1375         ir_node *loophead = loop_head;
1376         int c, i;
1377         int be_src_pos = loop_info.be_src_pos;
1378
1379         /* Serialize loops by fixing their head ins.
1380          * Processed are the copies.
1381          * The original loop is done after that, to keep backedge infos. */
1382         for (c = 0; c < copies; ++c) {
1383                 ir_node *upper = get_unroll_copy(loophead, c);
1384                 ir_node *lower = get_unroll_copy(loophead, c + 1);
1385                 ir_node *phi;
1386                 ir_node *topmost_be_block = get_nodes_block(get_irn_n(loophead, be_src_pos));
1387
1388                 /* Important: get the preds first and then their copy. */
1389                 ir_node *upper_be_block = get_unroll_copy(topmost_be_block, c);
1390                 ir_node *new_jmp = new_r_Jmp(upper_be_block);
1391                 DB((dbg, LEVEL_5, " place_copies upper %N lower %N\n", upper, lower));
1392
1393                 DB((dbg, LEVEL_5, "topmost be block %N \n", topmost_be_block));
1394
1395                 if (loop_info.unroll_kind == constant) {
1396                         ir_node *ins[1];
1397                         ins[0] = new_jmp;
1398                         set_irn_in(lower, 1, ins);
1399
1400                         for_each_phi(loophead, phi) {
1401                                 ir_node *topmost_def = get_irn_n(phi, be_src_pos);
1402                                 ir_node *upper_def = get_unroll_copy(topmost_def, c);
1403                                 ir_node *lower_phi = get_unroll_copy(phi, c + 1);
1404
1405                                 /* It is possible, that the value used
1406                                  * in the OWN backedge path is NOT defined in this loop. */
1407                                 if (is_in_loop(topmost_def))
1408                                         ins[0] = upper_def;
1409                                 else
1410                                         ins[0] = topmost_def;
1411
1412                                 set_irn_in(lower_phi, 1, ins);
1413                                 /* Need to replace phis with 1 in later. */
1414                         }
1415                 } else {
1416                         /* Invariant case */
1417                         /* Every node has 2 ins. One from the duff blocks
1418                          * and one from the previous unrolled loop. */
1419                         ir_node *ins[2];
1420                         /* Calculate corresponding projection of mod result for this copy c */
1421                         ir_node *proj = new_Proj(loop_info.duff_cond, mode_X, unroll_nr - c - 1);
1422
1423                         ins[0] = new_jmp;
1424                         ins[1] = proj;
1425                         set_irn_in(lower, 1, ins);
1426
1427                         for_each_phi(loophead, phi) {
1428                                 ir_node *topmost_phi_pred = get_irn_n(phi, be_src_pos);
1429                                 ir_node *upper_phi_pred;
1430                                 ir_node *lower_phi;
1431                                 ir_node *duff_phi;
1432
1433                                 lower_phi = get_unroll_copy(phi, c + 1);
1434                                 duff_phi = get_irn_link(lower_phi);
1435
1436                                 if (is_in_loop(topmost_phi_pred)) {
1437                                         upper_phi_pred = get_unroll_copy(topmost_phi_pred, c);
1438                                 } else {
1439                                         upper_phi_pred = topmost_phi_pred;
1440                                 }
1441
1442                                 ins[0] = upper_phi_pred;
1443                                 ins[1] = duff_phi;
1444
1445                                 set_irn_in(lower_phi, 2, ins);
1446                         }
1447                 }
1448         }
1449
1450         /* Reconnect loop landing pad with last copy. */
1451         for (i = 0; i < ARR_LEN(loop_entries); ++i) {
1452                 entry_edge edge = loop_entries[i];
1453                 /* Last copy is at the bottom */
1454                 ir_node *new_pred = get_unroll_copy(edge.pred, copies);
1455                 set_irn_n(edge.node, edge.pos, new_pred);
1456         }
1457
1458         /* Fix original loops head.
1459          * Done in the end, as ins and be info were needed before. */
1460         if (loop_info.unroll_kind == constant) {
1461                 ir_node *phi;
1462                 ir_node *head_pred = get_irn_n(loop_head, be_src_pos);
1463                 ir_node *loop_condition = get_unroll_copy(head_pred, unroll_nr - 1);
1464
1465                 set_irn_n(loop_head, loop_info.be_src_pos, loop_condition);
1466
1467                 for_each_phi(loop_head, phi) {
1468                         ir_node *pred = get_irn_n(phi, be_src_pos);
1469                         ir_node *last_pred;
1470
1471                         /* It is possible, that the value used
1472                          * in the OWN backedge path is NOT defined in this loop. */
1473                         if (is_in_loop(pred))
1474                                 last_pred = get_unroll_copy(pred, copies);
1475                         else
1476                                 last_pred = pred;
1477                         set_irn_n(phi, be_src_pos, last_pred);
1478                 }
1479         } else {
1480                 unrolling_fix_loop_head_inv();
1481         }
1482 }
1483
1484 /* Copies the cur_loop several times. */
1485 static void copy_loop(entry_edge *cur_loop_outs, int copies)
1486 {
1487         int i, c;
1488
1489         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1490
1491         for (c = 0; c < copies; ++c) {
1492
1493                 inc_irg_visited(current_ir_graph);
1494
1495                 DB((dbg, LEVEL_5, "         ### Copy_loop  copy nr: %d ###\n", c));
1496                 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1497                         entry_edge entry = cur_loop_outs[i];
1498                         ir_node *pred = get_irn_n(entry.node, entry.pos);
1499
1500                         copy_walk_n(pred, is_in_loop, c + 1);
1501                 }
1502         }
1503
1504         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1505 }
1506
1507
1508 /* Creates a new phi from the given phi node omitting own bes,
1509  * using be_block as supplier of backedge informations. */
1510 static ir_node *clone_phis_sans_bes(ir_node *node, ir_node *be_block)
1511 {
1512         ir_node **ins;
1513         int arity = get_irn_arity(node);
1514         int i, c = 0;
1515
1516         assert(get_irn_arity(node) == get_irn_arity(be_block));
1517         assert(is_Phi(node));
1518
1519         ins = NEW_ARR_F(ir_node *, arity);
1520         for (i = 0; i < arity; ++i) {
1521                 if (! is_own_backedge(be_block, i)) {
1522                         ins[c] = get_irn_n(node, i);
1523                         ++c;
1524                 }
1525         /*      } else {
1526                         ir_node *pred = get_inr_n(node, i);
1527                         if (! is_in_loop(pred)) {
1528                                 ins[c] = pred;
1529                                 ++c;
1530                         }
1531                 }*/
1532         }
1533
1534         return new_r_Phi(get_nodes_block(node), c, ins, get_irn_mode(node));
1535 }
1536
1537 /* Creates a new block from the given block node omitting own bes,
1538  * using be_block as supplier of backedge informations. */
1539 static ir_node *clone_block_sans_bes(ir_node *node, ir_node *be_block)
1540 {
1541         ir_node **ins;
1542         int arity = get_irn_arity(node);
1543         int i, c = 0;
1544
1545         assert(get_irn_arity(node) == get_irn_arity(be_block));
1546         assert(is_Block(node));
1547
1548         ins = NEW_ARR_F(ir_node *, arity);
1549         for (i = 0; i < arity; ++i) {
1550                 if (! is_own_backedge(be_block, i)) {
1551                         ins[c] = get_irn_n(node, i);
1552                         ++c;
1553                 }
1554         }
1555
1556         return new_Block(c, ins);
1557 }
1558
1559 /* Creates blocks for duffs device, using previously obtained
1560  * informations about the iv.
1561  * TODO split */
1562 static void create_duffs_block(void)
1563 {
1564         ir_mode *mode;
1565
1566         ir_node *block1, *count_block, *duff_block;
1567         ir_node *ems, *ems_divmod, *ems_mod_proj, *cmp_null,
1568                 *cmp_proj, *ems_mode_cond, *x_true, *x_false, *const_null;
1569         ir_node *true_val, *false_val;
1570         ir_node *ins[2];
1571
1572         ir_node *duff_mod, *proj, *cond;
1573
1574         ir_node *count, *correction, *unroll_c;
1575         ir_node *cmp_bad_count, *good_count, *bad_count, *count_phi, *bad_count_neg;
1576
1577         mode = get_irn_mode(loop_info.end_val);
1578         const_null = new_Const(get_mode_null(mode));
1579
1580         /* TODO naming
1581          * 1. Calculate first approach to count.
1582          *    Condition: (end - start) % step == 0 */
1583         block1 = clone_block_sans_bes(loop_head, loop_head);
1584
1585         /* Create loop entry phis in first duff block
1586          * as it becomes the loops preheader */
1587         if (loop_info.unroll_kind == invariant) {
1588                 ir_node *phi;
1589                 for_each_phi(loop_head, phi) {
1590                         ir_node *new_phi = clone_phis_sans_bes(phi, loop_head);
1591                         set_nodes_block(new_phi, block1);
1592                 }
1593         }
1594
1595         ems = new_r_Sub(block1, loop_info.end_val, loop_info.start_val,
1596                 get_irn_mode(loop_info.end_val));
1597
1598         ems_divmod = new_r_DivMod(block1,
1599                 new_NoMem(),
1600                 ems,
1601                 loop_info.step,
1602                 mode,
1603                 op_pin_state_pinned);
1604
1605         ems_mod_proj = new_r_Proj(ems_divmod, mode, pn_DivMod_res_mod);
1606         cmp_null = new_r_Cmp(block1, ems_mod_proj, const_null);
1607         cmp_proj = new_r_Proj(cmp_null, mode, pn_Cmp_Eq);
1608         ems_mode_cond = new_Cond(cmp_proj);
1609
1610         /* ems % step == 0 */
1611         x_true = new_Proj(ems_mode_cond, mode_X, pn_Cond_true);
1612         /* ems % step != 0 */
1613         x_false = new_Proj(ems_mode_cond, mode_X, pn_Cond_false);
1614
1615
1616         /* 2. Second block.
1617          * Assures, duffs device receives a valid count.
1618          * Condition:
1619          *     decreasing: count < 0
1620          *     increasing: count > 0
1621          */
1622         ins[0] = x_true;
1623         ins[1] = x_false;
1624
1625         count_block = new_Block(2, ins);
1626
1627         /* Increase loop-taken-count depending on the loop condition
1628          * uses the latest iv to compare to. */
1629         if (loop_info.latest_value == 1) {
1630                 /* ems % step == 0 :  +0 */
1631                 true_val = new_Const(get_mode_null(mode));
1632                 /* ems % step != 0 :  +1 */
1633                 false_val = new_Const(get_mode_one(mode));
1634         } else {
1635                 tarval *tv_two = new_tarval_from_long(2, mode);
1636                 /* ems % step == 0 :  +1 */
1637                 true_val = new_Const(get_mode_one(mode));
1638                 /* ems % step != 0 :  +2 */
1639                 false_val = new_Const(tv_two);
1640         }
1641
1642         ins[0] = true_val;
1643         ins[1] = false_val;
1644
1645         correction = new_r_Phi(count_block, 2, ins, mode);
1646
1647         count = new_r_Proj(ems_divmod, mode, pn_DivMod_res_div);
1648
1649         /* (end - start) / step  +  correction */
1650         count = new_Add(count, correction, mode);
1651
1652         cmp_bad_count = new_r_Cmp(count_block, count, const_null);
1653
1654         /* We preconditioned the loop to be tail-controlled.
1655          * So, if count is something 'wrong' like 0,
1656          * negative/positive (depending on step direction),
1657          * we may take the loop once (tail-contr.) and leave it
1658          * to the existing condition, to break; */
1659
1660         /* Depending on step direction, we have to check for > or < 0 */
1661         if (loop_info.decreasing == 1) {
1662                 bad_count_neg = new_r_Proj(cmp_bad_count, mode_X, pn_Cmp_Lt);
1663         } else {
1664                 bad_count_neg = new_r_Proj(cmp_bad_count, mode_X, pn_Cmp_Gt);
1665         }
1666
1667         bad_count_neg = new_Cond(bad_count_neg);
1668         good_count = new_Proj(bad_count_neg, mode_X, pn_Cond_true);
1669         bad_count = new_Proj(ems_mode_cond, mode_X, pn_Cond_false);
1670
1671         /* 3. Duff Block
1672          *    Contains module to decide which loop to start from. */
1673
1674         ins[0] = good_count;
1675         ins[1] = bad_count;
1676         duff_block = new_Block(2, ins);
1677
1678         /* count wants to be positive */
1679         ins[0] = get_Abs_op(count);
1680         /* Manually feed the aforementioned count = 1 (bad case)*/
1681         ins[1] = new_Const(get_mode_one(mode));
1682         count_phi = new_r_Phi(duff_block, 2, ins, mode);
1683
1684         unroll_c = new_Const(new_tarval_from_long((long)unroll_nr, mode));
1685
1686         /* count % unroll_nr */
1687         duff_mod = new_r_Mod(duff_block,
1688                 new_NoMem(),
1689                 count_phi,
1690                 unroll_c,
1691                 mode,
1692                 op_pin_state_pinned);
1693
1694         proj = new_Proj(duff_mod, mode_X, pn_Mod_res);
1695         cond = new_Cond(proj);
1696
1697         loop_info.duff_cond = cond;
1698 }
1699
1700 /* Returns 1 if given node is not in loop,
1701  * or if it is a phi of the loop head with only loop invariant defs.
1702  */
1703 static unsigned is_loop_invariant_def(ir_node *node)
1704 {
1705         int i;
1706
1707         if (! is_in_loop(node))
1708                 return 1;
1709
1710         /* If this is a phi of the loophead shared by more than 1 loop,
1711          * we need to check if all defs are not in the loop.  */
1712         if (is_Phi(node)) {
1713                 ir_node *block;
1714                 block = get_nodes_block(node);
1715
1716                 /* To prevent unexpected situations. */
1717                 if (block != loop_head)
1718                         return 0;
1719
1720                 for (i = 0; i < get_irn_arity(node); ++i) {
1721                         /* Check if all bes are just loopbacks. */
1722                         if (is_own_backedge(block, i) && get_irn_n(node, i) != node)
1723                                 return 0;
1724                 }
1725         }
1726         return 1;
1727 }
1728
1729 /* Returns 1 if one pred of node is invariant and the other is not.
1730  * invar_pred and other are set analogously. */
1731 static unsigned get_invariant_pred(ir_node *node, ir_node **invar_pred, ir_node **other)
1732 {
1733         ir_node *pred0 = get_irn_n(node, 0);
1734         ir_node *pred1 = get_irn_n(node, 1);
1735
1736         *invar_pred = NULL;
1737         *other = NULL;
1738
1739         if (is_loop_invariant_def(pred0)) {
1740                 *invar_pred = pred0;
1741                 *other = pred1;
1742         }
1743
1744         if (is_loop_invariant_def(pred1)) {
1745                 if (invar_pred != NULL)
1746                         /* RETURN. We do not want both preds to be invariant. */
1747                         return 0;
1748
1749                 *other = pred0;
1750                 *invar_pred = pred1;
1751                 return 1;
1752         } else {
1753                 return 0;
1754         }
1755 }
1756
1757 /* Starts from a phi that may belong to an iv.
1758  * If an add forms a loop with iteration_phi,
1759  * and add uses a constant, 1 is returned
1760  * and 'start' as well as 'add' are sane. */
1761 static unsigned get_start_and_add(ir_node *iteration_phi, unrolling_kind_flag role)
1762 {
1763         int i;
1764         ir_node *found_add = loop_info.add;
1765         int arity = get_irn_arity(iteration_phi);
1766
1767         DB((dbg, LEVEL_4, "Find start and add from %N\n", iteration_phi));
1768
1769         for (i = 0; i < arity; ++i) {
1770
1771                 /* Find start_val which needs to be pred of the iteration_phi.
1772                  * If start_val already known, sanity check. */
1773                 if (!is_backedge(get_nodes_block(loop_info.iteration_phi), i)) {
1774                         ir_node *found_start_val = get_irn_n(loop_info.iteration_phi, i);
1775
1776                         DB((dbg, LEVEL_4, "found_start_val %N\n", found_start_val));
1777
1778                         /* We already found a start_val it has to be always the same. */
1779                         if (loop_info.start_val && found_start_val != loop_info.start_val)
1780                                 return 0;
1781
1782                         if ((role == constant) && !(is_SymConst(found_start_val) || is_Const(found_start_val)))
1783                                         return 0;
1784                         else if((role == constant) && !(is_loop_invariant_def(found_start_val)))
1785                                         return 0;
1786
1787                         loop_info.start_val = found_start_val;
1788                 }
1789
1790                 /* The phi has to be in the loop head.
1791                  * Follow all own backedges. Every value supplied from these preds of the phi
1792                  * needs to origin from the same add. */
1793                 if (is_own_backedge(get_nodes_block(loop_info.iteration_phi), i)) {
1794                         ir_node *new_found = get_irn_n(loop_info.iteration_phi,i);
1795
1796                         DB((dbg, LEVEL_4, "is add? %N\n", new_found));
1797
1798                         if (! (is_Add(new_found) || is_Sub(new_found)) || (found_add && found_add != new_found))
1799                                 return 0;
1800                         else
1801                                 found_add = new_found;
1802                 }
1803         }
1804
1805         loop_info.add = found_add;
1806
1807         return 1;
1808 }
1809
1810
1811 /* Returns 1 if one pred of node is a const value and the other is not.
1812  * const_pred and other are set analogously. */
1813 static unsigned get_const_pred(ir_node *node, ir_node **const_pred, ir_node **other)
1814 {
1815         ir_node *pred0 = get_irn_n(node, 0);
1816         ir_node *pred1 = get_irn_n(node, 1);
1817
1818         DB((dbg, LEVEL_4, "Checking for constant pred of %N\n", node));
1819
1820         *const_pred = NULL;
1821         *other = NULL;
1822
1823         /*DB((dbg, LEVEL_4, "is %N const\n", pred0));*/
1824         if (is_Const(pred0) || is_SymConst(pred0)) {
1825                 DB((dbg, LEVEL_1, "%N is constant\n", pred0));
1826                 *const_pred = pred0;
1827                 *other = pred1;
1828         }
1829
1830         /*DB((dbg, LEVEL_4, "is %N const\n", pred1));*/
1831         if (is_Const(pred1) || is_SymConst(pred1)) {
1832                 if (*const_pred != NULL) {
1833                         DB((dbg, LEVEL_1, "%N is ALSO constant\n", pred1));
1834                         /* RETURN. We do not want both preds to be constant. */
1835                         return 0;
1836                 }
1837
1838                 DB((dbg, LEVEL_4, "%N is constant\n", pred1));
1839                 *other = pred0;
1840                 *const_pred = pred1;
1841         }
1842
1843         if (*const_pred == NULL)
1844                 return 0;
1845         else
1846                 return 1;
1847 }
1848
1849 /* Returns the mathematically inverted pn_Cmp. */
1850 static pn_Cmp get_math_inverted_case(pn_Cmp proj)
1851 {
1852         switch(proj) {
1853                 case pn_Cmp_Eq:
1854                         return pn_Cmp_Lg;
1855                 case pn_Cmp_Lg:
1856                         return pn_Cmp_Eq;
1857                 case pn_Cmp_Lt:
1858                         return pn_Cmp_Ge;
1859                 case pn_Cmp_Le:
1860                         return pn_Cmp_Gt;
1861                 case pn_Cmp_Gt:
1862                         return pn_Cmp_Le;
1863                 case pn_Cmp_Ge:
1864                         return pn_Cmp_Lt;
1865                 default:
1866                         panic("Unhandled pn_Cmp.");
1867         }
1868 }
1869
1870 /* norm_proj means we do not exit the loop. */
1871 static unsigned simulate_next(tarval **count_tar,
1872                 tarval *stepped, tarval *step_tar, tarval *end_tar, pn_Cmp norm_proj)
1873 {
1874         tarval *next;
1875
1876         DB((dbg, LEVEL_1, "Loop taken if (stepped)%ld %s (end)%ld ",
1877                                 get_tarval_long(stepped),
1878                                 get_pnc_string((norm_proj)),
1879                                 get_tarval_long(end_tar)));
1880         DB((dbg, LEVEL_1, "comparing latest value %d\n", loop_info.latest_value));
1881
1882         /* If current iv does not stay in the loop,
1883          * this run satisfied the exit condition. */
1884         if (! (tarval_cmp(stepped, end_tar) & norm_proj))
1885                 return 1;
1886
1887         DB((dbg, LEVEL_1, "Result: (stepped)%ld IS %s (end)%ld\n",
1888                                 get_tarval_long(stepped),
1889                                 get_pnc_string(tarval_cmp(stepped, end_tar)),
1890                                 get_tarval_long(end_tar)));
1891
1892         /* next step */
1893         if (is_Add(loop_info.add))
1894                 next = tarval_add(stepped, step_tar);
1895         else
1896                 /* sub */
1897                 next = tarval_sub(stepped, step_tar, get_irn_mode(loop_info.end_val));
1898
1899         DB((dbg, LEVEL_1, "Loop taken if %ld %s %ld ",
1900                                 get_tarval_long(next),
1901                                 get_pnc_string(norm_proj),
1902                                 get_tarval_long(end_tar)));
1903         DB((dbg, LEVEL_1, "comparing latest value %d\n", loop_info.latest_value));
1904
1905         /* Increase steps. */
1906         *count_tar = tarval_add(*count_tar, get_tarval_one(get_tarval_mode(*count_tar)));
1907
1908         /* Next has to fail the loop condition, or we will never exit. */
1909         if (! (tarval_cmp(next, end_tar) & norm_proj))
1910                 return 1;
1911         else
1912                 return 0;
1913 }
1914
1915 /* Check if loop meets requirements for a 'simple loop':
1916  * - Exactly one cf out
1917  * - Allowed calls
1918  * - Max nodes after unrolling
1919  * - tail-controlled
1920  * - exactly one be
1921  * - cmp
1922  * Returns Projection of cmp node or NULL; */
1923 static ir_node *is_simple_loop(void)
1924 {
1925         int arity, i;
1926         unsigned loop_depth, max_loop_nodes_adapted;
1927         ir_node *loop_block, *exit_block, *projx, *cond, *projres, *loop_condition;
1928
1929         /* Maximum of one condition, and no endless loops. */
1930         if (loop_info.cf_outs != 1)
1931                 return NULL;
1932
1933         DB((dbg, LEVEL_4, "1 loop exit\n"));
1934
1935 #if 0
1936         /* Ignore loop size. Probably not wise in other than testcases. */
1937         (void) max_loop_nodes_adapted;
1938         (void) loop_depth;
1939
1940         loop_info.max_unroll = 6;
1941 #else
1942         /* Calculate maximum unroll_nr keeping node count below limit. */
1943         loop_depth = get_loop_depth(cur_loop) - 1;
1944         max_loop_nodes_adapted = get_max_nodes_adapted(loop_depth);
1945
1946         loop_info.max_unroll = opt_params.max_loop_size / loop_info.nodes;
1947         if (loop_info.max_unroll < 2) {
1948                 count_stats(stats.too_large);
1949                 return NULL;
1950         }
1951 #endif
1952         DB((dbg, LEVEL_4, "maximum unroll factor %u, to not exceed node limit \n",
1953                 loop_info.max_unroll));
1954
1955         arity = get_irn_arity(loop_head);
1956         /* RETURN if we have more than 1 be. */
1957         /* Get my backedges without alien bes. */
1958         loop_block = NULL;
1959         for (i = 0; i < arity; ++i) {
1960                 ir_node *pred = get_irn_n(loop_head, i);
1961                 if (is_own_backedge(loop_head, i)) {
1962                         if (loop_block)
1963                                 /* Our simple loops may have only one backedge. */
1964                                 return NULL;
1965                         else {
1966                                 loop_block = get_nodes_block(pred);
1967                                 loop_info.be_src_pos = i;
1968                         }
1969                 }
1970         }
1971
1972         DB((dbg, LEVEL_4, "loop has 1 own backedge.\n"));
1973
1974         exit_block = get_nodes_block(loop_info.cf_out.pred);
1975         /* The loop has to be tail-controlled.
1976          * This can be changed/improved,
1977          * but we would need a duff iv. */
1978         if (exit_block != loop_block)
1979                 return NULL;
1980
1981         DB((dbg, LEVEL_4, "tail-controlled loop.\n"));
1982
1983         /* find value on which loop exit depends */
1984         projx = loop_info.cf_out.pred;
1985         cond = get_irn_n(projx, 0);
1986         projres = get_irn_n(cond, 0);
1987         loop_condition = get_irn_n(projres, 0);
1988
1989         if (!is_Cmp(loop_condition))
1990                 return NULL;
1991
1992         DB((dbg, LEVEL_5, "projection is %s\n", get_pnc_string(get_Proj_proj(projx))));
1993
1994         switch(get_Proj_proj(projx)) {
1995                 case pn_Cond_false:
1996                         loop_info.exit_cond = 0;
1997                         break;
1998                 case pn_Cond_true:
1999                         loop_info.exit_cond = 1;
2000                         break;
2001                 default:
2002                         panic("Cond Proj_proj other than true/false");
2003         }
2004
2005         DB((dbg, LEVEL_4, "Valid Cmp.\n"));
2006
2007         return projres;
2008 }
2009
2010 /* Returns 1 if all nodes are mode_Iu or mode_Is. */
2011 static unsigned are_mode_I(ir_node *n1, ir_node* n2, ir_node *n3)
2012 {
2013         ir_mode *m1 = get_irn_mode(n1);
2014         ir_mode *m2 = get_irn_mode(n2);
2015         ir_mode *m3 = get_irn_mode(n3);
2016
2017         if ((m1 == mode_Iu && m2 == mode_Iu && m3 == mode_Iu) ||
2018             (m1 == mode_Is && m2 == mode_Is && m3 == mode_Is))
2019                 return 1;
2020         else
2021                 return 0;
2022 }
2023
2024 /* Checks if cur_loop is a simple tail-controlled counting loop
2025  * with start and end value loop invariant, step constant. */
2026 static unsigned get_unroll_decision_invariant(void)
2027 {
2028
2029         ir_node         *projres, *loop_condition, *iteration_path;
2030         unsigned        success, is_latest_val;
2031         tarval          *start_tar, *step_tar;
2032         ir_mode         *mode;
2033
2034         /* RETURN if loop is not 'simple' */
2035         projres = is_simple_loop();
2036         if (projres == NULL)
2037                 return 0;
2038
2039         loop_condition = get_irn_n(projres, 0);
2040
2041         success = get_invariant_pred(loop_condition, &loop_info.end_val, &iteration_path);
2042         if (! success)
2043                 return 0;
2044
2045         DB((dbg, LEVEL_4, "Invariant End_val %N, other %N\n", loop_info.end_val, iteration_path));
2046
2047         /* We may find the add or the phi first.
2048          * Until now we only have end_val. */
2049         if (is_Add(iteration_path) || is_Sub(iteration_path)) {
2050
2051                 /* We test against the latest value of the iv. */
2052                 is_latest_val = 1;
2053
2054                 loop_info.add = iteration_path;
2055                 DB((dbg, LEVEL_4, "Got add %N (maybe not sane)\n", loop_info.add));
2056
2057                 /* Preds of the add should be step and the iteration_phi */
2058                 success = get_const_pred(loop_info.add, &loop_info.step, &loop_info.iteration_phi);
2059                 if (! success)
2060                         return 0;
2061
2062                 DB((dbg, LEVEL_4, "Got step %N\n", loop_info.step));
2063
2064                 if (! is_Phi(loop_info.iteration_phi))
2065                         return 0;
2066
2067                 DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi));
2068
2069                 /* Find start_val.
2070                  * Does necessary sanity check of add, if it is already set.  */
2071                 success = get_start_and_add(loop_info.iteration_phi, invariant);
2072                 if (! success)
2073                         return 0;
2074
2075                 DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val));
2076
2077         } else if (is_Phi(iteration_path)) {
2078                 ir_node *new_iteration_phi;
2079
2080                 /* We compare with the value the iv had entering this run. */
2081                 is_latest_val = 0;
2082
2083                 loop_info.iteration_phi = iteration_path;
2084                 DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi));
2085
2086                 /* Find start_val and add-node.
2087                  * Does necessary sanity check of add, if it is already set.  */
2088                 success = get_start_and_add(loop_info.iteration_phi, invariant);
2089                 if (! success)
2090                         return 0;
2091
2092                 DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val));
2093                 DB((dbg, LEVEL_4, "Got add or sub %N\n", loop_info.add));
2094
2095                 success = get_const_pred(loop_info.add, &loop_info.step, &new_iteration_phi);
2096                 if (! success)
2097                         return 0;
2098
2099                 DB((dbg, LEVEL_4, "Got step %N\n", loop_info.step));
2100
2101                 if (loop_info.iteration_phi != new_iteration_phi)
2102                         return 0;
2103
2104         } else {
2105                 return 0;
2106         }
2107
2108         mode = get_irn_mode(loop_info.end_val);
2109
2110         DB((dbg, LEVEL_4, "start %N, end %N, step %N\n",
2111                                 loop_info.start_val, loop_info.end_val, loop_info.step));
2112
2113         if (mode != mode_Is && mode != mode_Iu)
2114                 return 0;
2115
2116         /* TODO necessary? */
2117         if (!are_mode_I(loop_info.start_val, loop_info.step, loop_info.end_val))
2118                 return 0;
2119
2120         DB((dbg, LEVEL_4, "mode integer\n"));
2121
2122         step_tar = get_Const_tarval(loop_info.step);
2123         start_tar = get_Const_tarval(loop_info.start_val);
2124
2125         if (tarval_is_null(step_tar)) {
2126                 /* TODO Might be worth a warning. */
2127                 return 0;
2128         }
2129
2130         DB((dbg, LEVEL_4, "step is not 0\n"));
2131
2132         create_duffs_block();
2133
2134         return loop_info.max_unroll;
2135 }
2136
2137 /* Returns unroll factor,
2138  * given maximum unroll factor and number of loop passes. */
2139 static unsigned get_preferred_factor_constant(tarval *count_tar)
2140 {
2141         tarval *tar_6, *tar_5, *tar_4, *tar_3, *tar_2;
2142         unsigned prefer;
2143         ir_mode *mode = get_irn_mode(loop_info.end_val);
2144
2145         tar_6 = new_tarval_from_long(6, mode);
2146         tar_5 = new_tarval_from_long(5, mode);
2147         tar_4 = new_tarval_from_long(4, mode);
2148         tar_3 = new_tarval_from_long(3, mode);
2149         tar_2 = new_tarval_from_long(2, mode);
2150
2151         /* loop passes % {6, 5, 4, 3, 2} == 0  */
2152         if (tarval_is_null(tarval_mod(count_tar, tar_6)))
2153                 prefer = 6;
2154         else if (tarval_is_null(tarval_mod(count_tar, tar_5)))
2155                 prefer = 5;
2156         else if (tarval_is_null(tarval_mod(count_tar, tar_4)))
2157                 prefer = 4;
2158         else if (tarval_is_null(tarval_mod(count_tar, tar_3)))
2159                 prefer = 3;
2160         else if (tarval_is_null(tarval_mod(count_tar, tar_2)))
2161                 prefer = 2;
2162         else {
2163                 /* gcd(max_unroll, count_tar) */
2164                 int a = loop_info.max_unroll;
2165                 int b = (int)get_tarval_long(count_tar);
2166                 int c;
2167
2168                 DB((dbg, LEVEL_4, "gcd of max_unroll %d and count_tar %d: ", a, b));
2169
2170                 do {
2171                 c = a % b;
2172                 a = b; b = c;
2173                 } while( c != 0);
2174
2175                 DB((dbg, LEVEL_4, "%d\n", a));
2176                 return a;
2177         }
2178
2179         DB((dbg, LEVEL_4, "preferred unroll factor %d\n", prefer));
2180
2181         /*
2182          * If our preference is greater than the allowed unroll factor
2183          * we either might reduce the preferred factor and prevent a duffs device block,
2184          * or create a duffs device block, from which in this case (constants only)
2185          * we know the startloop at compiletime.
2186          * The latter yields the following graphs.
2187          * but for code generation we would want to use graph A.
2188          * The graphs are equivalent. So, we can only reduce the preferred factor.
2189          * A)                   B)
2190          *     PreHead             PreHead
2191          *        |      ,--.         |   ,--.
2192          *         \ Loop1   \        Loop2   \
2193          *          \  |     |       /  |     |
2194          *           Loop2   /      / Loop1   /
2195          *           |   `--'      |      `--'
2196          */
2197
2198         if (prefer <= loop_info.max_unroll)
2199                 return prefer;
2200         else {
2201                 switch(prefer) {
2202                         case 6:
2203                                 if (loop_info.max_unroll >= 3)
2204                                         return 3;
2205                                 else if (loop_info.max_unroll >= 2)
2206                                         return 2;
2207                                 else
2208                                         return 0;
2209
2210                         case 4:
2211                                 if (loop_info.max_unroll >= 2)
2212                                         return 2;
2213                                 else
2214                                         return 0;
2215
2216                         default:
2217                                 return 0;
2218                 }
2219         }
2220 }
2221
2222 /* Check if cur_loop is a simple counting loop.
2223  * Start, step and end are constants. */
2224 /* TODO split. */
2225 static unsigned get_unroll_decision_constant(void)
2226 {
2227         ir_node         *projres, *loop_condition, *iteration_path;
2228         unsigned        success, is_latest_val;
2229         tarval          *start_tar, *end_tar, *step_tar, *diff_tar, *count_tar, *stepped;
2230         pn_Cmp          proj_proj, norm_proj;
2231         ir_mode         *mode;
2232
2233         /* RETURN if loop is not 'simple' */
2234         projres = is_simple_loop();
2235         if (projres == NULL)
2236                 return 0;
2237
2238         /* One in of the loop condition needs to be loop invariant. => end_val
2239          * The other in is assigned by an add. => add
2240          * The add uses a loop invariant value => step
2241          * and a phi with a loop invariant start_val and the add node as ins.
2242
2243            ^   ^
2244            |   | .-,
2245            |   Phi |
2246                 \  |   |
2247           ^  Add   |
2248            \  | \__|
2249             cond
2250              /\
2251         */
2252
2253         loop_condition = get_irn_n(projres, 0);
2254
2255         success = get_const_pred(loop_condition, &loop_info.end_val, &iteration_path);
2256         if (! success)
2257                 return 0;
2258
2259         DB((dbg, LEVEL_4, "End_val %N, other %N\n", loop_info.end_val, iteration_path));
2260
2261         /* We may find the add or the phi first.
2262          * Until now we only have end_val. */
2263         if (is_Add(iteration_path) || is_Sub(iteration_path)) {
2264
2265                 /* We test against the latest value of the iv. */
2266                 is_latest_val = 1;
2267
2268                 loop_info.add = iteration_path;
2269                 DB((dbg, LEVEL_4, "Got add %N (maybe not sane)\n", loop_info.add));
2270
2271                 /* Preds of the add should be step and the iteration_phi */
2272                 success = get_const_pred(loop_info.add, &loop_info.step, &loop_info.iteration_phi);
2273                 if (! success)
2274                         return 0;
2275
2276                 DB((dbg, LEVEL_4, "Got step %N\n", loop_info.step));
2277
2278                 if (! is_Phi(loop_info.iteration_phi))
2279                         return 0;
2280
2281                 DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi));
2282
2283                 /* Find start_val.
2284                  * Does necessary sanity check of add, if it is already set.  */
2285                 success = get_start_and_add(loop_info.iteration_phi, constant);
2286                 if (! success)
2287                         return 0;
2288
2289                 DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val));
2290
2291         } else if (is_Phi(iteration_path)) {
2292                 ir_node *new_iteration_phi;
2293
2294                 /* We compare with the value the iv had entering this run. */
2295                 is_latest_val = 0;
2296
2297                 loop_info.iteration_phi = iteration_path;
2298                 DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi));
2299
2300                 /* Find start_val and add-node.
2301                  * Does necessary sanity check of add, if it is already set.  */
2302                 success = get_start_and_add(loop_info.iteration_phi, constant);
2303                 if (! success)
2304                         return 0;
2305
2306                 DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val));
2307                 DB((dbg, LEVEL_4, "Got add or sub %N\n", loop_info.add));
2308
2309                 success = get_const_pred(loop_info.add, &loop_info.step, &new_iteration_phi);
2310                 if (! success)
2311                         return 0;
2312
2313                 DB((dbg, LEVEL_4, "Got step %N\n", loop_info.step));
2314
2315                 if (loop_info.iteration_phi != new_iteration_phi)
2316                         return 0;
2317
2318         } else {
2319                 /* RETURN */
2320                 return 0;
2321         }
2322
2323         mode = get_irn_mode(loop_info.end_val);
2324
2325         DB((dbg, LEVEL_4, "start %N, end %N, step %N\n",
2326                                 loop_info.start_val, loop_info.end_val, loop_info.step));
2327
2328         if (mode != mode_Is && mode != mode_Iu)
2329                 return 0;
2330
2331         /* TODO necessary? */
2332         if (!are_mode_I(loop_info.start_val, loop_info.step, loop_info.end_val))
2333                 return 0;
2334
2335         DB((dbg, LEVEL_4, "mode integer\n"));
2336
2337         end_tar = get_Const_tarval(loop_info.end_val);
2338         start_tar = get_Const_tarval(loop_info.start_val);
2339         step_tar = get_Const_tarval(loop_info.step);
2340
2341         if (tarval_is_null(step_tar))
2342                 /* TODO Might be worth a warning. */
2343                 return 0;
2344
2345         DB((dbg, LEVEL_4, "step is not 0\n"));
2346
2347         if ((!tarval_is_negative(step_tar)) ^ (!is_Sub(loop_info.add)))
2348                 loop_info.decreasing = 1;
2349
2350         diff_tar = tarval_sub(end_tar, start_tar, mode);
2351
2352         /* We need at least count_tar steps to be close to end_val, maybe more.
2353          * No way, that we have gone too many steps.
2354          * This represents the 'latest value'.
2355          * (If condition checks against latest value, is checked later) */
2356         count_tar = tarval_div(diff_tar, step_tar);
2357
2358         /* Iv will not pass end_val (except overflows).
2359          * Nothing done, as it would yield to no advantage. */
2360         if (tarval_is_negative(count_tar)) {
2361                 DB((dbg, LEVEL_1, "Loop is endless or never taken."));
2362                 /* TODO Might be worth a warning. */
2363                 return 0;
2364         }
2365
2366         count_stats(stats.u_simple_counting_loop);
2367
2368         loop_info.latest_value = is_latest_val;
2369
2370         /* TODO split here
2371         if (! is_simple_counting_loop(&count_tar))
2372                 return 0;
2373         */
2374
2375         /* stepped can be negative, if step < 0 */
2376         stepped = tarval_mul(count_tar, step_tar);
2377
2378         /* step as close to end_val as possible, */
2379         /* |stepped| <= |end_tar|, and dist(stepped, end_tar) is smaller than a step. */
2380         if (is_Sub(loop_info.add))
2381                 stepped = tarval_sub(start_tar, stepped, mode_Is);
2382         else
2383                 stepped = tarval_add(start_tar, stepped);
2384
2385         DB((dbg, LEVEL_4, "stepped to %ld\n", get_tarval_long(stepped)));
2386
2387         proj_proj = get_Proj_proj(projres);
2388         /* Assure that norm_proj is the stay-in-loop case. */
2389         if (loop_info.exit_cond == 1)
2390                 norm_proj = get_math_inverted_case(proj_proj);
2391         else
2392                 norm_proj = proj_proj;
2393
2394         DB((dbg, LEVEL_4, "normalized projection %s\n", get_pnc_string(norm_proj)));
2395
2396         /* Executed at most once (stay in counting loop if a Eq b) */
2397         if (norm_proj == pn_Cmp_Eq)
2398                 /* TODO Might be worth a warning. */
2399                 return 0;
2400
2401         /* calculates next values and increases count_tar according to it */
2402         success = simulate_next(&count_tar, stepped, step_tar, end_tar, norm_proj);
2403         if (! success)
2404                 return 0;
2405
2406         /* We run loop once more, if we compare to the
2407          * not yet in-/decreased iv. */
2408         if (is_latest_val == 0) {
2409                 DB((dbg, LEVEL_4, "condition uses not latest iv value\n"));
2410                 count_tar = tarval_add(count_tar, get_tarval_one(mode));
2411         }
2412
2413         DB((dbg, LEVEL_4, "loop taken %ld times\n", get_tarval_long(count_tar)));
2414
2415         /* Assure the loop is taken at least 1 time. */
2416         if (tarval_is_null(count_tar)) {
2417                 /* TODO Might be worth a warning. */
2418                 return 0;
2419         }
2420
2421         loop_info.count_tar = count_tar;
2422         return get_preferred_factor_constant(count_tar);
2423 }
2424
2425 /**
2426  * Loop unrolling
2427  */
2428 static void unroll_loop(void)
2429 {
2430         unroll_nr = 0;
2431
2432         /* get_unroll_decision_constant and invariant are completely
2433          * independent for flexibility.
2434          * Some checks may be performed twice. */
2435
2436         /* constant case? */
2437         if (opt_params.allow_const_unrolling)
2438                 unroll_nr = get_unroll_decision_constant();
2439         if (unroll_nr > 1) {
2440                 loop_info.unroll_kind = constant;
2441
2442         } else {
2443                 /* invariant case? */
2444                 if (opt_params.allow_invar_unrolling)
2445                         unroll_nr = get_unroll_decision_invariant();
2446                 if (unroll_nr > 1)
2447                         loop_info.unroll_kind = invariant;
2448         }
2449
2450         DB((dbg, LEVEL_1, " *** Unrolling %d times ***\n", unroll_nr));
2451
2452         if (unroll_nr > 1) {
2453                 loop_entries = NEW_ARR_F(entry_edge, 0);
2454
2455                 /* Get loop outs */
2456                 irg_walk_graph(current_ir_graph, get_loop_entries, NULL, NULL);
2457
2458                 if ((int)get_tarval_long(loop_info.count_tar) == unroll_nr)
2459                         loop_info.needs_backedge = 0;
2460                 else
2461                         loop_info.needs_backedge = 1;
2462
2463                 /* Use phase to keep copy of nodes from the condition chain. */
2464                 phase = new_phase(current_ir_graph, phase_irn_init_default);
2465
2466                 /* Copies the loop */
2467                 copy_loop(loop_entries, unroll_nr - 1);
2468
2469                 /* Line up the floating copies. */
2470                 place_copies(unroll_nr - 1);
2471
2472                 /* Remove phis with 1 in*/
2473                 irg_walk_graph(current_ir_graph, correct_phis, NULL, NULL);
2474
2475                 /* dump_ir_block_graph(current_ir_graph, "-DONE"); */
2476
2477                 if (loop_info.unroll_kind == constant)
2478                         count_stats(stats.constant_unroll);
2479                 else
2480                         count_stats(stats.invariant_unroll);
2481
2482                 set_irg_doms_inconsistent(current_ir_graph);
2483                 set_irg_loopinfo_inconsistent(current_ir_graph);
2484                 /* TODO is it? */
2485                 set_irg_outs_inconsistent(current_ir_graph);
2486
2487                 DEL_ARR_F(loop_entries);
2488         }
2489
2490 }
2491
2492 /* Analyzes the loop, and checks if size is within allowed range.
2493  * Decides if loop will be processed. */
2494 static void init_analyze(ir_loop *loop)
2495 {
2496         /* Expect no benefit of big loops. */
2497         /* TODO tuning/make parameter */
2498         int      loop_depth;
2499         unsigned max_loop_nodes = opt_params.max_loop_size;
2500         unsigned max_loop_nodes_adapted;
2501         int      depth_adaption = opt_params.depth_adaption;
2502
2503         cur_loop = loop;
2504
2505         loop_head = NULL;
2506         loop_head_valid = 1;
2507
2508         /* Reset loop info */
2509         memset(&loop_info, 0, sizeof(loop_info_t));
2510
2511         DB((dbg, LEVEL_1, "    >>>> current loop includes node %N <<<\n",
2512                 get_loop_node(loop, 0)));
2513
2514         /* Collect loop informations: head, node counts. */
2515         irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
2516
2517         /* Depth of 0 is the procedure and 1 a topmost loop. */
2518         loop_depth = get_loop_depth(loop) - 1;
2519
2520         /* Calculating in per mil. */
2521         max_loop_nodes_adapted = get_max_nodes_adapted(loop_depth);
2522
2523         DB((dbg, LEVEL_1, "max_nodes: %d\nmax_nodes_adapted %d at depth of %d (adaption %d)\n",
2524                         max_loop_nodes, max_loop_nodes_adapted, loop_depth, depth_adaption));
2525
2526         if (! (loop_info.nodes > 0))
2527                 return;
2528
2529 #if LOOP_IGNORE_NODE_LIMITS
2530         DB((dbg, LEVEL_1, "WARNING: Loop node limitations ignored."));
2531 #else
2532         if (loop_info.nodes > max_loop_nodes) {
2533                 /* Only for stats */
2534                 DB((dbg, LEVEL_1, "Nodes %d > allowed nodes %d\n",
2535                         loop_info.nodes, loop_depth, max_loop_nodes));
2536                 count_stats(stats.too_large);
2537                 /* no RETURN */
2538                 /* Adaption might change it */
2539         }
2540
2541         /* Limit processing to loops smaller than given parameter. */
2542         if (loop_info.nodes > max_loop_nodes_adapted) {
2543                 DB((dbg, LEVEL_1, "Nodes %d > allowed nodes (depth %d adapted) %d\n",
2544                         loop_info.nodes, loop_depth, max_loop_nodes_adapted));
2545                 count_stats(stats.too_large_adapted);
2546                 return;
2547         }
2548
2549         if (loop_info.calls > opt_params.allowed_calls) {
2550                 DB((dbg, LEVEL_1, "Calls %d > allowed calls %d\n",
2551                         loop_info.calls, max_calls));
2552                 count_stats(stats.calls_limit);
2553                 return;
2554         }
2555 #endif
2556
2557         /* RETURN if there is no valid head */
2558         if (!loop_head || !loop_head_valid) {
2559                 DB((dbg, LEVEL_1,   "No valid loop head. Nothing done.\n"));
2560                 return;
2561         } else {
2562                 DB((dbg, LEVEL_1,   "Loophead: %N\n", loop_head));
2563         }
2564
2565         switch (loop_op) {
2566                 case loop_op_inversion:
2567                         loop_inversion();
2568                         break;
2569
2570                 case loop_op_unrolling:
2571                         unroll_loop();
2572                         break;
2573
2574                 default:
2575                         panic("Loop optimization not implemented.");
2576         }
2577         DB((dbg, LEVEL_1, "       <<<< end of loop with node %N >>>>\n",
2578                 get_loop_node(loop, 0)));
2579 }
2580
2581 /* Find innermost loops and add them to loops. */
2582 static void find_innermost_loop(ir_loop *loop)
2583 {
2584         /* descend into sons */
2585         int sons = get_loop_n_sons(loop);
2586
2587         if (sons == 0) {
2588                 ARR_APP1(ir_loop *, loops, loop);
2589         } else {
2590                 int s;
2591                 for (s=0; s<sons; s++) {
2592                         find_innermost_loop(get_loop_son(loop, s));
2593                 }
2594         }
2595 }
2596
2597 /* Assure preconditions are met and go through all loops. */
2598 void loop_optimization(ir_graph *irg)
2599 {
2600         ir_loop *loop;
2601         int     i, sons, nr;
2602
2603         /* SPEC2000: Total time 98.9% with inversion only */
2604         opt_params.max_loop_size = 100;
2605         opt_params.depth_adaption = 400;
2606         opt_params.count_phi = 0;
2607         opt_params.count_proj = 0;
2608         opt_params.allowed_calls = 0;
2609
2610         opt_params.max_cc_size = 100;
2611
2612         /* Unrolling not yet tested */
2613         opt_params.allow_const_unrolling = 1;
2614         opt_params.allow_invar_unrolling = 1;
2615
2616         /* Reset stats for this procedure */
2617         reset_stats();
2618
2619         /* Preconditions */
2620         set_current_ir_graph(irg);
2621
2622         edges_assure(irg);
2623         assure_irg_outs(irg);
2624
2625         /* NOTE: sets only the loop attribute of blocks, not nodes */
2626         /* NOTE: Kills links */
2627         assure_cf_loop(irg);
2628
2629         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
2630         collect_phiprojs(irg);
2631         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2632
2633         loop = get_irg_loop(irg);
2634         sons = get_loop_n_sons(loop);
2635
2636         loops = NEW_ARR_F(ir_loop *, 0);
2637         /* List all inner loops */
2638         for (nr = 0; nr < sons; ++nr) {
2639                 find_innermost_loop(get_loop_son(loop, nr));
2640         }
2641
2642         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
2643         /* Set all links to NULL */
2644         irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
2645
2646         for (i = 0; i < ARR_LEN(loops); ++i) {
2647                 ir_loop *loop = loops[i];
2648
2649                 count_stats(stats.loops);
2650
2651                 /* Analyze and handle loop */
2652                 init_analyze(loop);
2653
2654                 /* Copied blocks do not have their phi list yet */
2655                 collect_phiprojs(irg);
2656
2657                 /* Set links to NULL
2658                  * TODO Still necessary? */
2659                 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
2660         }
2661
2662         print_stats();
2663
2664         DEL_ARR_F(loops);
2665         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2666         ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
2667 }
2668
2669 void do_loop_unrolling(ir_graph *irg)
2670 {
2671         loop_op = loop_op_unrolling;
2672
2673         DB((dbg, LEVEL_1, " >>> unrolling (Startnode %N) <<<\n",
2674                                 get_irg_start(irg)));
2675
2676         loop_optimization(irg);
2677
2678         DB((dbg, LEVEL_1, " >>> unrolling done (Startnode %N) <<<\n",
2679                                 get_irg_start(irg)));
2680 }
2681
2682 void do_loop_inversion(ir_graph *irg)
2683 {
2684         loop_op = loop_op_inversion;
2685
2686         DB((dbg, LEVEL_1, " >>> inversion (Startnode %N) <<<\n",
2687                                 get_irg_start(irg)));
2688
2689         loop_optimization(irg);
2690
2691         DB((dbg, LEVEL_1, " >>> inversion done (Startnode %N) <<<\n",
2692                                 get_irg_start(irg)));
2693 }
2694
2695 void do_loop_peeling(ir_graph *irg)
2696 {
2697         loop_op = loop_op_peeling;
2698
2699         DB((dbg, LEVEL_1, " >>> peeling (Startnode %N) <<<\n",
2700                                 get_irg_start(irg)));
2701
2702         loop_optimization(irg);
2703
2704         DB((dbg, LEVEL_1, " >>> peeling done (Startnode %N) <<<\n",
2705                                 get_irg_start(irg)));
2706
2707 }
2708
2709 ir_graph_pass_t *loop_inversion_pass(const char *name)
2710 {
2711         return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
2712 }
2713
2714 ir_graph_pass_t *loop_unroll_pass(const char *name)
2715 {
2716         return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
2717 }
2718
2719 ir_graph_pass_t *loop_peeling_pass(const char *name)
2720 {
2721         return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
2722 }
2723
2724 void firm_init_loop_opt(void)
2725 {
2726         FIRM_DBG_REGISTER(dbg, "firm.opt.loop");
2727 }