[libfirm] ir/opt/ldst2.c — parallelizing Load/Store optimisation (commit: "add more passes")
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   parallelizing Load/Store optimisation
23  * @author  Christoph Mallon
24  * @version $Id: $
25  */
26 #include "config.h"
27
28 #include "iroptimize.h"
29
30 #include "array_t.h"
31 #include "debug.h"
32 #include "ircons.h"
33 #include "irgraph.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irmemory.h"
38 #include "irnode.h"
39 #include "irnodeset.h"
40 #include "obst.h"
41 #include "irdump.h"
42 #include "irflag_t.h"
43 #include "irprintf.h"
44 #include "irtools.h"
45
46 #if +0
47 #define OPTIMISE_LOAD_AFTER_LOAD
48
49
50 #define UNIMPLEMENTED abort();
51
52
53 DEBUG_ONLY(static firm_dbg_module_t *dbg);
54
55
56 static struct obstack obst;
57 static size_t count_addrs;
58 static ir_node** addrs;
59
60
61 static void AddressCollector(ir_node* node, void* env)
62 {
63         ir_nodeset_t* addrs_set = env;
64         ir_node* addr;
65         if (is_Load(node)) {
66                 addr = get_Load_ptr(node);
67         } else if (is_Store(node)) {
68                 addr = get_Store_ptr(node);
69         } else {
70                 return;
71         }
72         ir_nodeset_insert(addrs_set, addr);
73 }
74
75
76 /* Collects all unique addresses used by load and store nodes of a graph and
77  * puts them into an array for later use */
78 static void CollectAddresses(ir_graph* irg)
79 {
80         ir_nodeset_t addrs_set;
81
82         ir_nodeset_init(&addrs_set);
83         irg_walk_graph(irg, AddressCollector, NULL, &addrs_set);
84
85         count_addrs = ir_nodeset_size(&addrs_set);
86         DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (unsigned int)count_addrs));
87         if (count_addrs != 0) {
88                 ir_nodeset_iterator_t addr_iter;
89                 size_t i;
90
91                 addrs = NEW_ARR_D(ir_node*, &obst, count_addrs);
92                 ir_nodeset_iterator_init(&addr_iter, &addrs_set);
93                 for (i = 0; i < count_addrs; i++) {
94                         ir_node* addr = ir_nodeset_iterator_next(&addr_iter);
95                         assert(addr != NULL);
96                         set_irn_link(addr, (void *)i);
97                         addrs[i] = addr;
98                         DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr));
99                 }
100         }
101 }
102
103
104 static void AliasSetAdder(ir_node* block, void* env)
105 {
106         ir_nodeset_t* alias_set;
107         size_t i;
108         (void) env;
109
110         alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs);
111         for (i = 0; i < count_addrs; i++) {
112                 ir_nodeset_init(&alias_set[i]);
113         }
114         set_irn_link(block, alias_set);
115 }
116
117
/* Seed the data flow: in the start block every address' execute-after set
 * contains exactly the graph's initial memory, and the start block is
 * marked as visited so the walk stops there. */
static void SetStartAddressesTop(ir_graph* irg)
{
	ir_node* initial_mem;
	ir_node* start_block;
	ir_nodeset_t* start_addrs;
	size_t i;

	initial_mem = get_irg_initial_mem(irg);
	start_block = get_irg_start_block(irg);
	start_addrs = get_irn_link(start_block);  /* array placed by AliasSetAdder */
	for (i = 0; i < count_addrs; i++) {
		ir_nodeset_insert(&start_addrs[i], initial_mem);
	}
	mark_Block_block_visited(start_block);
}
133
134
135 static void AliasSetDestroyer(ir_node* block, void* env)
136 {
137         ir_nodeset_t* alias_set = get_irn_link(block);
138         size_t i;
139         (void) env;
140
141         for (i = 0; i < count_addrs; i++) {
142                 ir_nodeset_destroy(&alias_set[i]);
143         }
144 }
145
146
147 static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, ir_node* other)
148 {
149         ir_node* other_addr;
150         ir_mode* other_mode;
151
152         if (is_Proj(other)) other = get_Proj_pred(other);
153
154         if (is_Load(other)) {
155                 other_addr = get_Load_ptr(other);
156         } else if (is_Store(other)) {
157                 other_addr = get_Store_ptr(other);
158         } else {
159                 return ir_may_alias;
160         }
161
162         other_mode = get_irn_mode(other);
163         return get_alias_relation(irg, addr, mode, other_addr, other_mode);
164 }
165
166
167 static int in_cmp(void const* va, void const* vb)
168 {
169         ir_node const* const a = *(ir_node const*const*)va;
170         ir_node const* const b = *(ir_node const*const*)vb;
171         return get_irn_idx(a) - get_irn_idx(b);
172 }
173
174
/* Return a single memory node representing all nodes in @p after_set: the
 * sole member if the set is a singleton, otherwise a Sync over all members
 * (sorted by node index via in_cmp for a deterministic operand order).
 * @p after_set must not be empty. */
static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set)
{
	size_t set_size = ir_nodeset_size(after_set);
	ir_nodeset_iterator_t iter;

	assert(set_size != 0);

	ir_nodeset_iterator_init(&iter, after_set);
	if (set_size == 1) {
		return ir_nodeset_iterator_next(&iter);
	} else {
		ir_node** in;
		size_t i;

		NEW_ARR_A(ir_node*, in, set_size);
		for (i = 0; i < set_size; i++) {
			in[i] = ir_nodeset_iterator_next(&iter);
		}
		qsort(in, set_size, sizeof(*in), in_cmp);
		return new_r_Sync(irg, block, set_size, in);
	}
}
197
198
199 static ir_node** unfinished_phis;
200
201
202 static void PlaceMemPhis(ir_graph* irg, ir_node* block, ir_node* phi)
203 {
204         int unfinished = 0;
205         size_t block_n_preds = get_Block_n_cfgpreds(block);
206         ir_nodeset_t* thissets;
207         ir_node** in;
208         size_t i;
209         size_t j;
210
211         thissets = get_irn_link(block);
212         NEW_ARR_A(ir_node*, in, block_n_preds);
213         for (j = 0; j < count_addrs; j++) {
214                 ir_node* new_phi;
215
216                 for (i = 0; i < block_n_preds; i++) {
217                         ir_node* pred_block = get_nodes_block(get_Phi_pred(phi, i)); // TODO get_Block_cfgpred_block(block, i);
218                         ir_nodeset_t* predsets = get_irn_link(pred_block);
219                         size_t predset_size = ir_nodeset_size(&predsets[j]);
220
221                         if (predset_size == 0) {
222                                 in[i] = new_r_Unknown(irg, mode_M);
223                                 unfinished = 1;
224                         } else {
225                                 in[i] = GenerateSync(irg, pred_block, &predsets[j]);
226                         }
227                 }
228                 new_phi = new_r_Phi(irg, block, block_n_preds, in, mode_M);
229                 if (unfinished) {
230                         set_irn_link(new_phi, unfinished_phis[j]);
231                         unfinished_phis[j] = new_phi;
232                 }
233                 ir_nodeset_insert(&thissets[j], new_phi);
234         }
235 }
236
237
238 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block);
239
240
/* Handle a memory Phi: first finish the memory chains of all its
 * predecessors, then replace it by per-address memory Phis (PlaceMemPhis)
 * and kill the original by exchanging it with Bad. */
static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
{
	size_t n = get_Phi_n_preds(phi);
	size_t i;

	for (i = 0; i < n; i++) {
		WalkMem(irg, get_Phi_pred(phi, i), block);
	}

	PlaceMemPhis(irg, block, phi);
	exchange(phi, new_Bad());
}
253
254
/* Wire the memory input of @p load: it must execute after everything in the
 * execute-after set of its own address — except other loads, which never
 * conflict. Afterwards @p memory (the load's memory Proj) is inserted into
 * the set of every address the load may alias. */
static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory)
{
	ir_node* addr = get_Load_ptr(load);
	size_t addr_idx = (size_t)get_irn_link(addr);  /* index stored by CollectAddresses */
	ir_nodeset_t* interfere_sets = get_irn_link(block);
	ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
	size_t size = ir_nodeset_size(interfere_set);
	ir_nodeset_iterator_t interfere_iter;
	size_t i;

	assert(size > 0);
	ir_nodeset_iterator_init(&interfere_iter, interfere_set);
	if (size == 1) {
		ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
		assert(!is_Proj(after) || !is_Load(get_Proj_pred(after)));
		DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after));
		set_Load_mem(load, after);
	} else {
		ir_node** after_set;
		ir_node* after;
		ir_node* mem;
		size_t i;  /* NOTE(review): shadows the outer i */

		NEW_ARR_A(ir_node*, after_set, size);
		i = 0;
		while ((mem = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
			if (is_Proj(mem)) {
				ir_node* pred = get_Proj_pred(mem);
				if (is_Load(pred)) {
#ifdef OPTIMISE_LOAD_AFTER_LOAD
					/* Same address and mode: reuse the earlier load's result
					 * instead of re-loading. */
					if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
						exchange(load, pred);
						return;
					}
#endif
					continue;  /* loads never conflict with loads */
				}
			}
			DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, mem));
			after_set[i++] = mem;
		}
		assert(i != 0);
		if (i == 1) {
			after = after_set[0];
		} else {
			after = new_r_Sync(irg, block, i, after_set);
		}
		set_Load_mem(load, after);
	}

	/* Record the load's memory Proj in the set of every potentially
	 * aliasing address. */
	for (i = 0; i < count_addrs; i++) {
		ir_mode* mode = get_Load_mode(load);
		ir_node* other_addr = addrs[i];
		ir_mode* other_mode = mode; // XXX second mode is nonsense
		ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);

		DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
		if (rel == ir_no_alias) {
			continue;
		}
		DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr));

		ir_nodeset_insert(&interfere_sets[i], memory);
	}
}
320
321
/* Wire the memory input of @p store: it must execute after everything in the
 * execute-after set of its own address. The store then supersedes all
 * conflicting entries in the set of every address it may alias and inserts
 * its own memory Proj there instead. */
static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory)
{
	ir_node* addr = get_Store_ptr(store);
	size_t addr_idx = (size_t)get_irn_link(addr);  /* index stored by CollectAddresses */
	ir_nodeset_t* interfere_sets = get_irn_link(block);
	ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
	ir_node* after;
	size_t i;

	after = GenerateSync(irg, block, interfere_set);
	set_Store_mem(store, after);

	for (i = 0; i < count_addrs; i++) {
		ir_nodeset_iterator_t interfere_iter;
		ir_mode* mode = get_irn_mode(get_Store_value(store));
		ir_node* other_addr = addrs[i];
		ir_mode* other_mode = mode; // XXX second mode is nonsense
		ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
		ir_node* other_node;

		DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
		if (rel == ir_no_alias) {
			continue;
		}
		DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr));

		/* Drop entries the store conflicts with — they are now ordered
		 * before it via its memory input. Removal goes through
		 * ir_nodeset_remove_iterator while iterating. */
		ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
		while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
			if (AliasTest(irg, addr, mode, other_node) != ir_no_alias) {
				DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store));
				ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
			}
		}

		ir_nodeset_insert(&interfere_sets[i], memory);
	}
}
359
360
/* Walk the memory chain ending in @p node backwards towards Start, rewiring
 * every memory operation so it only depends on operations it may actually
 * alias (tracked in the per-block, per-address execute-after sets).
 * @param last_block  block of the memory successor we came from (NULL at the
 *                    start of a walk)
 * @return non-zero if a block boundary lies between @p node and @p last_block */
static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block)
{
	int block_change = 0;
	ir_node* block = get_nodes_block(node);
	ir_node* pred;
	ir_node* memory = node;  /* the original memory value (usually a Proj) */
	ir_nodeset_t* addr_sets;

	if (block != last_block) {
		DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block));
		block_change = 1;
		if (!Block_block_visited(block)) {
			mark_Block_block_visited(block);
		} else {
			/* another path already handled this block; stop here */
			DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node));
			return block_change;
		}
	}

	// Skip projs
	if (is_Proj(node)) node = get_Proj_pred(node);

	if (is_Phi(node)) {
		WalkMemPhi(irg, block, node);
		return block_change;
	} else if (is_Sync(node)) {
		UNIMPLEMENTED
	} else if (is_Return(node)) {
		pred = get_Return_mem(node);
	} else {
		pred = get_fragile_op_mem(node);
	}

	/* first finish everything above us in the memory chain */
	if (WalkMem(irg, pred, block)) {
		// There was a block change
		size_t block_arity = get_Block_n_cfgpreds(block);

		DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node));
		if (block_arity == 1) {
			// Just one predecessor, inherit its alias sets
			ir_node* pred_block = get_nodes_block(pred);
			ir_nodeset_t* predsets = get_irn_link(pred_block);
			ir_nodeset_t* thissets = get_irn_link(block);
			size_t i;

			DB((dbg, LEVEL_3, "===> Copying the only predecessor's address sets\n"));

			if (ir_nodeset_size(&predsets[0]) == 0) {
				ir_node* unknown;

				/* Predecessor not finished yet: place one-input Phis with an
				 * Unknown operand as placeholders and queue them for
				 * FinalisePhis(). */
				DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n"));
				assert(Block_block_visited(pred_block));

				unknown = new_r_Unknown(irg, mode_M);
				for (i = 0; i < count_addrs; i++) {
					ir_node* phi_unk = new_r_Phi(irg, block, 1, &unknown, mode_M);
					DB((dbg, LEVEL_3, "===> Placing unfinished %+F for %+F in %+F\n", phi_unk, addrs[i], block));
					set_irn_link(phi_unk, unfinished_phis[i]);
					unfinished_phis[i] = phi_unk;
					ir_nodeset_insert(&thissets[i], phi_unk);
				}
			} else {
				for (i = 0; i < count_addrs; i++) {
					ir_nodeset_iterator_t prediter;
					ir_node* addr;

					ir_nodeset_iterator_init(&prediter, &predsets[i]);
					while ((addr = ir_nodeset_iterator_next(&prediter)) != NULL) {
						ir_nodeset_insert(&thissets[i], addr);
					}
				}
			}
		}
	}

	DB((dbg, LEVEL_3, "===> Detotalising %+F\n", node));

	addr_sets = get_irn_link(block);

	if (is_Load(node)) {
		PlaceLoad(irg, block, node, memory);
	} else if (is_Store(node)) {
		PlaceStore(irg, block, node, memory);
	} else {
		/* Any other memory operation may touch anything: it must wait for
		 * every recorded memory node and afterwards becomes the sole entry
		 * of every address set. */
		ir_nodeset_t sync_set;
		size_t i;
		ir_node* after;

		DB((dbg, LEVEL_3, "===> Fallback: %+F aliases everything\n", node));

		/* NOTE(review): sync_set is never destroyed — looks like a leak */
		ir_nodeset_init(&sync_set);
		for (i = 0; i < count_addrs; i++) {
			ir_nodeset_iterator_t iter;
			ir_node* mem;

			ir_nodeset_iterator_init(&iter, &addr_sets[i]);
			while ((mem = ir_nodeset_iterator_next(&iter)) != NULL) {
				ir_nodeset_insert(&sync_set, mem);
			}
		}

		after = GenerateSync(irg, block, &sync_set);
		set_irn_n(node, 0, after); // XXX unnice way to set the memory input

		for (i = 0; i < count_addrs; i++) {
			ir_nodeset_iterator_t iter;
			ir_nodeset_iterator_init(&iter, &addr_sets[i]);
			while (ir_nodeset_iterator_next(&iter) != NULL) {
				ir_nodeset_remove_iterator(&addr_sets[i], &iter);
			}
			ir_nodeset_insert(&addr_sets[i], memory);
		}
	}

	return block_change;
}
477
478
479 static void FinalisePhis(ir_graph* irg)
480 {
481         size_t i;
482
483         for (i = 0; i < count_addrs; i++) {
484                 ir_node* next_phi;
485                 ir_node* phi;
486
487                 for (phi = unfinished_phis[i]; phi != NULL; phi = next_phi) {
488                         ir_node* block = get_nodes_block(phi);
489                         size_t block_n_preds = get_Block_n_cfgpreds(block);
490
491                         next_phi = get_irn_link(phi);
492
493                         DB((dbg, LEVEL_4, "===> Finialising phi %+F in %+F\n", phi, block));
494
495                         if (block_n_preds == 1) {
496                                 ir_node* pred_block = get_Block_cfgpred_block(block, 0);
497                                 ir_nodeset_t* pred_sets = get_irn_link(pred_block);
498                                 ir_node* after = GenerateSync(irg, pred_block, &pred_sets[i]);
499
500                                 assert(is_Unknown(get_Phi_pred(phi, 0)));
501                                 exchange(phi, after);
502                         } else {
503                                 ir_node** in;
504                                 size_t j;
505
506                                 NEW_ARR_A(ir_node*, in, block_n_preds);
507                                 for (j = 0; j < block_n_preds; j++) {
508                                         ir_node* pred_block = get_Block_cfgpred_block(block, j);
509                                         ir_nodeset_t* pred_sets = get_irn_link(pred_block);
510
511                                         if (is_Unknown(get_Phi_pred(phi, j))) {
512                                                 set_Phi_pred(phi, j, GenerateSync(irg, pred_block, &pred_sets[i]));
513                                         }
514                                 }
515                         }
516                 }
517         }
518 }
519
520
521 static void Detotalise(ir_graph* irg)
522 {
523         ir_node* end_block = get_irg_end_block(irg);
524         size_t npreds = get_Block_n_cfgpreds(end_block);
525         size_t i;
526
527         unfinished_phis = XMALLOCN(ir_node, count_addrs);
528         for (i = 0; i < count_addrs; i++) {
529                 unfinished_phis[i] = NULL;
530         }
531
532         for (i = 0; i < npreds; i++) {
533                 ir_node* pred = get_Block_cfgpred(end_block, i);
534                 assert(is_Return(pred));
535                 DB((dbg, LEVEL_2, "===> Starting memory walk at %+F\n", pred));
536                 WalkMem(irg, pred, NULL);
537         }
538
539         FinalisePhis(irg);
540         xfree(unfinished_phis);
541 }
542 #endif
543
544
545 #if 0
546 static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
547 {
548         size_t n = get_Sync_n_preds(sync);
549         size_t i;
550
551         for (i = 0; i < n; i++) {
552                 ir_node* pred = get_Sync_pred(sync, i);
553                 if (is_Sync(pred)) {
554                         AddSyncPreds(preds, pred);
555                 } else {
556                         ir_nodeset_insert(preds, pred);
557                 }
558         }
559 }
560
/* Walker: if @p node is a Sync, flatten nested Syncs and drop duplicate
 * predecessors by rebuilding its in-array from the collected set. Nodes
 * other than Sync are ignored. */
static void NormaliseSync(ir_node* node, void* env)
{
	ir_nodeset_t preds;
	ir_nodeset_iterator_t iter;
	ir_node** in;
	size_t count_preds;
	size_t i;
	(void) env;

	if (!is_Sync(node)) return;

	ir_nodeset_init(&preds);
	AddSyncPreds(&preds, node);

	count_preds = ir_nodeset_size(&preds);
	/* only rebuild when flattening/deduplication changed the operand count */
	if (count_preds != (unsigned)get_Sync_n_preds(node)) {
		NEW_ARR_A(ir_node*, in, count_preds);
		ir_nodeset_iterator_init(&iter, &preds);
		for (i = 0; i < count_preds; i++) {
			ir_node* pred = ir_nodeset_iterator_next(&iter);
			assert(pred != NULL);
			in[i] = pred;
		}
		set_irn_in(node, count_preds, in);
	}

	ir_nodeset_destroy(&preds);
}
589
590 void opt_ldst2(ir_graph* irg)
591 {
592         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2");
593         DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg));
594
595         normalize_one_return(irg);
596         dump_ir_block_graph(irg, "-prefluffig");
597
598         obstack_init(&obst);
599
600         if (1 /* XXX */ || get_opt_alias_analysis()) {
601                 assure_irg_address_taken_computed(irg);
602                 assure_irp_globals_address_taken_computed();
603         }
604
605
606         CollectAddresses(irg);
607         if (count_addrs == 0) return;
608
609         irg_block_walk_graph(irg, AliasSetAdder, NULL, NULL);
610         inc_irg_block_visited(irg);
611         SetStartAddressesTop(irg);
612         Detotalise(irg);
613         dump_ir_block_graph(irg, "-fluffig");
614
615         irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL);
616         obstack_free(&obst, NULL);
617
618         normalize_proj_nodes(irg);
619         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
620   optimize_graph_df(irg);
621         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
622         dump_ir_block_graph(irg, "-postfluffig");
623 }
624 #endif
625
626
/**
 * State for parallelising the memory chain above a single Load/Store.
 */
typedef struct parallelise_info
{
	ir_node      *origin_block; /**< block of the Load/Store being optimised */
	ir_node      *origin_ptr;   /**< address accessed by the origin operation */
	ir_mode      *origin_mode;  /**< mode of the loaded/stored value */
	ir_nodeset_t  this_mem;     /**< memory nodes the origin must still depend on */
	ir_nodeset_t  user_mem;     /**< memory users that may execute in parallel with the origin */
} parallelise_info;
635
636
/*
 * Recursively partition the memory subgraph above a Load (starting at
 * @p irn) into pi->user_mem (memory users the load does not conflict with
 * and may bypass) and pi->this_mem (nodes the load must still depend on).
 * Only nodes inside the load's own block are looked through; everything
 * else lands in this_mem.
 */
static void parallelise_load(parallelise_info *pi, ir_node *irn)
{
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->user_mem, irn))
		return;

	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred) &&
					get_Load_volatility(pred) == volatility_non_volatile) {
				/* another non-volatile load never conflicts: bypass it and
				 * continue above its memory input */
				ir_node *mem = get_Load_mem(pred);
				ir_nodeset_insert(&pi->user_mem, irn);
				parallelise_load(pi, mem);
				return;
			} else if (is_Store(pred) &&
					get_Store_volatility(pred) == volatility_non_volatile) {
				/* a store may be bypassed when its address provably does not
				 * alias the load's address */
				ir_mode *org_mode   = pi->origin_mode;
				ir_node *org_ptr    = pi->origin_ptr;
				ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
				ir_node *store_ptr  = get_Store_ptr(pred);
				if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
					ir_node *mem = get_Store_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelise_load(pi, mem);
					return;
				}
			}
		} else if (is_Sync(irn)) {
			/* recurse into every Sync operand */
			int n = get_Sync_n_preds(irn);
			int i;

			for (i = 0; i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
				parallelise_load(pi, sync_pred);
			}
			return;
		}
	}
	/* cannot look through this node: the load keeps depending on it */
	ir_nodeset_insert(&pi->this_mem, irn);
}
684
685
/*
 * Recursively partition the memory subgraph above a Store (starting at
 * @p irn) into pi->user_mem (memory users the store does not conflict with
 * and may bypass) and pi->this_mem (nodes the store must still depend on).
 * Unlike a load, a store conflicts with both aliasing loads and stores, so
 * both directions are alias-checked. Only nodes inside the store's own
 * block are looked through.
 */
static void parallelise_store(parallelise_info *pi, ir_node *irn)
{
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->user_mem, irn))
		return;

	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred) &&
					get_Load_volatility(pred) == volatility_non_volatile) {
				/* a load may be bypassed when its address provably does not
				 * alias the store's address */
				ir_mode *org_mode  = pi->origin_mode;
				ir_node *org_ptr   = pi->origin_ptr;
				ir_mode *load_mode = get_Load_mode(pred);
				ir_node *load_ptr  = get_Load_ptr(pred);
				if (get_alias_relation(current_ir_graph, org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
					ir_node *mem = get_Load_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelise_store(pi, mem);
					return;
				}
			} else if (is_Store(pred) &&
					get_Store_volatility(pred) == volatility_non_volatile) {
				/* same for a non-aliasing store */
				ir_mode *org_mode   = pi->origin_mode;
				ir_node *org_ptr    = pi->origin_ptr;
				ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
				ir_node *store_ptr  = get_Store_ptr(pred);
				if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
					ir_node *mem;

					ir_nodeset_insert(&pi->user_mem, irn);
					mem = get_Store_mem(pred);
					parallelise_store(pi, mem);
					return;
				}
			}
		} else if (is_Sync(irn)) {
			/* recurse into every Sync operand */
			int n = get_Sync_n_preds(irn);
			int i;

			for (i = 0; i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
				parallelise_store(pi, sync_pred);
			}
			return;
		}
	}
	/* cannot look through this node: the store keeps depending on it */
	ir_nodeset_insert(&pi->this_mem, irn);
}
741
742
743 static void walker(ir_node *proj, void *env)
744 {
745         ir_node          *mem_op;
746         ir_node          *pred;
747         ir_node          *block;
748         int               n;
749         parallelise_info  pi;
750
751         (void)env;
752
753         if (!is_Proj(proj)) return;
754         if (get_irn_mode(proj) != mode_M) return;
755
756         mem_op = get_Proj_pred(proj);
757         if (is_Load(mem_op)) {
758                 if (get_Load_volatility(mem_op) != volatility_non_volatile) return;
759
760                 block = get_nodes_block(mem_op);
761                 pred  = get_Load_mem(mem_op);
762                 //ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj);
763
764                 pi.origin_block = block,
765                 pi.origin_ptr   = get_Load_ptr(mem_op);
766                 pi.origin_mode  = get_Load_mode(mem_op);
767                 ir_nodeset_init(&pi.this_mem);
768                 ir_nodeset_init(&pi.user_mem);
769
770                 parallelise_load(&pi, pred);
771         } else if (is_Store(mem_op)) {
772                 if (get_Store_volatility(mem_op) != volatility_non_volatile) return;
773
774                 block = get_nodes_block(mem_op);
775                 pred  = get_Store_mem(mem_op);
776                 //ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj);
777
778                 pi.origin_block = block,
779                 pi.origin_ptr   = get_Store_ptr(mem_op);
780                 pi.origin_mode  = get_irn_mode(get_Store_value(mem_op));
781                 ir_nodeset_init(&pi.this_mem);
782                 ir_nodeset_init(&pi.user_mem);
783
784                 parallelise_store(&pi, pred);
785         } else {
786                 return;
787         }
788
789         n = ir_nodeset_size(&pi.user_mem);
790         if (n != 0) { /* nothing happened otherwise */
791                 ir_graph               *irg  = current_ir_graph;
792                 ir_node                *sync;
793                 ir_node               **in;
794                 ir_nodeset_iterator_t   iter;
795                 int                     i;
796
797                 ++n;
798                 //ir_fprintf(stderr, "creating sync for users of %+F with %d inputs\n", proj, n);
799                 NEW_ARR_A(ir_node*, in, n);
800                 i = 0;
801                 in[i++] = new_r_Unknown(irg, mode_M);
802                 ir_nodeset_iterator_init(&iter, &pi.user_mem);
803                 for (;;) {
804                         ir_node* p = ir_nodeset_iterator_next(&iter);
805                         if (p == NULL) break;
806                         in[i++] = p;
807                 }
808                 assert(i == n);
809                 sync = new_r_Sync(block, n, in);
810                 exchange(proj, sync);
811
812                 assert(pn_Load_M == pn_Store_M);
813                 proj = new_r_Proj(block, mem_op, mode_M, pn_Load_M);
814                 set_Sync_pred(sync, 0, proj);
815
816                 n = ir_nodeset_size(&pi.this_mem);
817                 //ir_fprintf(stderr, "creating sync for %+F with %d inputs\n", mem_op, n);
818                 ir_nodeset_iterator_init(&iter, &pi.this_mem);
819                 if (n == 1) {
820                         sync = ir_nodeset_iterator_next(&iter);
821                 } else {
822                         NEW_ARR_A(ir_node*, in, n);
823                         i = 0;
824                         for (;;) {
825                                 ir_node* p = ir_nodeset_iterator_next(&iter);
826                                 if (p == NULL) break;
827                                 in[i++] = p;
828                         }
829                         assert(i == n);
830                         sync = new_r_Sync(block, n, in);
831                 }
832                 set_memop_mem(mem_op, sync);
833         }
834
835         ir_nodeset_destroy(&pi.this_mem);
836         ir_nodeset_destroy(&pi.user_mem);
837 }
838
839
/**
 * Entry point: walk @p irg and break up the linear memory chain into Syncs
 * of independent accesses so they can execute in parallel (see walker()).
 */
void opt_sync(ir_graph *irg)
{
	//assure_irg_entity_usage_computed(irg);
	//assure_irp_globals_entity_usage_computed();

	irg_walk_graph(irg, NULL, walker, NULL);
	//optimize_graph_df(irg);
	//irg_walk_graph(irg, NormaliseSync, NULL, NULL);
}
849
850 ir_graph_pass_t *opt_sync_pass(const char *name, int verify, int dump)
851 {
852         return def_graph_pass(name ? name : "opt_sync", verify, dump, opt_sync);
853 }