fix warnings
[libfirm] / ir / opt / ldst2.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   parallelizing Load/Store optimisation
23  * @author  Christoph Mallon
24  * @version $Id: $
25  */
26 #include "config.h"
27
28 #include "iroptimize.h"
29
30 #include "array_t.h"
31 #include "debug.h"
32 #include "ircons.h"
33 #include "irgraph.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irmemory.h"
38 #include "irnode.h"
39 #include "irnodeset.h"
40 #include "obst.h"
41 #include "irdump.h"
42 #include "irflag_t.h"
43 #include "irprintf.h"
44
45 #if +0
46 #define OPTIMISE_LOAD_AFTER_LOAD
47
48
49 #define UNIMPLEMENTED abort();
50
51
52 DEBUG_ONLY(static firm_dbg_module_t *dbg);
53
54
55 static struct obstack obst;
56 static size_t count_addrs;
57 static ir_node** addrs;
58
59
60 static void AddressCollector(ir_node* node, void* env)
61 {
62         ir_nodeset_t* addrs_set = env;
63         ir_node* addr;
64         if (is_Load(node)) {
65                 addr = get_Load_ptr(node);
66         } else if (is_Store(node)) {
67                 addr = get_Store_ptr(node);
68         } else {
69                 return;
70         }
71         ir_nodeset_insert(addrs_set, addr);
72 }
73
74
75 /* Collects all unique addresses used by load and store nodes of a graph and
76  * puts them into an array for later use */
77 static void CollectAddresses(ir_graph* irg)
78 {
79         ir_nodeset_t addrs_set;
80
81         ir_nodeset_init(&addrs_set);
82         irg_walk_graph(irg, AddressCollector, NULL, &addrs_set);
83
84         count_addrs = ir_nodeset_size(&addrs_set);
85         DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (unsigned int)count_addrs));
86         if (count_addrs != 0) {
87                 ir_nodeset_iterator_t addr_iter;
88                 size_t i;
89
90                 addrs = NEW_ARR_D(ir_node*, &obst, count_addrs);
91                 ir_nodeset_iterator_init(&addr_iter, &addrs_set);
92                 for (i = 0; i < count_addrs; i++) {
93                         ir_node* addr = ir_nodeset_iterator_next(&addr_iter);
94                         assert(addr != NULL);
95                         set_irn_link(addr, (void *)i);
96                         addrs[i] = addr;
97                         DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr));
98                 }
99         }
100 }
101
102
103 static void AliasSetAdder(ir_node* block, void* env)
104 {
105         ir_nodeset_t* alias_set;
106         size_t i;
107         (void) env;
108
109         alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs);
110         for (i = 0; i < count_addrs; i++) {
111                 ir_nodeset_init(&alias_set[i]);
112         }
113         set_irn_link(block, alias_set);
114 }
115
116
117 static void SetStartAddressesTop(ir_graph* irg)
118 {
119         ir_node* initial_mem;
120         ir_node* start_block;
121         ir_nodeset_t* start_addrs;
122         size_t i;
123
124         initial_mem = get_irg_initial_mem(irg);
125         start_block = get_irg_start_block(irg);
126         start_addrs = get_irn_link(start_block);
127         for (i = 0; i < count_addrs; i++) {
128                 ir_nodeset_insert(&start_addrs[i], initial_mem);
129         }
130         mark_Block_block_visited(start_block);
131 }
132
133
134 static void AliasSetDestroyer(ir_node* block, void* env)
135 {
136         ir_nodeset_t* alias_set = get_irn_link(block);
137         size_t i;
138         (void) env;
139
140         for (i = 0; i < count_addrs; i++) {
141                 ir_nodeset_destroy(&alias_set[i]);
142         }
143 }
144
145
146 static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, ir_node* other)
147 {
148         ir_node* other_addr;
149         ir_mode* other_mode;
150
151         if (is_Proj(other)) other = get_Proj_pred(other);
152
153         if (is_Load(other)) {
154                 other_addr = get_Load_ptr(other);
155         } else if (is_Store(other)) {
156                 other_addr = get_Store_ptr(other);
157         } else {
158                 return ir_may_alias;
159         }
160
161         other_mode = get_irn_mode(other);
162         return get_alias_relation(irg, addr, mode, other_addr, other_mode);
163 }
164
165
166 static int in_cmp(void const* va, void const* vb)
167 {
168         ir_node const* const a = *(ir_node const*const*)va;
169         ir_node const* const b = *(ir_node const*const*)vb;
170         return get_irn_idx(a) - get_irn_idx(b);
171 }
172
173
174 static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set)
175 {
176         size_t set_size = ir_nodeset_size(after_set);
177         ir_nodeset_iterator_t iter;
178
179         assert(set_size != 0);
180
181         ir_nodeset_iterator_init(&iter, after_set);
182         if (set_size == 1) {
183                 return ir_nodeset_iterator_next(&iter);
184         } else {
185                 ir_node** in;
186                 size_t i;
187
188                 NEW_ARR_A(ir_node*, in, set_size);
189                 for (i = 0; i < set_size; i++) {
190                         in[i] = ir_nodeset_iterator_next(&iter);
191                 }
192                 qsort(in, set_size, sizeof(*in), in_cmp);
193                 return new_r_Sync(irg, block, set_size, in);
194         }
195 }
196
197
198 static ir_node** unfinished_phis;
199
200
201 static void PlaceMemPhis(ir_graph* irg, ir_node* block, ir_node* phi)
202 {
203         int unfinished = 0;
204         size_t block_n_preds = get_Block_n_cfgpreds(block);
205         ir_nodeset_t* thissets;
206         ir_node** in;
207         size_t i;
208         size_t j;
209
210         thissets = get_irn_link(block);
211         NEW_ARR_A(ir_node*, in, block_n_preds);
212         for (j = 0; j < count_addrs; j++) {
213                 ir_node* new_phi;
214
215                 for (i = 0; i < block_n_preds; i++) {
216                         ir_node* pred_block = get_nodes_block(get_Phi_pred(phi, i)); // TODO get_Block_cfgpred_block(block, i);
217                         ir_nodeset_t* predsets = get_irn_link(pred_block);
218                         size_t predset_size = ir_nodeset_size(&predsets[j]);
219
220                         if (predset_size == 0) {
221                                 in[i] = new_r_Unknown(irg, mode_M);
222                                 unfinished = 1;
223                         } else {
224                                 in[i] = GenerateSync(irg, pred_block, &predsets[j]);
225                         }
226                 }
227                 new_phi = new_r_Phi(irg, block, block_n_preds, in, mode_M);
228                 if (unfinished) {
229                         set_irn_link(new_phi, unfinished_phis[j]);
230                         unfinished_phis[j] = new_phi;
231                 }
232                 ir_nodeset_insert(&thissets[j], new_phi);
233         }
234 }
235
236
237 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block);
238
239
240 static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
241 {
242         size_t n = get_Phi_n_preds(phi);
243         size_t i;
244
245         for (i = 0; i < n; i++) {
246                 WalkMem(irg, get_Phi_pred(phi, i), block);
247         }
248
249         PlaceMemPhis(irg, block, phi);
250         exchange(phi, new_Bad());
251 }
252
253
254 static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory)
255 {
256         ir_node* addr = get_Load_ptr(load);
257         size_t addr_idx = (size_t)get_irn_link(addr);
258         ir_nodeset_t* interfere_sets = get_irn_link(block);
259         ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
260         size_t size = ir_nodeset_size(interfere_set);
261         ir_nodeset_iterator_t interfere_iter;
262         size_t i;
263
264         assert(size > 0);
265         ir_nodeset_iterator_init(&interfere_iter, interfere_set);
266         if (size == 1) {
267                 ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
268                 assert(!is_Proj(after) || !is_Load(get_Proj_pred(after)));
269                 DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after));
270                 set_Load_mem(load, after);
271         } else {
272                 ir_node** after_set;
273                 ir_node* after;
274                 ir_node* mem;
275                 size_t i;
276
277                 NEW_ARR_A(ir_node*, after_set, size);
278                 i = 0;
279                 while ((mem = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
280                         if (is_Proj(mem)) {
281                                 ir_node* pred = get_Proj_pred(mem);
282                                 if (is_Load(pred)) {
283 #ifdef OPTIMISE_LOAD_AFTER_LOAD
284                                         if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
285                                                 exchange(load, pred);
286                                                 return;
287                                         }
288 #endif
289                                         continue;
290                                 }
291                         }
292                         DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, mem));
293                         after_set[i++] = mem;
294                 }
295                 assert(i != 0);
296                 if (i == 1) {
297                         after = after_set[0];
298                 } else {
299                         after = new_r_Sync(irg, block, i, after_set);
300                 }
301                 set_Load_mem(load, after);
302         }
303
304         for (i = 0; i < count_addrs; i++) {
305                 ir_mode* mode = get_Load_mode(load);
306                 ir_node* other_addr = addrs[i];
307                 ir_mode* other_mode = mode; // XXX second mode is nonsense
308                 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
309
310                 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
311                 if (rel == ir_no_alias) {
312                         continue;
313                 }
314                 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr));
315
316                 ir_nodeset_insert(&interfere_sets[i], memory);
317         }
318 }
319
320
321 static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory)
322 {
323         ir_node* addr = get_Store_ptr(store);
324         size_t addr_idx = (size_t)get_irn_link(addr);
325         ir_nodeset_t* interfere_sets = get_irn_link(block);
326         ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
327         ir_node* after;
328         size_t i;
329
330         after = GenerateSync(irg, block, interfere_set);
331         set_Store_mem(store, after);
332
333         for (i = 0; i < count_addrs; i++) {
334                 ir_nodeset_iterator_t interfere_iter;
335                 ir_mode* mode = get_irn_mode(get_Store_value(store));
336                 ir_node* other_addr = addrs[i];
337                 ir_mode* other_mode = mode; // XXX second mode is nonsense
338                 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
339                 ir_node* other_node;
340
341                 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
342                 if (rel == ir_no_alias) {
343                         continue;
344                 }
345                 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr));
346
347                 ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
348                 while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
349                         if (AliasTest(irg, addr, mode, other_node) != ir_no_alias) {
350                                 DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store));
351                                 ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
352                         }
353                 }
354
355                 ir_nodeset_insert(&interfere_sets[i], memory);
356         }
357 }
358
359
360 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block)
361 {
362         int block_change = 0;
363         ir_node* block = get_nodes_block(node);
364         ir_node* pred;
365         ir_node* memory = node;
366         ir_nodeset_t* addr_sets;
367
368         if (block != last_block) {
369                 DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block));
370                 block_change = 1;
371                 if (!Block_block_visited(block)) {
372                         mark_Block_block_visited(block);
373                 } else {
374                         DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node));
375                         return block_change;
376                 }
377         }
378
379         // Skip projs
380         if (is_Proj(node)) node = get_Proj_pred(node);
381
382         if (is_Phi(node)) {
383                 WalkMemPhi(irg, block, node);
384                 return block_change;
385         } else if (is_Sync(node)) {
386                 UNIMPLEMENTED
387         } else if (is_Return(node)) {
388                 pred = get_Return_mem(node);
389         } else {
390                 pred = get_fragile_op_mem(node);
391         }
392
393         if (WalkMem(irg, pred, block)) {
394                 // There was a block change
395                 size_t block_arity = get_Block_n_cfgpreds(block);
396
397                 DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node));
398                 if (block_arity == 1) {
399                         // Just one predecessor, inherit its alias sets
400                         ir_node* pred_block = get_nodes_block(pred);
401                         ir_nodeset_t* predsets = get_irn_link(pred_block);
402                         ir_nodeset_t* thissets = get_irn_link(block);
403                         size_t i;
404
405                         DB((dbg, LEVEL_3, "===> Copying the only predecessor's address sets\n"));
406
407                         if (ir_nodeset_size(&predsets[0]) == 0) {
408                                 ir_node* unknown;
409
410                                 DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n"));
411                                 assert(Block_block_visited(pred_block));
412
413                                 unknown = new_r_Unknown(irg, mode_M);
414                                 for (i = 0; i < count_addrs; i++) {
415                                         ir_node* phi_unk = new_r_Phi(irg, block, 1, &unknown, mode_M);
416                                         DB((dbg, LEVEL_3, "===> Placing unfinished %+F for %+F in %+F\n", phi_unk, addrs[i], block));
417                                         set_irn_link(phi_unk, unfinished_phis[i]);
418                                         unfinished_phis[i] = phi_unk;
419                                         ir_nodeset_insert(&thissets[i], phi_unk);
420                                 }
421                         } else {
422                                 for (i = 0; i < count_addrs; i++) {
423                                         ir_nodeset_iterator_t prediter;
424                                         ir_node* addr;
425
426                                         ir_nodeset_iterator_init(&prediter, &predsets[i]);
427                                         while ((addr = ir_nodeset_iterator_next(&prediter)) != NULL) {
428                                                 ir_nodeset_insert(&thissets[i], addr);
429                                         }
430                                 }
431                         }
432                 }
433         }
434
435         DB((dbg, LEVEL_3, "===> Detotalising %+F\n", node));
436
437         addr_sets = get_irn_link(block);
438
439         if (is_Load(node)) {
440                 PlaceLoad(irg, block, node, memory);
441         } else if (is_Store(node)) {
442                 PlaceStore(irg, block, node, memory);
443         } else {
444                 ir_nodeset_t sync_set;
445                 size_t i;
446                 ir_node* after;
447
448                 DB((dbg, LEVEL_3, "===> Fallback: %+F aliases everything\n", node));
449
450                 ir_nodeset_init(&sync_set);
451                 for (i = 0; i < count_addrs; i++) {
452                         ir_nodeset_iterator_t iter;
453                         ir_node* mem;
454
455                         ir_nodeset_iterator_init(&iter, &addr_sets[i]);
456                         while ((mem = ir_nodeset_iterator_next(&iter)) != NULL) {
457                                 ir_nodeset_insert(&sync_set, mem);
458                         }
459                 }
460
461                 after = GenerateSync(irg, block, &sync_set);
462                 set_irn_n(node, 0, after); // XXX unnice way to set the memory input
463
464                 for (i = 0; i < count_addrs; i++) {
465                         ir_nodeset_iterator_t iter;
466                         ir_nodeset_iterator_init(&iter, &addr_sets[i]);
467                         while (ir_nodeset_iterator_next(&iter) != NULL) {
468                                 ir_nodeset_remove_iterator(&addr_sets[i], &iter);
469                         }
470                         ir_nodeset_insert(&addr_sets[i], memory);
471                 }
472         }
473
474         return block_change;
475 }
476
477
478 static void FinalisePhis(ir_graph* irg)
479 {
480         size_t i;
481
482         for (i = 0; i < count_addrs; i++) {
483                 ir_node* next_phi;
484                 ir_node* phi;
485
486                 for (phi = unfinished_phis[i]; phi != NULL; phi = next_phi) {
487                         ir_node* block = get_nodes_block(phi);
488                         size_t block_n_preds = get_Block_n_cfgpreds(block);
489
490                         next_phi = get_irn_link(phi);
491
492                         DB((dbg, LEVEL_4, "===> Finialising phi %+F in %+F\n", phi, block));
493
494                         if (block_n_preds == 1) {
495                                 ir_node* pred_block = get_Block_cfgpred_block(block, 0);
496                                 ir_nodeset_t* pred_sets = get_irn_link(pred_block);
497                                 ir_node* after = GenerateSync(irg, pred_block, &pred_sets[i]);
498
499                                 assert(is_Unknown(get_Phi_pred(phi, 0)));
500                                 exchange(phi, after);
501                         } else {
502                                 ir_node** in;
503                                 size_t j;
504
505                                 NEW_ARR_A(ir_node*, in, block_n_preds);
506                                 for (j = 0; j < block_n_preds; j++) {
507                                         ir_node* pred_block = get_Block_cfgpred_block(block, j);
508                                         ir_nodeset_t* pred_sets = get_irn_link(pred_block);
509
510                                         if (is_Unknown(get_Phi_pred(phi, j))) {
511                                                 set_Phi_pred(phi, j, GenerateSync(irg, pred_block, &pred_sets[i]));
512                                         }
513                                 }
514                         }
515                 }
516         }
517 }
518
519
520 static void Detotalise(ir_graph* irg)
521 {
522         ir_node* end_block = get_irg_end_block(irg);
523         size_t npreds = get_Block_n_cfgpreds(end_block);
524         size_t i;
525
526         unfinished_phis = XMALLOCN(ir_node, count_addrs);
527         for (i = 0; i < count_addrs; i++) {
528                 unfinished_phis[i] = NULL;
529         }
530
531         for (i = 0; i < npreds; i++) {
532                 ir_node* pred = get_Block_cfgpred(end_block, i);
533                 assert(is_Return(pred));
534                 DB((dbg, LEVEL_2, "===> Starting memory walk at %+F\n", pred));
535                 WalkMem(irg, pred, NULL);
536         }
537
538         FinalisePhis(irg);
539         xfree(unfinished_phis);
540 }
541 #endif
542
543
544 #if 0
545 static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
546 {
547         size_t n = get_Sync_n_preds(sync);
548         size_t i;
549
550         for (i = 0; i < n; i++) {
551                 ir_node* pred = get_Sync_pred(sync, i);
552                 if (is_Sync(pred)) {
553                         AddSyncPreds(preds, pred);
554                 } else {
555                         ir_nodeset_insert(preds, pred);
556                 }
557         }
558 }
559
560 static void NormaliseSync(ir_node* node, void* env)
561 {
562         ir_nodeset_t preds;
563         ir_nodeset_iterator_t iter;
564         ir_node** in;
565         size_t count_preds;
566         size_t i;
567         (void) env;
568
569         if (!is_Sync(node)) return;
570
571         ir_nodeset_init(&preds);
572         AddSyncPreds(&preds, node);
573
574         count_preds = ir_nodeset_size(&preds);
575         if (count_preds != (unsigned)get_Sync_n_preds(node)) {
576                 NEW_ARR_A(ir_node*, in, count_preds);
577                 ir_nodeset_iterator_init(&iter, &preds);
578                 for (i = 0; i < count_preds; i++) {
579                         ir_node* pred = ir_nodeset_iterator_next(&iter);
580                         assert(pred != NULL);
581                         in[i] = pred;
582                 }
583                 set_irn_in(node, count_preds, in);
584         }
585
586         ir_nodeset_destroy(&preds);
587 }
588
589 void opt_ldst2(ir_graph* irg)
590 {
591         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2");
592         DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg));
593
594         normalize_one_return(irg);
595         dump_ir_block_graph(irg, "-prefluffig");
596
597         obstack_init(&obst);
598
599         if (1 /* XXX */ || get_opt_alias_analysis()) {
600                 assure_irg_address_taken_computed(irg);
601                 assure_irp_globals_address_taken_computed();
602         }
603
604
605         CollectAddresses(irg);
606         if (count_addrs == 0) return;
607
608         irg_block_walk_graph(irg, AliasSetAdder, NULL, NULL);
609         inc_irg_block_visited(irg);
610         SetStartAddressesTop(irg);
611         Detotalise(irg);
612         dump_ir_block_graph(irg, "-fluffig");
613
614         irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL);
615         obstack_free(&obst, NULL);
616
617         normalize_proj_nodes(irg);
618         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
619   optimize_graph_df(irg);
620         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
621         dump_ir_block_graph(irg, "-postfluffig");
622 }
623 #endif
624
625
626 typedef struct parallelise_info
627 {
628         ir_node      *origin_block;
629         ir_node      *origin_ptr;
630         ir_mode      *origin_mode;
631         ir_nodeset_t  this_mem;
632         ir_nodeset_t  user_mem;
633 } parallelise_info;
634
635
636 static void parallelise_load(parallelise_info *pi, ir_node *irn)
637 {
638         /* There is no point in investigating the same subgraph twice */
639         if (ir_nodeset_contains(&pi->user_mem, irn))
640                 return;
641
642         //ir_fprintf(stderr, "considering %+F\n", irn);
643         if (get_nodes_block(irn) == pi->origin_block) {
644                 if (is_Proj(irn)) {
645                         ir_node *pred = get_Proj_pred(irn);
646                         if (is_Load(pred) &&
647                                         get_Load_volatility(pred) == volatility_non_volatile) {
648                                 ir_node *mem = get_Load_mem(pred);
649                                 //ir_nodeset_insert(&pi->this_mem, mem);
650                                 ir_nodeset_insert(&pi->user_mem, irn);
651                                 //ir_fprintf(stderr, "adding %+F to user set\n", irn);
652                                 parallelise_load(pi, mem);
653                                 return;
654                         } else if (is_Store(pred) &&
655                                         get_Store_volatility(pred) == volatility_non_volatile) {
656                                 ir_mode *org_mode   = pi->origin_mode;
657                                 ir_node *org_ptr    = pi->origin_ptr;
658                                 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
659                                 ir_node *store_ptr  = get_Store_ptr(pred);
660                                 if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
661                                         ir_node *mem = get_Store_mem(pred);
662                                         //ir_fprintf(stderr, "Ld after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode);
663                                         ir_nodeset_insert(&pi->user_mem, irn);
664                                         //ir_fprintf(stderr, "adding %+F to user set\n", irn);
665                                         parallelise_load(pi, mem);
666                                         return;
667                                 }
668                         }
669                 } else if (is_Sync(irn)) {
670                         int n = get_Sync_n_preds(irn);
671                         int i;
672
673                         for (i = 0; i < n; ++i) {
674                                 ir_node *sync_pred = get_Sync_pred(irn, i);
675                                 parallelise_load(pi, sync_pred);
676                         }
677                         return;
678                 }
679         }
680         ir_nodeset_insert(&pi->this_mem, irn);
681         //ir_fprintf(stderr, "adding %+F to this set\n", irn);
682 }
683
684
685 static void parallelise_store(parallelise_info *pi, ir_node *irn)
686 {
687         /* There is no point in investigating the same subgraph twice */
688         if (ir_nodeset_contains(&pi->user_mem, irn))
689                 return;
690
691         //ir_fprintf(stderr, "considering %+F\n", irn);
692         if (get_nodes_block(irn) == pi->origin_block) {
693                 if (is_Proj(irn)) {
694                         ir_node *pred = get_Proj_pred(irn);
695                         if (is_Load(pred) &&
696                                         get_Load_volatility(pred) == volatility_non_volatile) {
697                                 ir_mode *org_mode  = pi->origin_mode;
698                                 ir_node *org_ptr   = pi->origin_ptr;
699                                 ir_mode *load_mode = get_Load_mode(pred);
700                                 ir_node *load_ptr  = get_Load_ptr(pred);
701                                 if (get_alias_relation(current_ir_graph, org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
702                                         ir_node *mem = get_Load_mem(pred);
703                                         //ir_fprintf(stderr, "St after Ld: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, load_ptr, load_mode);
704                                         ir_nodeset_insert(&pi->user_mem, irn);
705                                         //ir_fprintf(stderr, "adding %+F to user set\n", irn);
706                                         parallelise_store(pi, mem);
707                                         return;
708                                 }
709                         } else if (is_Store(pred) &&
710                                         get_Store_volatility(pred) == volatility_non_volatile) {
711                                 ir_mode *org_mode   = pi->origin_mode;
712                                 ir_node *org_ptr    = pi->origin_ptr;
713                                 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
714                                 ir_node *store_ptr  = get_Store_ptr(pred);
715                                 if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
716                                         ir_node *mem;
717
718                                         //ir_fprintf(stderr, "St after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode);
719                                         ir_nodeset_insert(&pi->user_mem, irn);
720                                         //ir_fprintf(stderr, "adding %+F to user set\n", irn);
721                                         mem = get_Store_mem(pred);
722                                         parallelise_store(pi, mem);
723                                         return;
724                                 }
725                         }
726                 } else if (is_Sync(irn)) {
727                         int n = get_Sync_n_preds(irn);
728                         int i;
729
730                         for (i = 0; i < n; ++i) {
731                                 ir_node *sync_pred = get_Sync_pred(irn, i);
732                                 parallelise_store(pi, sync_pred);
733                         }
734                         return;
735                 }
736         }
737         ir_nodeset_insert(&pi->this_mem, irn);
738         //ir_fprintf(stderr, "adding %+F to this set\n", irn);
739 }
740
741
742 static void walker(ir_node *proj, void *env)
743 {
744         ir_node          *mem_op;
745         ir_node          *pred;
746         ir_node          *block;
747         int               n;
748         parallelise_info  pi;
749
750         (void)env;
751
752         if (!is_Proj(proj)) return;
753         if (get_irn_mode(proj) != mode_M) return;
754
755         mem_op = get_Proj_pred(proj);
756         if (is_Load(mem_op)) {
757                 if (get_Load_volatility(mem_op) != volatility_non_volatile) return;
758
759                 block = get_nodes_block(mem_op);
760                 pred  = get_Load_mem(mem_op);
761                 //ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj);
762
763                 pi.origin_block = block,
764                 pi.origin_ptr   = get_Load_ptr(mem_op);
765                 pi.origin_mode  = get_Load_mode(mem_op);
766                 ir_nodeset_init(&pi.this_mem);
767                 ir_nodeset_init(&pi.user_mem);
768
769                 parallelise_load(&pi, pred);
770         } else if (is_Store(mem_op)) {
771                 if (get_Store_volatility(mem_op) != volatility_non_volatile) return;
772
773                 block = get_nodes_block(mem_op);
774                 pred  = get_Store_mem(mem_op);
775                 //ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj);
776
777                 pi.origin_block = block,
778                 pi.origin_ptr   = get_Store_ptr(mem_op);
779                 pi.origin_mode  = get_irn_mode(get_Store_value(mem_op));
780                 ir_nodeset_init(&pi.this_mem);
781                 ir_nodeset_init(&pi.user_mem);
782
783                 parallelise_store(&pi, pred);
784         } else {
785                 return;
786         }
787
788         n = ir_nodeset_size(&pi.user_mem);
789         if (n != 0) { /* nothing happened otherwise */
790                 ir_graph               *irg  = current_ir_graph;
791                 ir_node                *sync;
792                 ir_node               **in;
793                 ir_nodeset_iterator_t   iter;
794                 int                     i;
795
796                 ++n;
797                 //ir_fprintf(stderr, "creating sync for users of %+F with %d inputs\n", proj, n);
798                 NEW_ARR_A(ir_node*, in, n);
799                 i = 0;
800                 in[i++] = new_r_Unknown(irg, mode_M);
801                 ir_nodeset_iterator_init(&iter, &pi.user_mem);
802                 for (;;) {
803                         ir_node* p = ir_nodeset_iterator_next(&iter);
804                         if (p == NULL) break;
805                         in[i++] = p;
806                 }
807                 assert(i == n);
808                 sync = new_r_Sync(irg, block, n, in);
809                 exchange(proj, sync);
810
811                 assert(pn_Load_M == pn_Store_M);
812                 proj = new_r_Proj(irg, block, mem_op, mode_M, pn_Load_M);
813                 set_Sync_pred(sync, 0, proj);
814
815                 n = ir_nodeset_size(&pi.this_mem);
816                 //ir_fprintf(stderr, "creating sync for %+F with %d inputs\n", mem_op, n);
817                 ir_nodeset_iterator_init(&iter, &pi.this_mem);
818                 if (n == 1) {
819                         sync = ir_nodeset_iterator_next(&iter);
820                 } else {
821                         NEW_ARR_A(ir_node*, in, n);
822                         i = 0;
823                         for (;;) {
824                                 ir_node* p = ir_nodeset_iterator_next(&iter);
825                                 if (p == NULL) break;
826                                 in[i++] = p;
827                         }
828                         assert(i == n);
829                         sync = new_r_Sync(irg, block, n, in);
830                 }
831                 set_memop_mem(mem_op, sync);
832         }
833
834         ir_nodeset_destroy(&pi.this_mem);
835         ir_nodeset_destroy(&pi.user_mem);
836 }
837
838
839 void opt_sync(ir_graph *irg)
840 {
841         //assure_irg_entity_usage_computed(irg);
842         //assure_irp_globals_entity_usage_computed();
843
844         irg_walk_graph(irg, NULL, walker, NULL);
845         //optimize_graph_df(irg);
846         //irg_walk_graph(irg, NormaliseSync, NULL, NULL);
847 }