[libfirm] / ir / opt / ldst2.c
/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   parallelising Load/Store optimisation
 * @author  Christoph Mallon
 * @version $Id: $
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "iroptimize.h"

#include "array_t.h"
#include "debug.h"
#include "ircons.h"
#include "irgraph.h"
#include "irgmod.h"
#include "irgopt.h"
#include "irgwalk.h"
#include "irmemory.h"
#include "irnode.h"
#include "irnodeset.h"
#include "obst.h"
#include "irdump.h"
#include "irflag_t.h"
#include "irprintf.h"

#if 0
/* Older, disabled implementation of this optimisation; it is only used by
 * the likewise disabled opt_ldst2() further below. */
#define OPTIMISE_LOAD_AFTER_LOAD


#define UNIMPLEMENTED abort();


DEBUG_ONLY(static firm_dbg_module_t *dbg);


static struct obstack obst;
static size_t count_addrs;
static ir_node** addrs;


/* Walker: records the address of every Load and Store in the set given as env. */
static void AddressCollector(ir_node* node, void* env)
{
	ir_nodeset_t* addrs_set = env;
	ir_node* addr;
	if (is_Load(node)) {
		addr = get_Load_ptr(node);
	} else if (is_Store(node)) {
		addr = get_Store_ptr(node);
	} else {
		return;
	}
	ir_nodeset_insert(addrs_set, addr);
}


/* Collects all unique addresses used by load and store nodes of a graph and
 * puts them into an array for later use */
static void CollectAddresses(ir_graph* irg)
{
	ir_nodeset_t addrs_set;

	ir_nodeset_init(&addrs_set);
	irg_walk_graph(irg, AddressCollector, NULL, &addrs_set);

	count_addrs = ir_nodeset_size(&addrs_set);
	DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (unsigned int)count_addrs));
	if (count_addrs != 0) {
		ir_nodeset_iterator_t addr_iter;
		size_t i;

		addrs = NEW_ARR_D(ir_node*, &obst, count_addrs);
		ir_nodeset_iterator_init(&addr_iter, &addrs_set);
		for (i = 0; i < count_addrs; i++) {
			ir_node* addr = ir_nodeset_iterator_next(&addr_iter);
			assert(addr != NULL);
			set_irn_link(addr, (void *)i);
			addrs[i] = addr;
			DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr));
		}
	}
}


/* Block walker: attaches an array of (initially empty) alias sets, one per
 * collected address, to every block. */
static void AliasSetAdder(ir_node* block, void* env)
{
	ir_nodeset_t* alias_set;
	size_t i;
	(void) env;

	alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs);
	for (i = 0; i < count_addrs; i++) {
		ir_nodeset_init(&alias_set[i]);
	}
	set_irn_link(block, alias_set);
}


/* Seeds the start block: every collected address initially depends on the
 * graph's initial memory. */
static void SetStartAddressesTop(ir_graph* irg)
{
	ir_node* initial_mem;
	ir_node* start_block;
	ir_nodeset_t* start_addrs;
	size_t i;

	initial_mem = get_irg_initial_mem(irg);
	start_block = get_irg_start_block(irg);
	start_addrs = get_irn_link(start_block);
	for (i = 0; i < count_addrs; i++) {
		ir_nodeset_insert(&start_addrs[i], initial_mem);
	}
	mark_Block_block_visited(start_block);
}


/* Block walker: frees the alias sets attached by AliasSetAdder(). */
static void AliasSetDestroyer(ir_node* block, void* env)
{
	ir_nodeset_t* alias_set = get_irn_link(block);
	size_t i;
	(void) env;

	for (i = 0; i < count_addrs; i++) {
		ir_nodeset_destroy(&alias_set[i]);
	}
}


/* Tests whether the memory operation other (a Load/Store or a Proj of one)
 * may alias an access to addr with the given mode. */
static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, ir_node* other)
{
	ir_node* other_addr;
	ir_mode* other_mode;

	if (is_Proj(other)) other = get_Proj_pred(other);

	if (is_Load(other)) {
		other_addr = get_Load_ptr(other);
	} else if (is_Store(other)) {
		other_addr = get_Store_ptr(other);
	} else {
		return ir_may_alias;
	}

	other_mode = get_irn_mode(other);
	return get_alias_relation(irg, addr, mode, other_addr, other_mode);
}


/* qsort() comparator: orders nodes by their node index so Sync inputs are
 * deterministic. */
static int in_cmp(void const* va, void const* vb)
{
	ir_node const* const a = *(ir_node const*const*)va;
	ir_node const* const b = *(ir_node const*const*)vb;
	return get_irn_idx(a) - get_irn_idx(b);
}


/* Returns the single memory node in after_set, or a Sync over all of them
 * (with deterministically sorted inputs) if there is more than one. */
static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set)
{
	size_t set_size = ir_nodeset_size(after_set);
	ir_nodeset_iterator_t iter;

	assert(set_size != 0);

	ir_nodeset_iterator_init(&iter, after_set);
	if (set_size == 1) {
		return ir_nodeset_iterator_next(&iter);
	} else {
		ir_node** in;
		size_t i;

		NEW_ARR_A(ir_node*, in, set_size);
		for (i = 0; i < set_size; i++) {
			in[i] = ir_nodeset_iterator_next(&iter);
		}
		qsort(in, set_size, sizeof(*in), in_cmp);
		return new_r_Sync(irg, block, set_size, in);
	}
}


/* Per collected address, a singly linked list (chained through the irn link
 * field) of memory Phis which still have Unknown predecessors. */
static ir_node** unfinished_phis;


/* Creates one memory Phi per collected address in block, merging the
 * predecessor blocks' alias sets.  Phis whose predecessors are not finished
 * yet get Unknown inputs and are queued on unfinished_phis. */
static void PlaceMemPhis(ir_graph* irg, ir_node* block, ir_node* phi)
{
	int unfinished = 0;
	size_t block_n_preds = get_Block_n_cfgpreds(block);
	ir_nodeset_t* thissets;
	ir_node** in;
	size_t i;
	size_t j;

	thissets = get_irn_link(block);
	NEW_ARR_A(ir_node*, in, block_n_preds);
	for (j = 0; j < count_addrs; j++) {
		ir_node* new_phi;

		for (i = 0; i < block_n_preds; i++) {
			ir_node* pred_block = get_nodes_block(get_Phi_pred(phi, i)); // TODO get_Block_cfgpred_block(block, i);
			ir_nodeset_t* predsets = get_irn_link(pred_block);
			size_t predset_size = ir_nodeset_size(&predsets[j]);

			if (predset_size == 0) {
				in[i] = new_r_Unknown(irg, mode_M);
				unfinished = 1;
			} else {
				in[i] = GenerateSync(irg, pred_block, &predsets[j]);
			}
		}
		new_phi = new_r_Phi(irg, block, block_n_preds, in, mode_M);
		if (unfinished) {
			set_irn_link(new_phi, unfinished_phis[j]);
			unfinished_phis[j] = new_phi;
		}
		ir_nodeset_insert(&thissets[j], new_phi);
	}
}


static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block);


/* Walks all predecessors of a memory Phi and then replaces it by the
 * per-address Phis created in PlaceMemPhis(). */
static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
{
	size_t n = get_Phi_n_preds(phi);
	size_t i;

	for (i = 0; i < n; i++) {
		WalkMem(irg, get_Phi_pred(phi, i), block);
	}

	PlaceMemPhis(irg, block, phi);
	exchange(phi, new_Bad());
}


/* Rewires the memory input of a Load to the operations it really has to be
 * ordered after and records the Load's memory in every alias set whose
 * address it may alias. */
static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory)
{
	ir_node* addr = get_Load_ptr(load);
	size_t addr_idx = (size_t)get_irn_link(addr);
	ir_nodeset_t* interfere_sets = get_irn_link(block);
	ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
	size_t size = ir_nodeset_size(interfere_set);
	ir_nodeset_iterator_t interfere_iter;
	size_t i;

	assert(size > 0);
	ir_nodeset_iterator_init(&interfere_iter, interfere_set);
	if (size == 1) {
		ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
		assert(!is_Proj(after) || !is_Load(get_Proj_pred(after)));
		DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after));
		set_Load_mem(load, after);
	} else {
		ir_node** after_set;
		ir_node* after;
		ir_node* mem;
		size_t i;

		NEW_ARR_A(ir_node*, after_set, size);
		i = 0;
		while ((mem = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
			if (is_Proj(mem)) {
				ir_node* pred = get_Proj_pred(mem);
				if (is_Load(pred)) {
#ifdef OPTIMISE_LOAD_AFTER_LOAD
					if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
						exchange(load, pred);
						return;
					}
#endif
					continue;
				}
			}
			DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, mem));
			after_set[i++] = mem;
		}
		assert(i != 0);
		if (i == 1) {
			after = after_set[0];
		} else {
			after = new_r_Sync(irg, block, i, after_set);
		}
		set_Load_mem(load, after);
	}

	for (i = 0; i < count_addrs; i++) {
		ir_mode* mode = get_Load_mode(load);
		ir_node* other_addr = addrs[i];
		ir_mode* other_mode = mode; // XXX second mode is nonsense
		ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);

		DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
		if (rel == ir_no_alias) {
			continue;
		}
		DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr));

		ir_nodeset_insert(&interfere_sets[i], memory);
	}
}


/* Rewires the memory input of a Store and updates the alias sets: operations
 * the Store is now ordered after are removed and the Store's memory is
 * inserted for every address it may alias. */
static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory)
{
	ir_node* addr = get_Store_ptr(store);
	size_t addr_idx = (size_t)get_irn_link(addr);
	ir_nodeset_t* interfere_sets = get_irn_link(block);
	ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
	ir_node* after;
	size_t i;

	after = GenerateSync(irg, block, interfere_set);
	set_Store_mem(store, after);

	for (i = 0; i < count_addrs; i++) {
		ir_nodeset_iterator_t interfere_iter;
		ir_mode* mode = get_irn_mode(get_Store_value(store));
		ir_node* other_addr = addrs[i];
		ir_mode* other_mode = mode; // XXX second mode is nonsense
		ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
		ir_node* other_node;

		DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
		if (rel == ir_no_alias) {
			continue;
		}
		DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr));

		ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
		while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
			if (AliasTest(irg, addr, mode, other_node) != ir_no_alias) {
				DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store));
				ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
			}
		}

		ir_nodeset_insert(&interfere_sets[i], memory);
	}
}


/* Walks the memory chain upwards from node, rewiring Loads and Stores so that
 * they only depend on memory operations they may actually interfere with.
 * Returns non-zero if a block change happened on the way. */
static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block)
{
	int block_change = 0;
	ir_node* block = get_nodes_block(node);
	ir_node* pred;
	ir_node* memory = node;
	ir_nodeset_t* addr_sets;

	if (block != last_block) {
		DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block));
		block_change = 1;
		if (!Block_block_visited(block)) {
			mark_Block_block_visited(block);
		} else {
			DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node));
			return block_change;
		}
	}

	// Skip projs
	if (is_Proj(node)) node = get_Proj_pred(node);

	if (is_Phi(node)) {
		WalkMemPhi(irg, block, node);
		return block_change;
	} else if (is_Sync(node)) {
		UNIMPLEMENTED
	} else if (is_Return(node)) {
		pred = get_Return_mem(node);
	} else {
		pred = get_fragile_op_mem(node);
	}

	if (WalkMem(irg, pred, block)) {
		// There was a block change
		size_t block_arity = get_Block_n_cfgpreds(block);

		DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node));
		if (block_arity == 1) {
			// Just one predecessor, inherit its alias sets
			ir_node* pred_block = get_nodes_block(pred);
			ir_nodeset_t* predsets = get_irn_link(pred_block);
			ir_nodeset_t* thissets = get_irn_link(block);
			size_t i;

			DB((dbg, LEVEL_3, "===> Copying the only predecessor's address sets\n"));

			if (ir_nodeset_size(&predsets[0]) == 0) {
				ir_node* unknown;

				DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n"));
				assert(Block_block_visited(pred_block));

				unknown = new_r_Unknown(irg, mode_M);
				for (i = 0; i < count_addrs; i++) {
					ir_node* phi_unk = new_r_Phi(irg, block, 1, &unknown, mode_M);
					DB((dbg, LEVEL_3, "===> Placing unfinished %+F for %+F in %+F\n", phi_unk, addrs[i], block));
					set_irn_link(phi_unk, unfinished_phis[i]);
					unfinished_phis[i] = phi_unk;
					ir_nodeset_insert(&thissets[i], phi_unk);
				}
			} else {
				for (i = 0; i < count_addrs; i++) {
					ir_nodeset_iterator_t prediter;
					ir_node* addr;

					ir_nodeset_iterator_init(&prediter, &predsets[i]);
					while ((addr = ir_nodeset_iterator_next(&prediter)) != NULL) {
						ir_nodeset_insert(&thissets[i], addr);
					}
				}
			}
		}
	}

	DB((dbg, LEVEL_3, "===> Detotalising %+F\n", node));

	addr_sets = get_irn_link(block);

	if (is_Load(node)) {
		PlaceLoad(irg, block, node, memory);
	} else if (is_Store(node)) {
		PlaceStore(irg, block, node, memory);
	} else {
		ir_nodeset_t sync_set;
		size_t i;
		ir_node* after;

		DB((dbg, LEVEL_3, "===> Fallback: %+F aliases everything\n", node));

		ir_nodeset_init(&sync_set);
		for (i = 0; i < count_addrs; i++) {
			ir_nodeset_iterator_t iter;
			ir_node* mem;

			ir_nodeset_iterator_init(&iter, &addr_sets[i]);
			while ((mem = ir_nodeset_iterator_next(&iter)) != NULL) {
				ir_nodeset_insert(&sync_set, mem);
			}
		}

		after = GenerateSync(irg, block, &sync_set);
		set_irn_n(node, 0, after); // XXX unnice way to set the memory input

		for (i = 0; i < count_addrs; i++) {
			ir_nodeset_iterator_t iter;
			ir_nodeset_iterator_init(&iter, &addr_sets[i]);
			while (ir_nodeset_iterator_next(&iter) != NULL) {
				ir_nodeset_remove_iterator(&addr_sets[i], &iter);
			}
			ir_nodeset_insert(&addr_sets[i], memory);
		}
	}

	return block_change;
}


/* Replaces the Unknown predecessors of the queued unfinished Phis by the
 * memory state of the corresponding predecessor block. */
static void FinalisePhis(ir_graph* irg)
{
	size_t i;

	for (i = 0; i < count_addrs; i++) {
		ir_node* next_phi;
		ir_node* phi;

		for (phi = unfinished_phis[i]; phi != NULL; phi = next_phi) {
			ir_node* block = get_nodes_block(phi);
			size_t block_n_preds = get_Block_n_cfgpreds(block);

			next_phi = get_irn_link(phi);

			DB((dbg, LEVEL_4, "===> Finalising phi %+F in %+F\n", phi, block));

			if (block_n_preds == 1) {
				ir_node* pred_block = get_Block_cfgpred_block(block, 0);
				ir_nodeset_t* pred_sets = get_irn_link(pred_block);
				ir_node* after = GenerateSync(irg, pred_block, &pred_sets[i]);

				assert(is_Unknown(get_Phi_pred(phi, 0)));
				exchange(phi, after);
			} else {
				ir_node** in;
				size_t j;

				NEW_ARR_A(ir_node*, in, block_n_preds);
				for (j = 0; j < block_n_preds; j++) {
					ir_node* pred_block = get_Block_cfgpred_block(block, j);
					ir_nodeset_t* pred_sets = get_irn_link(pred_block);

					if (is_Unknown(get_Phi_pred(phi, j))) {
						set_Phi_pred(phi, j, GenerateSync(irg, pred_block, &pred_sets[i]));
					}
				}
			}
		}
	}
}


/* Entry point of the old implementation: walks each Return's memory chain and
 * then finalises the pending Phis. */
static void Detotalise(ir_graph* irg)
{
	ir_node* end_block = get_irg_end_block(irg);
	size_t npreds = get_Block_n_cfgpreds(end_block);
	size_t i;

	unfinished_phis = XMALLOCN(ir_node*, count_addrs);
	for (i = 0; i < count_addrs; i++) {
		unfinished_phis[i] = NULL;
	}

	for (i = 0; i < npreds; i++) {
		ir_node* pred = get_Block_cfgpred(end_block, i);
		assert(is_Return(pred));
		DB((dbg, LEVEL_2, "===> Starting memory walk at %+F\n", pred));
		WalkMem(irg, pred, NULL);
	}

	FinalisePhis(irg);
	xfree(unfinished_phis);
}
#endif


/* Recursively collects the non-Sync predecessors of a Sync, flattening nested
 * Syncs into the set preds. */
static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
{
	size_t n = get_Sync_n_preds(sync);
	size_t i;

	for (i = 0; i < n; i++) {
		ir_node* pred = get_Sync_pred(sync, i);
		if (is_Sync(pred)) {
			AddSyncPreds(preds, pred);
		} else {
			ir_nodeset_insert(preds, pred);
		}
	}
}

#if 0
/* Disabled: Sync normalisation and the old opt_ldst2() entry point. */
static void NormaliseSync(ir_node* node, void* env)
{
	ir_nodeset_t preds;
	ir_nodeset_iterator_t iter;
	ir_node** in;
	size_t count_preds;
	size_t i;
	(void) env;

	if (!is_Sync(node)) return;

	ir_nodeset_init(&preds);
	AddSyncPreds(&preds, node);

	count_preds = ir_nodeset_size(&preds);
	if (count_preds != (unsigned)get_Sync_n_preds(node)) {
		NEW_ARR_A(ir_node*, in, count_preds);
		ir_nodeset_iterator_init(&iter, &preds);
		for (i = 0; i < count_preds; i++) {
			ir_node* pred = ir_nodeset_iterator_next(&iter);
			assert(pred != NULL);
			in[i] = pred;
		}
		set_irn_in(node, count_preds, in);
	}

	ir_nodeset_destroy(&preds);
}

void opt_ldst2(ir_graph* irg)
{
	FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2");
	DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg));

	normalize_one_return(irg);
	dump_ir_block_graph(irg, "-prefluffig");

	obstack_init(&obst);

	if (1 /* XXX */ || get_opt_alias_analysis()) {
		assure_irg_address_taken_computed(irg);
		assure_irp_globals_address_taken_computed();
	}


	CollectAddresses(irg);
	if (count_addrs == 0) return;

	irg_block_walk_graph(irg, AliasSetAdder, NULL, NULL);
	inc_irg_block_visited(irg);
	SetStartAddressesTop(irg);
	Detotalise(irg);
	dump_ir_block_graph(irg, "-fluffig");

	irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL);
	obstack_free(&obst, NULL);

	normalize_proj_nodes(irg);
	irg_walk_graph(irg, NormaliseSync, NULL, NULL);
	optimize_graph_df(irg);
	irg_walk_graph(irg, NormaliseSync, NULL, NULL);
	dump_ir_block_graph(irg, "-postfluffig");
}
#endif


typedef struct parallelise_info
{
	ir_node      *origin_block; /* block of the memory operation being parallelised */
	ir_node      *origin_ptr;   /* address accessed by that operation */
	ir_mode      *origin_mode;  /* mode of the accessed value */
	ir_nodeset_t  this_mem;     /* memory values the operation itself must depend on */
	ir_nodeset_t  user_mem;     /* memory Projs that may run in parallel and are Synced for the users */
} parallelise_info;


/* Walks the memory chain above a Load: memory Projs of earlier non-volatile
 * Loads (and of Stores proven not to alias) in the same block go into
 * user_mem, so the Load does not have to wait for them; everything else ends
 * up in this_mem. */
static void parallelise_load(parallelise_info *pi, ir_node *irn)
{
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->user_mem, irn))
		return;

	//ir_fprintf(stderr, "considering %+F\n", irn);
	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred) &&
					get_Load_volatility(pred) == volatility_non_volatile) {
				ir_node *mem = get_Load_mem(pred);
				//ir_nodeset_insert(&pi->this_mem, mem);
				ir_nodeset_insert(&pi->user_mem, irn);
				//ir_fprintf(stderr, "adding %+F to user set\n", irn);
				parallelise_load(pi, mem);
				return;
			} else if (is_Store(pred) &&
					get_Store_volatility(pred) == volatility_non_volatile) {
				ir_mode *org_mode   = pi->origin_mode;
				ir_node *org_ptr    = pi->origin_ptr;
				ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
				ir_node *store_ptr  = get_Store_ptr(pred);
				if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
					ir_node *mem = get_Store_mem(pred);
					//ir_fprintf(stderr, "Ld after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode);
					ir_nodeset_insert(&pi->user_mem, irn);
					//ir_fprintf(stderr, "adding %+F to user set\n", irn);
					parallelise_load(pi, mem);
					return;
				}
			}
		} else if (is_Sync(irn)) {
			int n = get_Sync_n_preds(irn);
			int i;

			for (i = 0; i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
				parallelise_load(pi, sync_pred);
			}
			return;
		}
	}
	ir_nodeset_insert(&pi->this_mem, irn);
	//ir_fprintf(stderr, "adding %+F to this set\n", irn);
}


/* Analogue of parallelise_load() for Stores: earlier non-volatile Loads and
 * Stores which provably do not alias the Store's address go into user_mem;
 * everything else ends up in this_mem. */
static void parallelise_store(parallelise_info *pi, ir_node *irn)
{
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->user_mem, irn))
		return;

	//ir_fprintf(stderr, "considering %+F\n", irn);
	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred) &&
					get_Load_volatility(pred) == volatility_non_volatile) {
				ir_mode *org_mode  = pi->origin_mode;
				ir_node *org_ptr   = pi->origin_ptr;
				ir_mode *load_mode = get_Load_mode(pred);
				ir_node *load_ptr  = get_Load_ptr(pred);
				if (get_alias_relation(current_ir_graph, org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
					ir_node *mem = get_Load_mem(pred);
					//ir_fprintf(stderr, "St after Ld: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, load_ptr, load_mode);
					ir_nodeset_insert(&pi->user_mem, irn);
					//ir_fprintf(stderr, "adding %+F to user set\n", irn);
					parallelise_store(pi, mem);
					return;
				}
			} else if (is_Store(pred) &&
					get_Store_volatility(pred) == volatility_non_volatile) {
				ir_mode *org_mode   = pi->origin_mode;
				ir_node *org_ptr    = pi->origin_ptr;
				ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
				ir_node *store_ptr  = get_Store_ptr(pred);
				if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
					ir_node *mem;

					//ir_fprintf(stderr, "St after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode);
					ir_nodeset_insert(&pi->user_mem, irn);
					//ir_fprintf(stderr, "adding %+F to user set\n", irn);
					mem = get_Store_mem(pred);
					parallelise_store(pi, mem);
					return;
				}
			}
		} else if (is_Sync(irn)) {
			int n = get_Sync_n_preds(irn);
			int i;

			for (i = 0; i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
				parallelise_store(pi, sync_pred);
			}
			return;
		}
	}
	ir_nodeset_insert(&pi->this_mem, irn);
	//ir_fprintf(stderr, "adding %+F to this set\n", irn);
}


/* Walker: for every memory Proj of a non-volatile Load or Store, splits the
 * memory dependencies so that independent memory operations no longer depend
 * on each other, joining the results again with Sync nodes. */
static void walker(ir_node *proj, void *env)
{
	ir_node          *mem_op;
	ir_node          *pred;
	ir_node          *block;
	int               n;
	parallelise_info  pi;

	(void)env;

	if (!is_Proj(proj)) return;
	if (get_irn_mode(proj) != mode_M) return;

	mem_op = get_Proj_pred(proj);
	if (is_Load(mem_op)) {
		if (get_Load_volatility(mem_op) != volatility_non_volatile) return;

		block = get_nodes_block(mem_op);
		pred  = get_Load_mem(mem_op);
		//ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj);

		pi.origin_block = block;
		pi.origin_ptr   = get_Load_ptr(mem_op);
		pi.origin_mode  = get_Load_mode(mem_op);
		ir_nodeset_init(&pi.this_mem);
		ir_nodeset_init(&pi.user_mem);

		parallelise_load(&pi, pred);
	} else if (is_Store(mem_op)) {
		if (get_Store_volatility(mem_op) != volatility_non_volatile) return;

		block = get_nodes_block(mem_op);
		pred  = get_Store_mem(mem_op);
		//ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj);

		pi.origin_block = block;
		pi.origin_ptr   = get_Store_ptr(mem_op);
		pi.origin_mode  = get_irn_mode(get_Store_value(mem_op));
		ir_nodeset_init(&pi.this_mem);
		ir_nodeset_init(&pi.user_mem);

		parallelise_store(&pi, pred);
	} else {
		return;
	}

	n = ir_nodeset_size(&pi.user_mem);
	if (n != 0) { /* nothing happened otherwise */
		ir_graph               *irg  = current_ir_graph;
		ir_node                *sync;
		ir_node               **in;
		ir_nodeset_iterator_t   iter;
		int                     i;

		++n;
		//ir_fprintf(stderr, "creating sync for users of %+F with %d inputs\n", proj, n);
		NEW_ARR_A(ir_node*, in, n);
		i = 0;
		in[i++] = new_r_Unknown(irg, mode_M);
		ir_nodeset_iterator_init(&iter, &pi.user_mem);
		for (;;) {
			ir_node* p = ir_nodeset_iterator_next(&iter);
			if (p == NULL) break;
			in[i++] = p;
		}
		assert(i == n);
		sync = new_r_Sync(irg, block, n, in);
		exchange(proj, sync);

		assert(pn_Load_M == pn_Store_M);
		proj = new_r_Proj(irg, block, mem_op, mode_M, pn_Load_M);
		set_Sync_pred(sync, 0, proj);

		n = ir_nodeset_size(&pi.this_mem);
		//ir_fprintf(stderr, "creating sync for %+F with %d inputs\n", mem_op, n);
		ir_nodeset_iterator_init(&iter, &pi.this_mem);
		if (n == 1) {
			sync = ir_nodeset_iterator_next(&iter);
		} else {
			NEW_ARR_A(ir_node*, in, n);
			i = 0;
			for (;;) {
				ir_node* p = ir_nodeset_iterator_next(&iter);
				if (p == NULL) break;
				in[i++] = p;
			}
			assert(i == n);
			sync = new_r_Sync(irg, block, n, in);
		}
		set_memop_mem(mem_op, sync);
	}

	ir_nodeset_destroy(&pi.this_mem);
	ir_nodeset_destroy(&pi.user_mem);
}
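
/*
 * Illustrative sketch (not part of the original source): given two
 * non-volatile loads from addresses p and q in the same block, chained only
 * through memory,
 *
 *     M -> Load(p) -> Proj M -> Load(q) -> Proj M -> users
 *
 * walker() moves Load(q) onto Load(p)'s input memory M and replaces the
 * second Proj by a Sync of both Projs, so the users still see both effects
 * while the two loads no longer depend on each other:
 *
 *     M --> Load(p) -> Proj M --\
 *      \                         Sync -> users
 *       -> Load(q) -> Proj M --/
 */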


/* Splits up the memory dependency graph so that independent non-volatile
 * Loads and Stores are only ordered by Sync nodes where necessary. */
void opt_sync(ir_graph *irg)
{
	//assure_irg_entity_usage_computed(irg);
	//assure_irp_globals_entity_usage_computed();

	irg_walk_graph(irg, NULL, walker, NULL);
	//optimize_graph_df(irg);
	//irg_walk_graph(irg, NormaliseSync, NULL, NULL);
}
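
/*
 * Minimal usage sketch (not part of the original file): how this pass might
 * be driven over all graphs of the current program.  It only assumes the
 * standard libFirm program iteration helpers get_irp_n_irgs()/get_irp_irg();
 * the function name run_opt_sync_everywhere is made up for illustration.
 */
#if 0
#include "irprog.h"

static void run_opt_sync_everywhere(void)
{
	int i;
	/* apply the Sync parallelisation to every graph in the program */
	for (i = 0; i < get_irp_n_irgs(); i++) {
		opt_sync(get_irp_irg(i));
	}
}
#endif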