9c2b962c002dde7c1d6f1605fe4f682cd991db3c
[libfirm] / ir / opt / ldst2.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   parallelizing Load/Store optimisation
23  * @author  Christoph Mallon
24  * @version $Id$
25  */
26 #ifdef HAVE_CONFIG_H
27 #include "config.h"
28 #endif
29
30 #include <stdint.h>
31
32 #include "array.h"
33 #include "debug.h"
34 #include "ircons.h"
35 #include "irgraph.h"
36 #include "irgmod.h"
37 #include "irgopt.h"
38 #include "irgwalk.h"
39 #include "irmemory.h"
40 #include "irnode.h"
41 #include "irnodeset.h"
42 #include "ldst2.h"
43 #include "obst.h"
44 #include "return.h"
45 #include "irdump.h"
46
47
/* When defined, PlaceLoad() replaces a Load by an earlier Load of the same
 * address and mode found in its execute-after set. */
#define OPTIMISE_LOAD_AFTER_LOAD


/* Marks a code path that is not implemented: aborts the program.
 * NOTE: the expansion already ends in a semicolon, so the macro is used
 * without a trailing one. */
#define UNIMPLEMENTED abort();


/* Debug module handle, registered in opt_ldst2(). */
DEBUG_ONLY(static firm_dbg_module_t *dbg);


/* Obstack backing the `addrs` array and the per-block alias set arrays. */
static struct obstack obst;
/* Number of unique Load/Store addresses found by CollectAddresses(). */
static size_t count_addrs;
/* The unique addresses; the link field of addrs[i] holds the index i. */
static ir_node** addrs;
60
61
62 static void AddressCollector(ir_node* node, void* env)
63 {
64         ir_nodeset_t* addrs_set = env;
65         ir_node* addr;
66         if (is_Load(node)) {
67                 addr = get_Load_ptr(node);
68         } else if (is_Store(node)) {
69                 addr = get_Store_ptr(node);
70         } else {
71                 return;
72         }
73         ir_nodeset_insert(addrs_set, addr);
74 }
75
76
77 /* Collects all unique addresses used by load and store nodes of a graph and
78  * puts them into an array for later use */
79 static void CollectAddresses(ir_graph* irg)
80 {
81         ir_nodeset_t addrs_set;
82
83         ir_nodeset_init(&addrs_set);
84         irg_walk_graph(irg, AddressCollector, NULL, &addrs_set);
85
86         count_addrs = ir_nodeset_size(&addrs_set);
87         DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (unsigned int)count_addrs));
88         if (count_addrs != 0) {
89                 ir_nodeset_iterator_t addr_iter;
90                 size_t i;
91
92                 addrs = NEW_ARR_D(ir_node*, &obst, count_addrs);
93                 ir_nodeset_iterator_init(&addr_iter, &addrs_set);
94                 for (i = 0; i < count_addrs; i++) {
95                         ir_node* addr = ir_nodeset_iterator_next(&addr_iter);
96                         assert(addr != NULL);
97                         set_irn_link(addr, (void*)(uintptr_t)i);
98                         addrs[i] = addr;
99                         DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr));
100                 }
101         }
102 }
103
104
105 static void AliasSetAdder(ir_node* block, void* env)
106 {
107         ir_nodeset_t* alias_set;
108         size_t i;
109
110         alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs);
111         for (i = 0; i < count_addrs; i++) {
112                 ir_nodeset_init(&alias_set[i]);
113         }
114         set_irn_link(block, alias_set);
115 }
116
117
118 static void SetStartAddressesTop(ir_graph* irg)
119 {
120         ir_node* initial_mem;
121         ir_node* start_block;
122         ir_nodeset_t* start_addrs;
123         size_t i;
124
125         initial_mem = get_irg_initial_mem(irg);
126         start_block = get_irg_start_block(irg);
127         start_addrs = get_irn_link(start_block);
128         for (i = 0; i < count_addrs; i++) {
129                 ir_nodeset_insert(&start_addrs[i], initial_mem);
130         }
131         mark_Block_block_visited(start_block);
132 }
133
134
135 static void AliasSetDestroyer(ir_node* block, void* env)
136 {
137         ir_nodeset_t* alias_set = get_irn_link(block);
138         size_t i;
139
140         for (i = 0; i < count_addrs; i++) {
141                 ir_nodeset_destroy(&alias_set[i]);
142         }
143 }
144
145
146 static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, ir_node* other)
147 {
148         ir_node* other_addr;
149         ir_mode* other_mode;
150
151         if (is_Proj(other)) other = get_Proj_pred(other);
152
153         if (is_Load(other)) {
154                 other_addr = get_Load_ptr(other);
155         } else if (is_Store(other)) {
156                 other_addr = get_Store_ptr(other);
157         } else {
158                 return may_alias;
159         }
160
161         other_mode = get_irn_mode(other);
162         return get_alias_relation(irg, addr, mode, other_addr, other_mode);
163 }
164
165
166 static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set)
167 {
168         size_t set_size = ir_nodeset_size(after_set);
169         ir_nodeset_iterator_t iter;
170
171         assert(set_size != 0);
172
173         ir_nodeset_iterator_init(&iter, after_set);
174         if (set_size == 1) {
175                 return ir_nodeset_iterator_next(&iter);
176         } else {
177                 ir_node** in;
178                 size_t i;
179
180                 NEW_ARR_A(ir_node*, in, set_size);
181                 for (i = 0; i < set_size; i++) {
182                         in[i] = ir_nodeset_iterator_next(&iter);
183                 }
184                 return new_r_Sync(irg, block, set_size, in);
185         }
186 }
187
188
/* Per-address list heads (indexed like `addrs`) of Phi nodes that still have
 * Unknown placeholder operands. The lists are chained through the Phi nodes'
 * link fields. Allocated and freed in Detotalise(), resolved by
 * FinalisePhis(). */
static ir_node** unfinished_phis;
190
191
192 static void PlaceMemPhis(ir_graph* irg, ir_node* block, ir_node* phi)
193 {
194         int unfinished = 0;
195         size_t block_n_preds = get_Block_n_cfgpreds(block);
196         ir_nodeset_t* thissets;
197         ir_node** in;
198         size_t i;
199         size_t j;
200
201         thissets = get_irn_link(block);
202         NEW_ARR_A(ir_node*, in, block_n_preds);
203         for (j = 0; j < count_addrs; j++) {
204                 ir_node* new_phi;
205
206                 for (i = 0; i < block_n_preds; i++) {
207                         ir_node* pred_block = get_nodes_block(get_Phi_pred(phi, i)); // TODO get_Block_cfgpred_block(block, i);
208                         ir_nodeset_t* predsets = get_irn_link(pred_block);
209                         size_t predset_size = ir_nodeset_size(&predsets[j]);
210
211                         if (predset_size == 0) {
212                                 in[i] = new_r_Unknown(irg, mode_M);
213                                 unfinished = 1;
214                         } else {
215                                 in[i] = GenerateSync(irg, pred_block, &predsets[j]);
216                         }
217                 }
218                 new_phi = new_r_Phi(irg, block, block_n_preds, in, mode_M);
219                 if (unfinished) {
220                         set_irn_link(new_phi, unfinished_phis[j]);
221                         unfinished_phis[j] = new_phi;
222                 }
223                 ir_nodeset_insert(&thissets[j], new_phi);
224         }
225 }
226
227
/* Forward declaration: WalkMem() and WalkMemPhi() call each other. */
static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block);
229
230
231 static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
232 {
233         size_t n = get_Phi_n_preds(phi);
234         size_t i;
235
236         for (i = 0; i < n; i++) {
237                 WalkMem(irg, get_Phi_pred(phi, i), block);
238         }
239
240         PlaceMemPhis(irg, block, phi);
241         exchange(phi, new_Bad());
242 }
243
244
245 static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory)
246 {
247         ir_node* addr = get_Load_ptr(load);
248         size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr);
249         ir_nodeset_t* interfere_sets = get_irn_link(block);
250         ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
251         size_t size = ir_nodeset_size(interfere_set);
252         ir_nodeset_iterator_t interfere_iter;
253         size_t i;
254
255         assert(size > 0);
256         ir_nodeset_iterator_init(&interfere_iter, interfere_set);
257         if (size == 1) {
258                 ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
259                 if (is_Proj(after)) {
260                         ir_node* pred = get_Proj_pred(after);
261                         if (is_Load(pred)) {
262 #ifdef OPTIMISE_LOAD_AFTER_LOAD
263                                 if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
264                                         exchange(load, pred);
265                                         return;
266                                 }
267 #endif
268                                 after = get_Load_mem(pred);
269                         }
270                 }
271                 DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after));
272                 set_Load_mem(load, after);
273         } else {
274                 ir_node** after_set;
275                 ir_node* sync;
276
277                 NEW_ARR_A(ir_node*, after_set, size);
278                 for (i = 0; i < size; i++) {
279                         ir_node* mem = ir_nodeset_iterator_next(&interfere_iter);
280                         if (is_Proj(mem)) {
281                                 ir_node* pred = get_Proj_pred(mem);
282                                 if (is_Load(pred)) {
283 #ifdef OPTIMISE_LOAD_AFTER_LOAD
284                                         if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
285                                                 exchange(load, pred);
286                                                 return;
287                                         }
288 #endif
289                                         mem = get_Load_mem(pred);
290                                 }
291                         }
292                         after_set[i] = mem;
293                         sync = new_r_Sync(irg, block, size, after_set);
294                 }
295                 set_Load_mem(load, sync);
296         }
297
298         for (i = 0; i < count_addrs; i++) {
299                 ir_mode* mode = get_Load_mode(load);
300                 ir_node* other_addr = addrs[i];
301                 ir_mode* other_mode = mode; // XXX second mode is nonsense
302                 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
303                 ir_node* other_node;
304
305                 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
306                 if (rel == no_alias) {
307                         continue;
308                 }
309                 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr));
310
311                 ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
312                 while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
313                         if (is_Proj(other_node) && is_Load(get_Proj_pred(other_node))) continue;
314                         if (AliasTest(irg, addr, mode, other_node) != no_alias) {
315                                 DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], load));
316                                 ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
317                         }
318                 }
319
320                 ir_nodeset_insert(&interfere_sets[i], memory);
321         }
322 }
323
324
325 static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory)
326 {
327         ir_node* addr = get_Store_ptr(store);
328         size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr);
329         ir_nodeset_t* interfere_sets = get_irn_link(block);
330         ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
331         ir_node* after;
332         size_t i;
333
334         after = GenerateSync(irg, block, interfere_set);
335         set_Store_mem(store, after);
336
337         for (i = 0; i < count_addrs; i++) {
338                 ir_nodeset_iterator_t interfere_iter;
339                 ir_mode* mode = get_irn_mode(get_Store_value(store));
340                 ir_node* other_addr = addrs[i];
341                 ir_mode* other_mode = mode; // XXX second mode is nonsense
342                 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
343                 ir_node* other_node;
344
345                 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
346                 if (rel == no_alias) {
347                         continue;
348                 }
349                 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr));
350
351                 ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
352                 while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
353                         if (AliasTest(irg, addr, mode, other_node) != no_alias) {
354                                 DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store));
355                                 ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
356                         }
357                 }
358
359                 ir_nodeset_insert(&interfere_sets[i], memory);
360         }
361 }
362
363
/**
 * Walks the memory chain backwards starting at @p node and, on the way back
 * out of the recursion, rewires the memory input of every visited node
 * according to the per-address sets stored in the link field of its block.
 *
 * Memory Phis are delegated to WalkMemPhi(). Sync nodes are not supported
 * and abort the program (UNIMPLEMENTED).
 *
 * @param irg         the graph being processed
 * @param node        the memory node (or Return) to process; may be a Proj
 * @param last_block  the block of the node the walk came from, or NULL
 * @return non-zero if @p node lies in a different block than @p last_block
 */
static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block)
{
        int block_change = 0;
        ir_node* block = get_nodes_block(node);
        ir_node* pred;
        /* Keep the node as passed in (before Projs are skipped); this is what
         * gets inserted into the address sets below. */
        ir_node* memory = node;
        ir_nodeset_t* addr_sets;

        if (block != last_block) {
                DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block));
                block_change = 1;
                /* Enter each block only once; a block that is already marked
                 * ends the walk here. */
                if (Block_not_block_visited(block)) {
                        mark_Block_block_visited(block);
                } else {
                        DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node));
                        return block_change;
                }
        }

        // Skip projs
        if (is_Proj(node)) node = get_Proj_pred(node);

        /* Determine the memory predecessor to continue the walk with. */
        if (is_Phi(node)) {
                WalkMemPhi(irg, block, node);
                return block_change;
        } else if (is_Sync(node)) {
                /* Expands to abort(); so control never continues past here. */
                UNIMPLEMENTED
        } else if (is_Return(node)) {
                pred = get_Return_mem(node);
        } else {
                pred = get_fragile_op_mem(node);
        }

        /* Process everything before this node first. */
        if (WalkMem(irg, pred, block)) {
                // There was a block change
                size_t block_arity = get_Block_n_cfgpreds(block);

                DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node));
                /* Only the single-predecessor case is handled here. */
                if (block_arity == 1) {
                        // Just one predecessor, inherit its alias sets
                        ir_node* pred_block = get_nodes_block(pred);
                        ir_nodeset_t* predsets = get_irn_link(pred_block);
                        ir_nodeset_t* thissets = get_irn_link(block);
                        size_t i;

                        DB((dbg, LEVEL_3, "===> Copying the only predecessor's address sets\n"));

                        /* An empty first set is taken as the sign that the
                         * predecessor block has not been processed yet. */
                        if (ir_nodeset_size(&predsets[0]) == 0) {
                                ir_node* unknown;

                                DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n"));
                                assert(!Block_not_block_visited(pred_block));

                                /* Use one placeholder Phi per address with an
                                 * Unknown operand and queue it in
                                 * unfinished_phis for FinalisePhis(). */
                                unknown = new_r_Unknown(irg, mode_M);
                                for (i = 0; i < count_addrs; i++) {
                                        ir_node* phi_unk = new_r_Phi(irg, block, 1, &unknown, mode_M);
                                        set_irn_link(phi_unk, unfinished_phis[i]);
                                        unfinished_phis[i] = phi_unk;
                                        ir_nodeset_insert(&thissets[i], phi_unk);
                                }
                        } else {
                                /* Copy every member of every predecessor set
                                 * into the matching set of this block. */
                                for (i = 0; i < count_addrs; i++) {
                                        ir_nodeset_iterator_t prediter;
                                        ir_node* addr;

                                        ir_nodeset_iterator_init(&prediter, &predsets[i]);
                                        while ((addr = ir_nodeset_iterator_next(&prediter)) != NULL) {
                                                ir_nodeset_insert(&thissets[i], addr);
                                        }
                                }
                        }
                }
        }

        DB((dbg, LEVEL_3, "===> Detotalising %+F\n", node));

        addr_sets = get_irn_link(block);

        if (is_Load(node)) {
                PlaceLoad(irg, block, node, memory);
        } else if (is_Store(node)) {
                PlaceStore(irg, block, node, memory);
        } else {
                ir_nodeset_t sync_set;
                size_t i;
                ir_node* after;

                DB((dbg, LEVEL_3, "===> Fallback: %+F aliases everything\n", node));

                /* Union the sets of all addresses: the node has to wait for
                 * every one of their members. */
                ir_nodeset_init(&sync_set);
                for (i = 0; i < count_addrs; i++) {
                        ir_nodeset_iterator_t iter;
                        ir_node* mem;

                        ir_nodeset_iterator_init(&iter, &addr_sets[i]);
                        while ((mem = ir_nodeset_iterator_next(&iter)) != NULL) {
                                ir_nodeset_insert(&sync_set, mem);
                        }
                }

                after = GenerateSync(irg, block, &sync_set);
                set_irn_n(node, 0, after); // XXX unnice way to set the memory input
                /* NOTE(review): sync_set is not destroyed after use here --
                 * looks like a leak; confirm against the ir_nodeset API. */

                /* Empty every address set and make this node's memory its
                 * only member. */
                for (i = 0; i < count_addrs; i++) {
                        ir_nodeset_iterator_t iter;
                        ir_nodeset_iterator_init(&iter, &addr_sets[i]);
                        while (ir_nodeset_iterator_next(&iter) != NULL) {
                                ir_nodeset_remove_iterator(&addr_sets[i], &iter);
                        }
                        ir_nodeset_insert(&addr_sets[i], memory);
                }
        }

        return block_change;
}
479
480
481 static void FinalisePhis(ir_graph* irg)
482 {
483         size_t i;
484
485         for (i = 0; i < count_addrs; i++) {
486                 ir_node* next_phi;
487                 ir_node* phi;
488
489                 for (phi = unfinished_phis[i]; phi != NULL; phi = next_phi) {
490                         ir_node* block = get_nodes_block(phi);
491                         size_t block_n_preds = get_Block_n_cfgpreds(block);
492
493                         next_phi = get_irn_link(phi);
494
495                         DB((dbg, LEVEL_4, "===> Finialising phi %+F in %+F\n", phi, block));
496
497                         if (block_n_preds == 1) {
498                                 ir_node* pred_block = get_Block_cfgpred_block(block, 0);
499                                 ir_nodeset_t* pred_sets = get_irn_link(pred_block);
500                                 ir_node* after = GenerateSync(irg, pred_block, &pred_sets[i]);
501
502                                 assert(is_Unknown(get_Phi_pred(phi, 0)));
503                                 exchange(phi, after);
504                         } else {
505                                 ir_node** in;
506                                 size_t j;
507
508                                 NEW_ARR_A(ir_node*, in, block_n_preds);
509                                 for (j = 0; j < block_n_preds; j++) {
510                                         ir_node* pred_block = get_Block_cfgpred_block(block, j);
511                                         ir_nodeset_t* pred_sets = get_irn_link(pred_block);
512
513                                         if (is_Unknown(get_Phi_pred(phi, j))) {
514                                                 set_Phi_pred(phi, j, GenerateSync(irg, pred_block, &pred_sets[i]));
515                                         }
516                                 }
517                         }
518                 }
519         }
520 }
521
522
523 static void Detotalise(ir_graph* irg)
524 {
525         ir_node* end_block = get_irg_end_block(irg);
526         size_t npreds = get_Block_n_cfgpreds(end_block);
527         size_t i;
528
529         unfinished_phis = xmalloc(sizeof(*unfinished_phis) * count_addrs);
530         for (i = 0; i < count_addrs; i++) {
531                 unfinished_phis[i] = NULL;
532         }
533
534         for (i = 0; i < npreds; i++) {
535                 ir_node* pred = get_Block_cfgpred(end_block, i);
536                 assert(is_Return(pred));
537                 DB((dbg, LEVEL_2, "===> Starting memory walk at %+F\n", pred));
538                 WalkMem(irg, pred, NULL);
539         }
540
541         FinalisePhis(irg);
542         xfree(unfinished_phis);
543 }
544
545
546 static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
547 {
548         size_t n = get_Sync_n_preds(sync);
549         size_t i;
550
551         for (i = 0; i < n; i++) {
552                 ir_node* pred = get_Sync_pred(sync, i);
553                 if (is_Sync(pred)) {
554                         AddSyncPreds(preds, pred);
555                 } else {
556                         ir_nodeset_insert(preds, pred);
557                 }
558         }
559 }
560
561
562 static void NormaliseSync(ir_node* node, void* env)
563 {
564         ir_nodeset_t preds;
565         ir_nodeset_iterator_t iter;
566         ir_node** in;
567         size_t count_preds;
568         size_t i;
569
570         if (!is_Sync(node)) return;
571
572         ir_nodeset_init(&preds);
573         AddSyncPreds(&preds, node);
574
575         count_preds = ir_nodeset_size(&preds);
576         if (count_preds != get_Sync_n_preds(node)) {
577                 NEW_ARR_A(ir_node*, in, count_preds);
578                 ir_nodeset_iterator_init(&iter, &preds);
579                 for (i = 0; i < count_preds; i++) {
580                         ir_node* pred = ir_nodeset_iterator_next(&iter);
581                         assert(pred != NULL);
582                         in[i] = pred;
583                 }
584                 set_irn_in(node, count_preds, in);
585         }
586
587         ir_nodeset_destroy(&preds);
588 }
589
590
591 void opt_ldst2(ir_graph* irg)
592 {
593         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2");
594         DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg));
595
596         normalize_one_return(irg);
597
598         obstack_init(&obst);
599
600         if (1 /* XXX */ || get_opt_alias_analysis()) {
601                 assure_irg_address_taken_computed(irg);
602                 assure_irp_globals_address_taken_computed();
603         }
604
605
606         CollectAddresses(irg);
607         if (count_addrs == 0) return;
608
609         irg_block_walk_graph(irg, AliasSetAdder, NULL, NULL);
610         inc_irg_block_visited(irg);
611         SetStartAddressesTop(irg);
612         Detotalise(irg);
613
614         dump_ir_block_graph(irg, "-fluffig");
615
616         irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL);
617         obstack_free(&obst, NULL);
618
619         normalize_proj_nodes(irg);
620         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
621   optimize_graph_df(irg);
622         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
623 }