3936cf32608f4da56183109694c7b438f54ac0b2
[libfirm] / ir / opt / parallelize_mem.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   parallelizing Load/Store optimisation
23  * @author  Christoph Mallon
24  */
25 #include "config.h"
26
27 #include "iroptimize.h"
28
29 #include "array_t.h"
30 #include "debug.h"
31 #include "ircons.h"
32 #include "irgraph.h"
33 #include "irgmod.h"
34 #include "irgopt.h"
35 #include "irgwalk.h"
36 #include "irmemory.h"
37 #include "irnode.h"
38 #include "irnodeset.h"
39 #include "obst.h"
40 #include "irdump.h"
41 #include "irflag_t.h"
42 #include "irprintf.h"
43 #include "irpass.h"
44
/**
 * State collected while walking the memory chain above one Load/Store.
 */
typedef struct parallelize_info
{
	ir_node      *origin_block; /**< block of the Load/Store being optimised */
	ir_node      *origin_ptr;   /**< address accessed by the original memop */
	ir_mode      *origin_mode;  /**< mode of the value loaded/stored */
	ir_nodeset_t  this_mem;     /**< memory nodes the original memop really
	                                 depends on; becomes its new mem input */
	ir_nodeset_t  user_mem;     /**< memory Projs proven independent of the
	                                 original memop; become Sync operands that
	                                 may execute in parallel with it */
} parallelize_info;
53
54 static void parallelize_load(parallelize_info *pi, ir_node *irn)
55 {
56         /* There is no point in investigating the same subgraph twice */
57         if (ir_nodeset_contains(&pi->user_mem, irn))
58                 return;
59
60         if (get_nodes_block(irn) == pi->origin_block) {
61                 if (is_Proj(irn)) {
62                         ir_node *pred = get_Proj_pred(irn);
63                         if (is_Load(pred) &&
64                                         get_Load_volatility(pred) == volatility_non_volatile) {
65                                 ir_node *mem = get_Load_mem(pred);
66                                 //ir_nodeset_insert(&pi->this_mem, mem);
67                                 ir_nodeset_insert(&pi->user_mem, irn);
68                                 parallelize_load(pi, mem);
69                                 return;
70                         } else if (is_Store(pred) &&
71                                         get_Store_volatility(pred) == volatility_non_volatile) {
72                                 ir_mode *org_mode   = pi->origin_mode;
73                                 ir_node *org_ptr    = pi->origin_ptr;
74                                 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
75                                 ir_node *store_ptr  = get_Store_ptr(pred);
76                                 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
77                                         ir_node *mem = get_Store_mem(pred);
78                                         ir_nodeset_insert(&pi->user_mem, irn);
79                                         parallelize_load(pi, mem);
80                                         return;
81                                 }
82                         }
83                 } else if (is_Sync(irn)) {
84                         int n = get_Sync_n_preds(irn);
85                         int i;
86
87                         for (i = 0; i < n; ++i) {
88                                 ir_node *sync_pred = get_Sync_pred(irn, i);
89                                 parallelize_load(pi, sync_pred);
90                         }
91                         return;
92                 }
93         }
94         ir_nodeset_insert(&pi->this_mem, irn);
95 }
96
97 static void parallelize_store(parallelize_info *pi, ir_node *irn)
98 {
99         /* There is no point in investigating the same subgraph twice */
100         if (ir_nodeset_contains(&pi->user_mem, irn))
101                 return;
102
103         //ir_fprintf(stderr, "considering %+F\n", irn);
104         if (get_nodes_block(irn) == pi->origin_block) {
105                 if (is_Proj(irn)) {
106                         ir_node *pred = get_Proj_pred(irn);
107                         if (is_Load(pred) &&
108                                         get_Load_volatility(pred) == volatility_non_volatile) {
109                                 ir_mode *org_mode  = pi->origin_mode;
110                                 ir_node *org_ptr   = pi->origin_ptr;
111                                 ir_mode *load_mode = get_Load_mode(pred);
112                                 ir_node *load_ptr  = get_Load_ptr(pred);
113                                 if (get_alias_relation(org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
114                                         ir_node *mem = get_Load_mem(pred);
115                                         ir_nodeset_insert(&pi->user_mem, irn);
116                                         parallelize_store(pi, mem);
117                                         return;
118                                 }
119                         } else if (is_Store(pred) &&
120                                         get_Store_volatility(pred) == volatility_non_volatile) {
121                                 ir_mode *org_mode   = pi->origin_mode;
122                                 ir_node *org_ptr    = pi->origin_ptr;
123                                 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
124                                 ir_node *store_ptr  = get_Store_ptr(pred);
125                                 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
126                                         ir_node *mem;
127
128                                         ir_nodeset_insert(&pi->user_mem, irn);
129                                         mem = get_Store_mem(pred);
130                                         parallelize_store(pi, mem);
131                                         return;
132                                 }
133                         }
134                 } else if (is_Sync(irn)) {
135                         int n = get_Sync_n_preds(irn);
136                         int i;
137
138                         for (i = 0; i < n; ++i) {
139                                 ir_node *sync_pred = get_Sync_pred(irn, i);
140                                 parallelize_store(pi, sync_pred);
141                         }
142                         return;
143                 }
144         }
145         ir_nodeset_insert(&pi->this_mem, irn);
146 }
147
148 static void walker(ir_node *proj, void *env)
149 {
150         ir_node          *mem_op;
151         ir_node          *pred;
152         ir_node          *block;
153         size_t            n;
154         parallelize_info  pi;
155
156         (void)env;
157
158         if (!is_Proj(proj)) return;
159         if (get_irn_mode(proj) != mode_M) return;
160
161         mem_op = get_Proj_pred(proj);
162         if (is_Load(mem_op)) {
163                 if (get_Load_volatility(mem_op) != volatility_non_volatile) return;
164
165                 block = get_nodes_block(mem_op);
166                 pred  = get_Load_mem(mem_op);
167
168                 pi.origin_block = block,
169                 pi.origin_ptr   = get_Load_ptr(mem_op);
170                 pi.origin_mode  = get_Load_mode(mem_op);
171                 ir_nodeset_init(&pi.this_mem);
172                 ir_nodeset_init(&pi.user_mem);
173
174                 parallelize_load(&pi, pred);
175         } else if (is_Store(mem_op)) {
176                 if (get_Store_volatility(mem_op) != volatility_non_volatile) return;
177
178                 block = get_nodes_block(mem_op);
179                 pred  = get_Store_mem(mem_op);
180
181                 pi.origin_block = block,
182                 pi.origin_ptr   = get_Store_ptr(mem_op);
183                 pi.origin_mode  = get_irn_mode(get_Store_value(mem_op));
184                 ir_nodeset_init(&pi.this_mem);
185                 ir_nodeset_init(&pi.user_mem);
186
187                 parallelize_store(&pi, pred);
188         } else {
189                 return;
190         }
191
192         n = ir_nodeset_size(&pi.user_mem);
193         if (n != 0) { /* nothing happened otherwise */
194                 ir_graph               *irg  = get_irn_irg(block);
195                 ir_node                *sync;
196                 ir_node               **in;
197                 ir_nodeset_iterator_t   iter;
198                 size_t                  i;
199
200                 ++n;
201                 NEW_ARR_A(ir_node*, in, n);
202                 i = 0;
203                 in[i++] = new_r_Unknown(irg, mode_M);
204                 ir_nodeset_iterator_init(&iter, &pi.user_mem);
205                 for (;;) {
206                         ir_node* p = ir_nodeset_iterator_next(&iter);
207                         if (p == NULL) break;
208                         in[i++] = p;
209                 }
210                 assert(i == n);
211                 sync = new_r_Sync(block, n, in);
212                 exchange(proj, sync);
213
214                 assert((long)pn_Load_M == (long)pn_Store_M);
215                 proj = new_r_Proj(mem_op, mode_M, pn_Load_M);
216                 set_Sync_pred(sync, 0, proj);
217
218                 n = ir_nodeset_size(&pi.this_mem);
219                 ir_nodeset_iterator_init(&iter, &pi.this_mem);
220                 if (n == 1) {
221                         sync = ir_nodeset_iterator_next(&iter);
222                 } else {
223                         NEW_ARR_A(ir_node*, in, n);
224                         i = 0;
225                         for (;;) {
226                                 ir_node* p = ir_nodeset_iterator_next(&iter);
227                                 if (p == NULL) break;
228                                 in[i++] = p;
229                         }
230                         assert(i == n);
231                         sync = new_r_Sync(block, n, in);
232                 }
233                 set_memop_mem(mem_op, sync);
234         }
235
236         ir_nodeset_destroy(&pi.this_mem);
237         ir_nodeset_destroy(&pi.user_mem);
238 }
239
/**
 * Run the memory-parallelization optimisation on the whole graph and
 * confirm that only control-flow-level graph properties are preserved.
 */
void opt_parallelize_mem(ir_graph *irg)
{
	irg_walk_graph(irg, NULL, walker, NULL);
	confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_CONTROL_FLOW);
}
245
/**
 * Create an ir_graph pass wrapper for opt_parallelize_mem().
 *
 * @param name  pass name, or NULL to use the default "parallelize-mem"
 */
ir_graph_pass_t *opt_parallelize_mem_pass(const char *name)
{
	return def_graph_pass(name ? name : "parallelize-mem", opt_parallelize_mem);
}