introduce Switch node
[libfirm] / ir / opt / parallelize_mem.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   parallelizing Load/Store optimisation
23  * @author  Christoph Mallon
24  * @version $Id: $
25  */
26 #include "config.h"
27
28 #include "iroptimize.h"
29
30 #include "array_t.h"
31 #include "debug.h"
32 #include "ircons.h"
33 #include "irgraph.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irmemory.h"
38 #include "irnode.h"
39 #include "irnodeset.h"
40 #include "obst.h"
41 #include "irdump.h"
42 #include "irflag_t.h"
43 #include "irprintf.h"
44 #include "irpass.h"
45 #include "opt_manage.h"
46
/**
 * State carried through one traversal of the memory chain above a single
 * Load/Store. Describes the origin memop and collects the classification
 * of every memory node visited.
 */
typedef struct parallelize_info
{
	ir_node      *origin_block;  /**< block of the origin Load/Store; only nodes in this block are skipped over */
	ir_node      *origin_ptr;    /**< address the origin memop accesses, for alias queries */
	ir_mode      *origin_mode;   /**< mode of the accessed value, for alias queries */
	ir_nodeset_t  this_mem;      /**< memory nodes the origin memop really depends on */
	ir_nodeset_t  user_mem;      /**< memory Projs that may execute in parallel with the origin memop */
} parallelize_info;
55
56 static void parallelize_load(parallelize_info *pi, ir_node *irn)
57 {
58         /* There is no point in investigating the same subgraph twice */
59         if (ir_nodeset_contains(&pi->user_mem, irn))
60                 return;
61
62         if (get_nodes_block(irn) == pi->origin_block) {
63                 if (is_Proj(irn)) {
64                         ir_node *pred = get_Proj_pred(irn);
65                         if (is_Load(pred) &&
66                                         get_Load_volatility(pred) == volatility_non_volatile) {
67                                 ir_node *mem = get_Load_mem(pred);
68                                 //ir_nodeset_insert(&pi->this_mem, mem);
69                                 ir_nodeset_insert(&pi->user_mem, irn);
70                                 parallelize_load(pi, mem);
71                                 return;
72                         } else if (is_Store(pred) &&
73                                         get_Store_volatility(pred) == volatility_non_volatile) {
74                                 ir_mode *org_mode   = pi->origin_mode;
75                                 ir_node *org_ptr    = pi->origin_ptr;
76                                 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
77                                 ir_node *store_ptr  = get_Store_ptr(pred);
78                                 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
79                                         ir_node *mem = get_Store_mem(pred);
80                                         ir_nodeset_insert(&pi->user_mem, irn);
81                                         parallelize_load(pi, mem);
82                                         return;
83                                 }
84                         }
85                 } else if (is_Sync(irn)) {
86                         int n = get_Sync_n_preds(irn);
87                         int i;
88
89                         for (i = 0; i < n; ++i) {
90                                 ir_node *sync_pred = get_Sync_pred(irn, i);
91                                 parallelize_load(pi, sync_pred);
92                         }
93                         return;
94                 }
95         }
96         ir_nodeset_insert(&pi->this_mem, irn);
97 }
98
99 static void parallelize_store(parallelize_info *pi, ir_node *irn)
100 {
101         /* There is no point in investigating the same subgraph twice */
102         if (ir_nodeset_contains(&pi->user_mem, irn))
103                 return;
104
105         //ir_fprintf(stderr, "considering %+F\n", irn);
106         if (get_nodes_block(irn) == pi->origin_block) {
107                 if (is_Proj(irn)) {
108                         ir_node *pred = get_Proj_pred(irn);
109                         if (is_Load(pred) &&
110                                         get_Load_volatility(pred) == volatility_non_volatile) {
111                                 ir_mode *org_mode  = pi->origin_mode;
112                                 ir_node *org_ptr   = pi->origin_ptr;
113                                 ir_mode *load_mode = get_Load_mode(pred);
114                                 ir_node *load_ptr  = get_Load_ptr(pred);
115                                 if (get_alias_relation(org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
116                                         ir_node *mem = get_Load_mem(pred);
117                                         ir_nodeset_insert(&pi->user_mem, irn);
118                                         parallelize_store(pi, mem);
119                                         return;
120                                 }
121                         } else if (is_Store(pred) &&
122                                         get_Store_volatility(pred) == volatility_non_volatile) {
123                                 ir_mode *org_mode   = pi->origin_mode;
124                                 ir_node *org_ptr    = pi->origin_ptr;
125                                 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
126                                 ir_node *store_ptr  = get_Store_ptr(pred);
127                                 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
128                                         ir_node *mem;
129
130                                         ir_nodeset_insert(&pi->user_mem, irn);
131                                         mem = get_Store_mem(pred);
132                                         parallelize_store(pi, mem);
133                                         return;
134                                 }
135                         }
136                 } else if (is_Sync(irn)) {
137                         int n = get_Sync_n_preds(irn);
138                         int i;
139
140                         for (i = 0; i < n; ++i) {
141                                 ir_node *sync_pred = get_Sync_pred(irn, i);
142                                 parallelize_store(pi, sync_pred);
143                         }
144                         return;
145                 }
146         }
147         ir_nodeset_insert(&pi->this_mem, irn);
148 }
149
150 static void walker(ir_node *proj, void *env)
151 {
152         ir_node          *mem_op;
153         ir_node          *pred;
154         ir_node          *block;
155         size_t            n;
156         parallelize_info  pi;
157
158         (void)env;
159
160         if (!is_Proj(proj)) return;
161         if (get_irn_mode(proj) != mode_M) return;
162
163         mem_op = get_Proj_pred(proj);
164         if (is_Load(mem_op)) {
165                 if (get_Load_volatility(mem_op) != volatility_non_volatile) return;
166
167                 block = get_nodes_block(mem_op);
168                 pred  = get_Load_mem(mem_op);
169
170                 pi.origin_block = block,
171                 pi.origin_ptr   = get_Load_ptr(mem_op);
172                 pi.origin_mode  = get_Load_mode(mem_op);
173                 ir_nodeset_init(&pi.this_mem);
174                 ir_nodeset_init(&pi.user_mem);
175
176                 parallelize_load(&pi, pred);
177         } else if (is_Store(mem_op)) {
178                 if (get_Store_volatility(mem_op) != volatility_non_volatile) return;
179
180                 block = get_nodes_block(mem_op);
181                 pred  = get_Store_mem(mem_op);
182
183                 pi.origin_block = block,
184                 pi.origin_ptr   = get_Store_ptr(mem_op);
185                 pi.origin_mode  = get_irn_mode(get_Store_value(mem_op));
186                 ir_nodeset_init(&pi.this_mem);
187                 ir_nodeset_init(&pi.user_mem);
188
189                 parallelize_store(&pi, pred);
190         } else {
191                 return;
192         }
193
194         n = ir_nodeset_size(&pi.user_mem);
195         if (n != 0) { /* nothing happened otherwise */
196                 ir_graph               *irg  = get_irn_irg(block);
197                 ir_node                *sync;
198                 ir_node               **in;
199                 ir_nodeset_iterator_t   iter;
200                 size_t                  i;
201
202                 ++n;
203                 NEW_ARR_A(ir_node*, in, n);
204                 i = 0;
205                 in[i++] = new_r_Unknown(irg, mode_M);
206                 ir_nodeset_iterator_init(&iter, &pi.user_mem);
207                 for (;;) {
208                         ir_node* p = ir_nodeset_iterator_next(&iter);
209                         if (p == NULL) break;
210                         in[i++] = p;
211                 }
212                 assert(i == n);
213                 sync = new_r_Sync(block, n, in);
214                 exchange(proj, sync);
215
216                 assert((long)pn_Load_M == (long)pn_Store_M);
217                 proj = new_r_Proj(mem_op, mode_M, pn_Load_M);
218                 set_Sync_pred(sync, 0, proj);
219
220                 n = ir_nodeset_size(&pi.this_mem);
221                 ir_nodeset_iterator_init(&iter, &pi.this_mem);
222                 if (n == 1) {
223                         sync = ir_nodeset_iterator_next(&iter);
224                 } else {
225                         NEW_ARR_A(ir_node*, in, n);
226                         i = 0;
227                         for (;;) {
228                                 ir_node* p = ir_nodeset_iterator_next(&iter);
229                                 if (p == NULL) break;
230                                 in[i++] = p;
231                         }
232                         assert(i == n);
233                         sync = new_r_Sync(block, n, in);
234                 }
235                 set_memop_mem(mem_op, sync);
236         }
237
238         ir_nodeset_destroy(&pi.this_mem);
239         ir_nodeset_destroy(&pi.user_mem);
240 }
241
242 static ir_graph_state_t do_parallelize_mem(ir_graph *irg)
243 {
244         irg_walk_graph(irg, NULL, walker, NULL);
245
246         return 0;
247 }
248
/* Descriptor registering this optimization with the pass-management
 * framework (see opt_manage.h). */
static optdesc_t opt_parallel_mem = {
	"parallel-mem",     /* name of the optimization */
	0,                  /* no required graph state */
	do_parallelize_mem, /* the optimization routine */
};
254
/**
 * Public entry point: run the memory-parallelization optimization on irg
 * through the generic optimization driver.
 */
void opt_parallelize_mem(ir_graph *irg)
{
	perform_irg_optimization(irg, &opt_parallel_mem);
}
259
/**
 * Create an ir_graph pass wrapping opt_parallelize_mem().
 *
 * @param name  pass name; NULL selects the default "parallelize-mem"
 */
ir_graph_pass_t *opt_parallelize_mem_pass(const char *name)
{
	return def_graph_pass(name ? name : "parallelize-mem", opt_parallelize_mem);
}