/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   parallelizing Load/Store optimisation
 * @author  Christoph Mallon
 */
#include <assert.h>
#include <stdlib.h>

#include "iroptimize.h"
#include "irnodeset.h"
46 typedef struct parallelize_info
48 ir_node *origin_block;
51 ir_nodeset_t this_mem;
52 ir_nodeset_t user_mem;
55 static void parallelize_load(parallelize_info *pi, ir_node *irn)
57 /* There is no point in investigating the same subgraph twice */
58 if (ir_nodeset_contains(&pi->user_mem, irn))
61 if (get_nodes_block(irn) == pi->origin_block) {
63 ir_node *pred = get_Proj_pred(irn);
65 get_Load_volatility(pred) == volatility_non_volatile) {
66 ir_node *mem = get_Load_mem(pred);
67 //ir_nodeset_insert(&pi->this_mem, mem);
68 ir_nodeset_insert(&pi->user_mem, irn);
69 parallelize_load(pi, mem);
71 } else if (is_Store(pred) &&
72 get_Store_volatility(pred) == volatility_non_volatile) {
73 ir_mode *org_mode = pi->origin_mode;
74 ir_node *org_ptr = pi->origin_ptr;
75 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
76 ir_node *store_ptr = get_Store_ptr(pred);
77 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
78 ir_node *mem = get_Store_mem(pred);
79 ir_nodeset_insert(&pi->user_mem, irn);
80 parallelize_load(pi, mem);
84 } else if (is_Sync(irn)) {
85 int n = get_Sync_n_preds(irn);
88 for (i = 0; i < n; ++i) {
89 ir_node *sync_pred = get_Sync_pred(irn, i);
90 parallelize_load(pi, sync_pred);
95 ir_nodeset_insert(&pi->this_mem, irn);
98 static void parallelize_store(parallelize_info *pi, ir_node *irn)
100 /* There is no point in investigating the same subgraph twice */
101 if (ir_nodeset_contains(&pi->user_mem, irn))
104 //ir_fprintf(stderr, "considering %+F\n", irn);
105 if (get_nodes_block(irn) == pi->origin_block) {
107 ir_node *pred = get_Proj_pred(irn);
109 get_Load_volatility(pred) == volatility_non_volatile) {
110 ir_mode *org_mode = pi->origin_mode;
111 ir_node *org_ptr = pi->origin_ptr;
112 ir_mode *load_mode = get_Load_mode(pred);
113 ir_node *load_ptr = get_Load_ptr(pred);
114 if (get_alias_relation(org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
115 ir_node *mem = get_Load_mem(pred);
116 ir_nodeset_insert(&pi->user_mem, irn);
117 parallelize_store(pi, mem);
120 } else if (is_Store(pred) &&
121 get_Store_volatility(pred) == volatility_non_volatile) {
122 ir_mode *org_mode = pi->origin_mode;
123 ir_node *org_ptr = pi->origin_ptr;
124 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
125 ir_node *store_ptr = get_Store_ptr(pred);
126 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
129 ir_nodeset_insert(&pi->user_mem, irn);
130 mem = get_Store_mem(pred);
131 parallelize_store(pi, mem);
135 } else if (is_Sync(irn)) {
136 int n = get_Sync_n_preds(irn);
139 for (i = 0; i < n; ++i) {
140 ir_node *sync_pred = get_Sync_pred(irn, i);
141 parallelize_store(pi, sync_pred);
146 ir_nodeset_insert(&pi->this_mem, irn);
149 static void walker(ir_node *proj, void *env)
159 if (!is_Proj(proj)) return;
160 if (get_irn_mode(proj) != mode_M) return;
162 mem_op = get_Proj_pred(proj);
163 if (is_Load(mem_op)) {
164 if (get_Load_volatility(mem_op) != volatility_non_volatile) return;
166 block = get_nodes_block(mem_op);
167 pred = get_Load_mem(mem_op);
169 pi.origin_block = block,
170 pi.origin_ptr = get_Load_ptr(mem_op);
171 pi.origin_mode = get_Load_mode(mem_op);
172 ir_nodeset_init(&pi.this_mem);
173 ir_nodeset_init(&pi.user_mem);
175 parallelize_load(&pi, pred);
176 } else if (is_Store(mem_op)) {
177 if (get_Store_volatility(mem_op) != volatility_non_volatile) return;
179 block = get_nodes_block(mem_op);
180 pred = get_Store_mem(mem_op);
182 pi.origin_block = block,
183 pi.origin_ptr = get_Store_ptr(mem_op);
184 pi.origin_mode = get_irn_mode(get_Store_value(mem_op));
185 ir_nodeset_init(&pi.this_mem);
186 ir_nodeset_init(&pi.user_mem);
188 parallelize_store(&pi, pred);
193 n = ir_nodeset_size(&pi.user_mem);
194 if (n > 0) { /* nothing happened otherwise */
196 ir_node **in = XMALLOCN(ir_node*, n+1);
201 foreach_ir_nodeset(&pi.user_mem, node, iter) {
205 sync = new_r_Sync(block, i, in);
207 edges_reroute_except(proj, sync, sync);
209 n = ir_nodeset_size(&pi.this_mem);
211 sync = ir_nodeset_first(&pi.this_mem);
213 in = XMALLOCN(ir_node*, n);
215 foreach_ir_nodeset(&pi.this_mem, node, iter) {
219 sync = new_r_Sync(block, i, in);
221 set_memop_mem(mem_op, sync);
224 ir_nodeset_destroy(&pi.this_mem);
225 ir_nodeset_destroy(&pi.user_mem);
228 void opt_parallelize_mem(ir_graph *irg)
230 assure_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES);
231 irg_walk_graph(irg, NULL, walker, NULL);
232 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_CONTROL_FLOW);
235 ir_graph_pass_t *opt_parallelize_mem_pass(const char *name)
237 return def_graph_pass(name ? name : "parallelize-mem", opt_parallelize_mem);