2 * Copyright (C) 1995-2007 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * File name: ir/opt/data_flow_scalar_replace.c
23 * Purpose: scalar replacement of arrays and compounds
24 * Author: Beyhan Veliev
25 * Modified by: Michael Beck
28 * Copyright: (c) 1998-2005 Universität Karlsruhe
38 #include "data_flow_scalar_replace.h"
49 #include "analyze_irg_args.h"
51 #include "compute_loop_info.h"
/* Store/fetch a value number in the link field of an entity or ir node.
 * The link field is reused as scratch storage during this pass. */
55 #define SET_ENT_VNUM(ent, vnum) set_entity_link(ent, INT_TO_PTR(vnum))
56 #define GET_ENT_VNUM(ent) (unsigned)PTR_TO_INT(get_entity_link(ent))
57 #define SET_IRN_VNUM(irn, vnum) set_irn_link(irn, INT_TO_PTR(vnum))
58 #define GET_IRN_VNUM(irn) (unsigned)PTR_TO_INT(get_irn_link(irn))
62 typedef struct _ent_leaves_t{
63 ir_entity *ent; /**< An entity that contains scalars eligible for replacement. */
64 pset *leaves; /**< All leaves (innermost Sels) of this entity. */
67 typedef struct _sels_t {
68 ir_node *sel; /**< A Sel node whose entity has scalars. */
69 ir_entity *ent; /**< The root entity of this Sel node. */
72 typedef struct _call_access_t {
73 ir_node *call; /**< A Call node that has a scalar as parameter. */
74 unsigned int access_type; /**< The access type with which this call accesses the scalar. */
77 typedef struct _fixlist_entry_t {
78 ir_node *irn; /**< An ir node that must be fixed. */
79 unsigned int vnum; /**< The value number this ir node must receive. */
82 typedef struct _syncs_fixlist_entry_t {
83 ir_node *irn; /**< A Sync node that must be fixed. */
84 int *accessed_vnum; /**< Array of value numbers that must be wired into this Sync. */
85 }syncs_fixlist_entry_t;
87 /* An entry that saves the memory
88 * edge state and the access state for one leaf
89 * in the array that is created for every block. */
90 typedef struct _leave_t {
91 ir_node *mem_edge_state; /**< memory state for this scalar in this block. */
92 unsigned int access_type; /**< access state for this scalar in this block. */
93 set *calls; /**< Call nodes that change this scalar in this block. */
97 * A path element entry: it is either an entity
98 * or a tarval, because we evaluate only constant array
99 * accesses like a.b.c[8].d
107 * An access path, used to assign value numbers
108 * to variables that will be scalar replaced
110 typedef struct _path_t {
111 unsigned vnum; /**< the value number */
112 unsigned path_len; /**< the length of the access path */
113 path_elem_t path[1]; /**< the path */
117 * environment for memory walker
119 typedef struct _env_t {
120 struct obstack obst; /**< an obstack for the memory edge */
121 set *set_sels; /**< a set with all Sels that are reachable from an entity with a scalar. */
122 set *set_ent; /**< a set with all entities that have one or more scalars. */
123 fixlist_entry_t *fix_phis; /**< list of all Phi nodes that must be fixed */
124 fixlist_entry_t *fix_ls; /**< list of all Load or Store nodes that must be fixed */
125 syncs_fixlist_entry_t *fix_syncs; /**< list of all Sync nodes that must be fixed */
126 unsigned int nvals; /**< saves the number of scalars. */
127 unsigned int gl_mem_vnum; /**< position of the global memory edge state in var_arr. */
128 unsigned int vnum_state; /**< position of the value number state in var_arr. */
129 unsigned int changes; /**< records whether analyse_calls changed anything. */
135 * Compare two elements of the ent_leaves_t set (compares by entity pointer).
137 * @return 0 if they are identical
139 static int ent_leaves_t_cmp(const void *elt, const void *key, size_t size)
141 const ent_leaves_t *c1 = elt;
142 const ent_leaves_t *c2 = key;
144 return c1->ent != c2->ent;
148 * Compare two elements of the ent_access_t set (pointer identity).
150 * @return 0 if they are identical
152 static int ent_cmp(const void *elt, const void *key)
154 const ir_entity *c1 = elt;
155 const ir_entity *c2 = key;
161 * Compare two elements of the sels_t set (compares by Sel node pointer).
163 * @return 0 if they are identical
165 static int sels_cmp(const void *elt, const void *key, size_t size)
167 const sels_t *c1 = elt;
168 const sels_t *c2 = key;
170 return c1->sel != c2->sel;
174 * Compare two elements of the leave_t set (compares the Sel entities).
176 * @return 0 if they are identical
178 static int leave_cmp(const void *elt, const void *key)
180 ir_node *c1 = (ir_node *)elt;
181 ir_node *c2 = (ir_node *)key;
183 return get_Sel_entity(c1) != get_Sel_entity(c2);
187 * Compare two elements of the call_access_t set (compares by Call node pointer).
189 * @return 0 if they are identical
191 static int call_cmp(const void *elt, const void *key, size_t size)
193 const call_access_t *c1 = elt;
194 const call_access_t *c2 = key;
196 return c1->call != c2->call;
202 * @return 0 if they are identical
204 static int path_cmp(const void *elt, const void *key, size_t size)
206 const path_t *p1 = elt;
207 const path_t *p2 = key;
209 /* we can use memcmp here, because identical tarvals should have identical addresses */
210 return memcmp(p1->path, p2->path, p1->path_len * sizeof(p1->path[0]));
214 * Calculate a hash value for a path by XOR-folding its element pointers.
216 static unsigned path_hash(const path_t *path)
221 for (i = 0; i < path->path_len; ++i)
222 hash ^= (unsigned)PTR_TO_INT(path->path[i].ent);
228 * Returns non-zero if all indices of a Sel node are constants.
230 * @param sel the Sel node that will be checked
232 static int is_const_sel(ir_node *sel) {
233 int i, n = get_Sel_n_indexs(sel);
235 for (i = 0; i < n; ++i) {
236 ir_node *idx = get_Sel_index(sel, i);
238 if (get_irn_op(idx) != op_Const)
245 * Returns non-zero if the address of an entity
246 * represented by a Sel node (or its successor Sels) is taken.
248 static int is_address_taken(ir_node *sel)
252 if (! is_const_sel(sel))
255 for (i = get_irn_n_outs(sel) - 1; i >= 0; --i) {
256 ir_node *succ = get_irn_out(sel, i);
258 switch (get_irn_opcode(succ)) {
260 /* ok, we just load from that entity */
264 /* check that Sel is not the Store's value */
265 if (get_Store_value(succ) == sel)
270 /* recursively check the Sel successor of this Sel */
271 int res = is_address_taken(succ);
279 /* The address of an entity is given as a parameter.
280 * We analyze that later and may optimize this scalar
286 /* another op, the address is taken */
294 * Link all Sels with the entity.
296 * @param ent the entity that will be scalar replaced
297 * @param sel a Sel node that selects some fields of this entity
299 static void link_all_leave_sels(ir_entity *ent, ir_node *sel)
303 n = get_irn_n_outs(sel);
304 for (i = 0; i < n; ++i) {
305 ir_node *succ = get_irn_out(sel, i);
307 if (get_irn_op(succ) == op_Sel)
308 link_all_leave_sels(ent, succ);
312 /* if Sel nodes with memory inputs are used, an entity can be
313 * visited more than once causing a ring here, so we use the
314 * node flag to mark linked nodes
316 if (irn_visited(sel))
320 * we link the Sels to the entity (chained through the irn link field).
322 set_irn_link(sel, get_entity_link(ent));
323 set_entity_link(ent, sel);
325 mark_irn_visited(sel);
328 /* we need a special address that serves as an address-taken marker */
330 static void *ADDRESS_TAKEN = &_x;
333 * Find possible scalar replacements.
335 * @param irg an IR graph
337 * This function finds variables on the (members of the) frame type
338 * that can be scalar replaced, because their address is never taken.
339 * If such a variable is found, its entity link will hold a list of all
340 * Sel nodes that select anything of this entity.
341 * Otherwise, the link will be ADDRESS_TAKEN or NULL.
343 * @return non-zero if at least one entity could be replaced
346 static int find_possible_replacements(ir_graph *irg)
348 ir_node *irg_frame = get_irg_frame(irg);
352 inc_irg_visited(irg);
354 n = get_irn_n_outs(irg_frame);
357 * First, clear the link field of all interesting entities.
358 * Note that we do not rely on the fact that there is only
359 * one Sel node per entity, so we might access one entity
360 * more than once here.
361 * That's why we need two loops.
363 for (i = 0; i < n; ++i) {
364 ir_node *succ = get_irn_out(irg_frame, i);
366 if (get_irn_op(succ) == op_Sel) {
367 ir_entity *ent = get_Sel_entity(succ);
368 set_entity_link(ent, NULL);
373 * Check the ir_graph for Sel nodes. If the entity of a Sel
374 * isn't a scalar replacement candidate, set the link of this entity
375 * to ADDRESS_TAKEN.
377 for (i = 0; i < n; ++i) {
378 ir_node *succ = get_irn_out(irg_frame, i);
380 if (get_irn_op(succ) == op_Sel) {
381 ir_entity *ent = get_Sel_entity(succ);
384 if (get_entity_link(ent) == ADDRESS_TAKEN)
388 * Beware: in rare cases even entities on the frame might be
389 * volatile. This might happen if the entity serves as a store
390 * to a value that must survive an exception. Do not optimize
391 * such entities away.
393 if (get_entity_volatility(ent) == volatility_is_volatile) {
394 set_entity_link(ent, ADDRESS_TAKEN);
398 ent_type = get_entity_type(ent);
400 /* so far we can only handle arrays, structs and atomic types */
401 if (is_Array_type(ent_type) || is_Struct_type(ent_type) || is_atomic_type(ent_type)) {
402 if (is_address_taken(succ)) {
403 if (get_entity_link(ent)) /* killing one */
405 set_entity_link(ent, ADDRESS_TAKEN);
408 /* possibly found one */
409 if (get_entity_link(ent) == NULL)
411 link_all_leave_sels(ent, succ);
/** Returns non-zero if the Sel has no Sel successors, i.e. it is a leaf of the Sel tree. */
420 static int is_leave_sel(ir_node *sel) {
424 for(i = get_irn_n_outs(sel) - 1; i >= 0; i--) {
425 succ = get_irn_out(sel, i);
426 if(get_irn_op(succ) == op_Sel)
434 * Return a path from the Sel node sel to its root.
436 * @param sel the Sel node
437 * @param len the length of the path so far
439 static path_t *find_path(ir_node *sel, unsigned len)
443 ir_node *pred = get_Sel_ptr(sel);
445 /* the current Sel node will add some path elements */
446 n = get_Sel_n_indexs(sel);
449 if (get_irn_op(pred) != op_Sel) {
450 /* we found the root */
452 res = xmalloc(sizeof(*res) + (len - 1) * sizeof(res->path));
456 res = find_path(pred, len);
458 pos = res->path_len - len;
460 res->path[pos++].ent = get_Sel_entity(sel);
461 for (i = 0; i < n; ++i) {
462 ir_node *index = get_Sel_index(sel, i);
464 if(get_irn_op(index) == op_Const)
465 res->path[pos++].tv = get_Const_tarval(index);
471 * Allocate value numbers for the leaves
472 * in our found entities.
474 * @param set_sels a set that will contain all Sels that have a value number
475 * @param ent the entity that will be scalar replaced
476 * @param vnum the first value number we can assign
477 * @param modes a flexible array, containing all the modes of
480 * @return the next free value number
482 static unsigned allocate_value_numbers(set *set_sels, pset *leaves, ir_entity *ent, unsigned vnum)
487 set *pathes = new_set(path_cmp, 8);
489 /* visit all Sel nodes in the chain of the entity */
490 for (sel = get_entity_link(ent); sel; sel = next) {
491 next = get_irn_link(sel);
493 /* we save for every Sel its root entity; we need this
494 * information when we split the memory edge,
495 * and we must mark this Sel for later. */
498 set_insert(set_sels, &key_sels, sizeof(key_sels), HASH_PTR(sel));
500 if(! is_leave_sel(sel))
502 /* We have found a leaf and we add it to the pset of this entity. */
503 pset_insert(leaves, sel, HASH_PTR(get_Sel_entity(sel)));
505 key = find_path(sel, 0);
506 path = set_find(pathes, key, sizeof(*key) + sizeof(key->path[0]) * key->path_len, path_hash(key));
509 SET_IRN_VNUM(sel, path->vnum);
514 set_insert(pathes, key, sizeof(*key) + sizeof(key->path[0]) * key->path_len, path_hash(key));
516 SET_IRN_VNUM(sel, key->vnum);
522 set_entity_link(ent, NULL);
526 * Add a Sync node to the fix list.
528 * @param sync The Sync node that must be added to the fix list.
529 * @param unk_vnum An array with the value numbers that are synced by this Sync node.
530 * @param env The environment pointer.
532 static void add_sync_to_fixlist(ir_node *sync, int *unk_vnum, env_t *env) {
534 syncs_fixlist_entry_t *s;
536 s = obstack_alloc(&env->obst, sizeof(*s));
538 s->accessed_vnum = unk_vnum;
539 set_irn_link(sync, env->fix_syncs);
543 * Add an ir node to its fix list (Phi list or Load/Store list, depending on op).
545 * @param irn The ir node that must be added to the fix list.
546 * @param vnum The value number this ir node must receive as predecessor later.
547 * @param env The environment pointer.
549 static void add_ls_to_fixlist(ir_node *irn, int vnum, env_t *env) {
553 l = obstack_alloc(&env->obst, sizeof(*l));
557 if(get_irn_op(irn) == op_Phi) {
558 set_irn_link(l->irn, env->fix_phis);
561 set_irn_link(l->irn, env->fix_ls);
/* Append the memory state of value number vnum to the in array; if no state
 * exists yet, append an Unknown placeholder and remember vnum for later repair. */
566 static void add_mem_edge(value_arr_entry_t *val_arr, int vnum, ir_node ***in, int **accessed_vnum) {
568 if(val_arr[vnum].mem_edge_state != NULL)
569 ARR_APP1(ir_node *, *in, val_arr[vnum].mem_edge_state);
571 ARR_APP1(int, *accessed_vnum, vnum);
572 ARR_APP1(ir_node *, *in, new_Unknown(mode_M));
576 * The function handles the scalars that were stored
579 * @param blk The block that must be handled.
580 * @param env The environment pointer.
583 /* Return the memory successor of the call node. */
584 static ir_node *get_Call_mem_out(ir_node *call) {
589 for(i = get_irn_n_outs(call) - 1; i >= 0; i--) {
590 mem = get_irn_out(call, i);
591 if(get_irn_mode(mem) == mode_M)
594 /* is not reachable */
/* Create Sync nodes for scalars whose address was stored, merging the split
 * memory edges back into the global memory state.
 * NOTE(review): access_type <= 3 appears to mean none/read/write/rw, i.e.
 * address not stored — confirm against the access-type encoding. */
599 static void sync_stored_scalars(ir_node *blk, env_t *env) {
602 int *unk_vnum; /**< An array where the value numbers are saved that
603 are synced by this sync node. */
604 ent_leaves_t *value_ent;
605 value_arr_entry_t *val_arr_blk, *val_arr;
606 ir_node *pred, *leave, *sync, **in;
607 ir_node *sync_blk; /**< The block where the sync node must be created. */
610 val_arr_blk = get_irn_link(blk);
612 for(value_ent = set_first(env->set_ent); value_ent; value_ent = set_next(env->set_ent)) {
615 if(val_arr_blk[GET_ENT_VNUM(value_ent->ent)].access_type <= 3)
616 /* This entity is not stored in this block. */
619 for(i = get_Block_n_cfgpreds(blk) - 1; i >= 0; i--) {
621 pred = get_Block_cfgpred(blk, i);
622 pred = get_nodes_block(pred);
623 val_arr = get_irn_link(pred);
625 if(val_arr[GET_ENT_VNUM(value_ent->ent)].access_type == SYNCED)
626 /* This entity was already synced. */
629 if(val_arr[GET_ENT_VNUM(value_ent->ent)].access_type <= 3) {
631 /* To avoid repeated sync of this entity in this block. */
632 val_arr[GET_ENT_VNUM(value_ent->ent)].access_type = SYNCED;
633 /* In this predecessor block this entity is not accessed.
634 * We must sync at the end of this block. */
635 if(get_Block_n_cfgpreds(blk) > 1)
636 sync_blk = get_nodes_block(get_Block_cfgpred(blk, i));
640 val_arr = get_irn_link(sync_blk);
641 /* An array to save the memory edges that must be
643 in = NEW_ARR_F(ir_node *, 1);
645 /* An array to save the value numbers
646 * that must be repaired. */
647 unk_vnum = NEW_ARR_F(int, 0);
648 /* The global memory edge. */
649 if(val_arr[env->gl_mem_vnum].mem_edge_state == NULL)
650 in[0] = new_Unknown(mode_M);
652 in[0] = val_arr[env->gl_mem_vnum].mem_edge_state;
654 for(leave = pset_first(value_ent->leaves); leave; leave = pset_next(value_ent->leaves))
655 /* All these memory edges must be synced. */
656 add_mem_edge(val_arr, GET_IRN_VNUM(leave), &in, &unk_vnum);
658 /* We create the sync and set it as the global memory state. */
659 sync = new_r_Sync(current_ir_graph, sync_blk, ARR_LEN(in), in);
660 /* We must check this, because it is possible to get a Bad node
661 * from new_r_Sync() when the node can be optimized.
662 * In that case we must do nothing. */
663 if(get_irn_op(sync) == op_Sync) {
664 val_arr[env->gl_mem_vnum].mem_edge_state = sync;
665 /* We add this sync node to the sync fix list. */
666 add_sync_to_fixlist(val_arr[env->gl_mem_vnum].mem_edge_state, unk_vnum, env);
674 * The function splits the memory edge of Load and Store nodes that have
675 * a scalar as predecessor.
677 * @param irn The node whose memory edge must be split.
678 * @param env The environment pointer.
680 static void split_ls_mem_edge(ir_node *irn, env_t *env) {
683 ir_node *leave, *irn_blk, *mem_state, *new_mem_state;
684 unsigned ent_vnum, sel_vnum, i;
685 value_arr_entry_t *val_arr;
686 sels_t key_sels, *value_sels;
687 ent_leaves_t key_ent, *value_ent;
689 op = get_irn_op(irn);
692 key_sels.sel = get_Load_ptr(irn);
694 key_sels.sel = get_Store_ptr(irn);
696 value_sels = set_find(env->set_sels, &key_sels, sizeof(key_sels), HASH_PTR(key_sels.sel));
698 if(value_sels != NULL) {
699 /* we have found a Load or Store that uses a Sel of our set,
700 * and we must split the memory edge, or extend it if it has
701 * already been split for this Sel. */
703 key_ent.ent = value_sels->ent;
704 value_ent = set_find(env->set_ent, &key_ent, sizeof(key_ent), HASH_PTR(key_ent.ent));
705 /* Check that the entities set is correctly filled. */
706 assert(value_ent && " This sel's entity isn't int the entity set.");
708 leave = pset_find(value_ent->leaves, key_sels.sel, HASH_PTR(get_Sel_entity(key_sels.sel)));
709 /* Check that the leaves set is correctly filled. */
710 assert(leave && "Anything in data_flow_scalar_replacment algorithm is wrong.");
712 ent_vnum = GET_ENT_VNUM(value_ent->ent);
713 sel_vnum = GET_IRN_VNUM(leave);
714 irn_blk = get_nodes_block(irn);
715 val_arr = get_irn_link(irn_blk);
717 if(val_arr[ent_vnum].access_type == 0)
718 /* We have found a scalar whose address is not stored yet. */
721 /* This scalar's address has been stored. */
722 i = env->gl_mem_vnum;
724 if(val_arr[i].mem_edge_state == NULL) {
725 /* We now split the memory edge for this Sel in this block. */
726 mem_state = new_Unknown(mode_M);
727 /* We must mark this node to fix it later. */
728 add_ls_to_fixlist(irn, i, env);
731 /* We have split the memory edge and the current state is saved. */
732 mem_state = val_arr[i].mem_edge_state;
734 /* We set this Load or Store onto the memory edge of this
737 set_Load_mem(irn, mem_state);
739 set_Store_mem(irn, mem_state);
741 /* When we have split or extended the memory edge we must
742 * update the memory_edge_state of this Sel. */
743 new_mem_state = get_irn_out(irn, 0);
744 if(get_irn_mode(new_mem_state) == mode_M)
745 val_arr[i].mem_edge_state = new_mem_state;
747 val_arr[i].mem_edge_state = get_irn_out(irn, 1);
752 * The function splits the memory edge of Phi nodes that have
753 * a scalar as predecessor.
755 * @param irn The Phi node whose memory edge must be split.
756 * @param env The environment pointer.
758 static void split_phi_mem_edge(ir_node *irn, env_t *env) {
760 ir_node *irn_blk, *unk, *leave, **in;
762 ent_leaves_t *value_ent;
763 value_arr_entry_t *val_arr;
765 irn_blk = get_nodes_block(irn);
766 val_arr = get_irn_link(irn_blk);
768 n = get_Block_n_cfgpreds(irn_blk);
770 in = alloca(sizeof(*in) * n);
772 for(value_ent = set_first(env->set_ent); value_ent; value_ent = set_next(env->set_ent))
773 if(val_arr[GET_ENT_VNUM(value_ent->ent)].access_type < 3)
774 /* This scalar was not saved, and we need to produce a Phi for it. */
775 for(leave = pset_first(value_ent->leaves); leave; leave = pset_next(value_ent->leaves)){
777 unk = new_Unknown(mode_M);
778 for (j = n - 1; j >= 0; --j)
781 val_arr[GET_IRN_VNUM(leave)].mem_edge_state = new_r_Phi(current_ir_graph, irn_blk, n, in, mode_M);
783 add_ls_to_fixlist(val_arr[GET_IRN_VNUM(leave)].mem_edge_state, GET_IRN_VNUM(leave), env);
786 /* For the global memory we use the Phi node that
787 * is already available. */
788 val_arr[env->gl_mem_vnum].mem_edge_state = irn;
792 * The function handles the Call nodes that have
793 * a scalar as parameter.
795 * @param env The environment pointer.
796 * @param call The call node that must be handled.
797 * @param accessed_entities A set with all entities that are accessed by this call node. */
798 static void split_call_mem_edge(env_t *env, ir_node *call, pset *accessed_entities) {
800 ent_leaves_t key_ent, *value_ent;
801 value_arr_entry_t *val_arr;
802 call_access_t key_call, *value_call;
803 ir_node *call_blk, *new_mem_state, *leave;
807 int fix_irn = 0; /**< Set to 1 if we must add this call to its fix list. */
808 int *accessed_leaves_vnum = NULL; /**< An array where the value numbers are saved that
809 are synced by the call's sync node, if we need it. */
/* NOTE(review): leftover debugging hook keyed on a hard-coded node number — consider removing. */
811 if(get_irn_node_nr(call) == 2763)
814 call_blk = get_nodes_block(call);
815 val_arr = get_irn_link(call_blk);
816 /* An array to save the memory edges that must be
818 in = NEW_ARR_F(ir_node *, 1);
819 /* An array to save the value numbers of the memory
820 * edges that must be repaired. */
821 accessed_leaves_vnum = NEW_ARR_F(int, 0);
823 /* We get the memory successor of the call node.
824 * It is the new memory state for all synced memory
826 new_mem_state = get_Call_mem_out(call);
828 /* The global memory is the first predecessor of the created sync node. */
829 if(val_arr[env->gl_mem_vnum].mem_edge_state == NULL) {
830 in[0] = new_Unknown(mode_M);
834 in[0] = val_arr[env->gl_mem_vnum].mem_edge_state;
837 for(ent = pset_first(accessed_entities); ent; ent = pset_next(accessed_entities)) {
838 /* With this loop we iterate over all accessed entities of this call and collect
839 * all memory edges that we must sync. */
840 ent_vnum = GET_ENT_VNUM(ent);
842 key_call.call = call;
843 value_call = set_find(val_arr[ent_vnum].calls, &key_call, sizeof(key_call), HASH_PTR(key_call.call));
846 value_ent = set_find(env->set_ent, &key_ent, sizeof(key_ent), HASH_PTR(key_ent.ent));
848 if(val_arr[ent_vnum].access_type <= 3) {
849 /* This scalar's address was not stored in this block. */
850 switch(value_call->access_type) {
852 case ptr_access_none :
853 /* In this case we have nothing to do. */
856 case ptr_access_read:
857 case ptr_access_write:
859 /* All these cases must be treated equally. */
861 for(leave = pset_first(value_ent->leaves); leave; leave = pset_next(value_ent->leaves)){
862 /* All these memory edges must be synced. */
863 add_mem_edge(val_arr, GET_IRN_VNUM(leave), &in, &accessed_leaves_vnum);
865 /* We update the memory state of this leaf. */
866 if(value_call->access_type != ptr_access_read)
867 val_arr[GET_IRN_VNUM(leave)].mem_edge_state = new_mem_state;
876 /* We must update the global memory state. */
877 val_arr[env->gl_mem_vnum].mem_edge_state = new_mem_state;
879 if(ARR_LEN(in) == 1) {
880 /* we must set the call memory to the global memory */
881 set_Call_mem(call,in[0]);
884 /* We add this call node to the call fix list. */
885 add_ls_to_fixlist(call, env->gl_mem_vnum, env);
888 /* We create the sync and set it as memory predecessor of the call node. */
889 sync = new_r_Sync(current_ir_graph, call_blk, ARR_LEN(in), in);
890 /* We must check this, because it is possible to get a Bad node
891 * from new_r_Sync() when the node can be optimized.
892 * In that case we must do nothing. */
893 if(get_irn_op(sync) == op_Sync) {
895 set_Call_mem(call, sync);
896 if(ARR_LEN(accessed_leaves_vnum))
897 /* We add this sync node to the sync fix list. */
898 add_sync_to_fixlist(sync, accessed_leaves_vnum, env);
905 * The function splits the memory edge of the passed
906 * ir node if this is needed.
908 * @param irn The node whose memory edge must be split.
909 * @param ctx The environment pointer (walker context).
911 static void split_memory_edge(ir_node *irn, void *ctx) {
914 ir_node *sel, *irn_blk;
916 sels_t key_sels, *value_sels;
917 value_arr_entry_t *val_arr;
918 pset *accessed_entities; /**< A set to save all entities accessed by a call. */
922 op = get_irn_op(irn);
927 irn_blk = get_nodes_block(irn);
929 if (Block_not_block_visited(irn_blk)) {
930 /* We first sync the stored scalar addresses in this block. */
931 mark_Block_block_visited(irn_blk);
932 sync_stored_scalars(irn_blk, env);
935 if(op == op_Load || op == op_Store)
937 split_ls_mem_edge(irn, env);
940 if (op == op_Phi && get_irn_mode(irn) == mode_M) {
942 * found a memory Phi: Here, we must create new Phi nodes
944 split_phi_mem_edge(irn, env);
949 /* Calls that have a NoMem input neither read nor write memory.
950 We can completely ignore them here. */
951 if (get_irn_op(get_Call_mem(irn)) == op_NoMem)
954 /* In this set we save all entities
955 * that are accessed by this call node. */
956 accessed_entities = new_pset(ent_cmp, 8);
957 val_arr = get_irn_link(get_nodes_block(irn));
959 for ( i = get_Call_n_params(irn) - 1; i >= 0; i--) {
961 sel = get_Call_param(irn, i);
963 if(get_irn_op(sel) == op_Sel) {
965 value_sels = set_find(env->set_sels, &key_sels, sizeof(key_sels), HASH_PTR(key_sels.sel));
967 if(value_sels != NULL && val_arr[GET_ENT_VNUM(value_sels->ent)].access_type <= 3)
968 /* We save in this set all entities accessed by this call node with
969 * access none, read, write or rw. */
970 pset_insert(accessed_entities, value_sels->ent, HASH_PTR(value_sels->ent));
974 if(pset_count(accessed_entities))
975 split_call_mem_edge(env, irn, accessed_entities);
977 del_pset(accessed_entities);
984 * searches through blocks beginning from block for value
985 * vnum and returns it.
987 * @param block A block from the current ir graph.
988 * @param vnum The value number that must be found.
990 static ir_node *find_vnum_value(ir_node *block, unsigned vnum)
992 value_arr_entry_t *val_arr;
996 if (Block_not_block_visited(block)) {
997 mark_Block_block_visited(block);
999 val_arr = get_irn_link(block);
1001 if (val_arr[vnum].mem_edge_state)
1002 return val_arr[vnum].mem_edge_state;
1004 for (i = get_Block_n_cfgpreds(block) - 1; i >= 0; --i) {
1005 ir_node *pred = get_Block_cfgpred(block, i);
1007 res = find_vnum_value(get_nodes_block(pred), vnum);
1016 * fix the Load/Store or Call list
1018 * @param env The environment pointer.
1020 static void fix_ls(env_t *env)
1023 ir_node *irn, *block, *pred, *val = NULL;
1027 for (l = env->fix_ls; l; l = get_irn_link(irn)) {
1030 op = get_irn_op(irn);
1031 block = get_nodes_block(irn);
1032 for (i = get_Block_n_cfgpreds(block) - 1; i >= 0; --i) {
1033 pred = get_Block_cfgpred(block, i);
1034 pred = get_nodes_block(pred);
1036 inc_irg_block_visited(current_ir_graph);
1037 val = find_vnum_value(pred, l->vnum);
1045 set_Store_mem(irn, val);
1048 set_Load_mem(irn, val);
1050 set_Call_mem(irn, val);
1058 * @param env The environment pointer.
1060 static void fix_phis(env_t *env)
1063 ir_node *phi, *block, *pred, *val;
1066 for (l = env->fix_phis; l; l = get_irn_link(phi)) {
1069 block = get_nodes_block(phi);
1070 for (i = get_Block_n_cfgpreds(block) - 1; i >= 0; --i) {
1072 pred = get_Block_cfgpred(block, i);
1073 pred = get_nodes_block(pred);
1075 inc_irg_block_visited(current_ir_graph);
1076 val = find_vnum_value(pred, l->vnum);
1079 set_irn_n(phi, i, val);
1088 * @param env The environment pointer.
1090 static void fix_syncs(env_t *env)
1092 syncs_fixlist_entry_t *l;
1093 ir_node *sync, *block, *pred, *val;
1097 for (l = env->fix_syncs; l; l = get_irn_link(sync)) {
1101 /* The sync block must have one predecessor when it
1102 has Unknown nodes as predecessors. */
1103 block = get_nodes_block(sync);
1104 pred = get_Block_cfgpred(block, 0);
1105 pred = get_nodes_block(pred);
1107 /* We first repair the global memory edge at the first position of the sync predecessors. */
1108 if(get_irn_op(get_irn_n(sync, 0)) == op_Unknown) {
1109 inc_irg_block_visited(current_ir_graph);
1110 val = find_vnum_value(pred, env->gl_mem_vnum);
1113 set_irn_n(sync, 0, val);
1116 for (i = get_irn_arity(sync) - 1; i >= 1; --i) {
1117 /* We repair the leaves. */
1119 assert(k <= ARR_LEN(l->accessed_vnum) && "The algorythm for sync repair is wron");
1120 if(get_irn_op(get_irn_n(sync, i)) == op_Unknown) {
1121 inc_irg_block_visited(current_ir_graph);
1122 val = find_vnum_value(pred, l->accessed_vnum[k++]);
1125 set_irn_n(sync, i, val);
1128 DEL_ARR_F(l->accessed_vnum);
1132 * For the end node we must sync all memory edges.
1134 * @param env The environment pointer.
1136 static void sync_mem_edges(env_t *env) {
1138 value_arr_entry_t *val_arr;
1139 ir_node **in, *sync, *Return, *Return_blk;
1140 int i, vnum, vnum_state;
1142 Return = get_Block_cfgpred(get_irg_end_block(current_ir_graph), 0);
1143 Return_blk = get_nodes_block(Return);
1144 val_arr = get_irn_link(Return_blk);
1148 for(i = 0; i <= (int)env->gl_mem_vnum; i++)
1149 /* we get the current state of non-saved scalars. */
1150 if(val_arr[i].access_type <= 3)
1153 /* We allocate the memory that we need for the predecessors of the sync. */
1154 in = xmalloc(sizeof(ir_node*) *vnum_state);
1156 /* The global memory edge is the first predecessor of this sync node. */
1157 if(val_arr[env->gl_mem_vnum].mem_edge_state == NULL) {
1158 /* We must search through the blocks for this memory state. */
1159 inc_irg_block_visited(current_ir_graph);
1160 in[0] = find_vnum_value(Return_blk, env->gl_mem_vnum);
1163 in[0] = val_arr[env->gl_mem_vnum].mem_edge_state;
1166 for(i = 1, vnum = 0; vnum < (int)env->gl_mem_vnum; vnum++) {
1168 if(val_arr[vnum].access_type <= 3) {
1169 /* we add the non-saved scalars as predecessors of the sync. */
1171 if(val_arr[vnum].mem_edge_state == NULL) {
1172 /* We must search through the blocks for this memory state. */
1173 inc_irg_block_visited(current_ir_graph);
1174 in[i] = find_vnum_value(Return_blk, vnum);
1177 in[i] = val_arr[vnum].mem_edge_state;
1182 sync = new_r_Sync(current_ir_graph, Return_blk, vnum_state, in);
1183 set_Return_mem(Return, sync);
1189 * Walker: allocate the value array for every block.
1191 * @param block A block of the current ir graph for which a value array must be allocated.
1192 * @param ctx The environment pointer.
1194 static void alloc_value_arr(ir_node *block, void *ctx)
1199 value_arr_entry_t *var_arr = obstack_alloc(&env->obst, sizeof(value_arr_entry_t) *(env->nvals + set_count(env->set_ent) + 1));
1201 /* the value array is empty at start */
1202 memset(var_arr, 0, sizeof(value_arr_entry_t) * (env->nvals + set_count(env->set_ent) + 1));
1203 set_irn_link(block, var_arr);
1205 /* We set the block value number state to optimal and update it later. */
1206 var_arr[env->vnum_state].access_type = env->nvals;
1208 if(get_irg_start_block(current_ir_graph) == block)
1209 /* We initialize the start block's array with the irg initial memory, because
1210 * it must be the start point of all memory edges. */
1211 for(i = (env->nvals + set_count(env->set_ent)) ; i >=0; i--)
1212 var_arr[i].mem_edge_state = get_irg_initial_mem(current_ir_graph);
1216 /* Analyze call nodes to get information whether they store the address of a scalar.
1218 * @param irn An ir node from the current_ir_graph.
1219 * @param ctx The environment pointer (walker context).
1221 static void analyse_calls(ir_node *irn, void *ctx) {
1224 unsigned int acces_type;
1225 ir_node *param, *call_ptr, *blk;
1227 ir_entity *meth_ent;
1228 sels_t key_sels, *value_sels;
1229 call_access_t key_call, *value_call;
1230 value_arr_entry_t *val_arr;
1234 if(get_irn_op(irn) != op_Call)
1237 /* Calls that have a NoMem input neither read nor write memory.
1238 We can completely ignore them here. */
1239 if (get_irn_op(get_Call_mem(irn)) == op_NoMem)
1242 /* We iterate over the parameters of this call node. */
1243 for ( i = get_Call_n_params(irn) - 1; i >= 0; i--) {
1244 param = get_Call_param(irn, i);
1245 if(get_irn_op(param) == op_Sel) {
1246 /* We have found a parameter that is a Sel. */
1247 key_sels.sel = param;
1248 value_sels = set_find(env->set_sels, &key_sels, sizeof(key_sels), HASH_PTR(key_sels.sel));
1249 if(value_sels != NULL ) {
1251 /* We have found a call that has as parameter a Sel from our set_sels. */
1252 call_ptr = get_Call_ptr(irn);
1253 op = get_irn_op(call_ptr);
1255 if(op == op_SymConst && get_SymConst_kind(call_ptr) == symconst_addr_ent) {
1256 meth_ent = get_SymConst_entity(call_ptr);
1257 /* we get the access type for our Sel. */
1258 acces_type = get_method_param_access(meth_ent, i);
1260 /* We can't analyze this function and we assume that it stores the address. */
1261 acces_type = ptr_access_store;
1263 /* we save the access type and this call in the array allocated for this block.
1264 * The value number of this entity gives us the position in the array to save this
1265 * information. Because we expect more than one call we allocate a set. */
1266 vnum = GET_ENT_VNUM(value_sels->ent);
1267 blk = get_nodes_block(irn);
1268 val_arr = get_irn_link(blk);
1270 if(val_arr[vnum].access_type > 3)
1271 /* The address of this entity has been stored. */
1274 if(val_arr[vnum].calls == NULL)
1275 /* for this entity we have found the first call in this block, so we must allocate the set. */
1276 val_arr[vnum].calls = new_set(call_cmp, 8);
1278 /* This call does something with the scalar and we must record it. */
1279 key_call.call = irn;
1280 key_call.access_type = acces_type;
1281 value_call = set_insert(val_arr[vnum].calls, &key_call, sizeof(key_call), HASH_PTR(key_call.call));
1283 if(value_call->access_type < acces_type)
1284 /* this case occurs when a call accesses an entity more than once.
1285 * Then we must save the highest access type. */
1286 value_call->access_type = acces_type;
1289 /* This call saves the address of our scalar and we can't
1290 * use the scalars of this entity for optimization from now on.
1292 val_arr[vnum].access_type = acces_type;
/** Propagate an access type into the blocks reachable from the immediate
 *  dominator of blk.
 *
 *  @param blk    A block whose idom's control-flow successors are visited.
 *  @param vnum   The value number of the entity whose access is updated.
 *  @param access The access type to install where none (< 3) is set yet.
 *
 *  NOTE(review): the return statement lies outside this excerpt; the caller
 *  uses the result as a "changes" flag — confirm against the full source. */
1298 static int set_block_dominated_first_access(ir_node *blk, int vnum, unsigned int access) {
1300 ir_node *idom, *succ;
1301 value_arr_entry_t *val_arr;
1304 idom = get_Block_idom(blk);
1305 for(i = get_Block_n_cfg_outs(idom) - 1; i >=1; i--) {
1306 succ = get_Block_cfg_out(idom, i);
1307 val_arr = get_irn_link(succ);
1308 if(val_arr[vnum].access_type < 3) {
1309 val_arr[vnum].access_type = access;
1315 /* Update the access information of a block if a predecessor of
1316 * this block has a higher access type for an entity.
1318 * @param *irn A block of the current_ir_graph.
1319 * @param *ctx The walker environment pointer (env_t).
1321 static void set_block_access(ir_node *irn, void *ctx){
1323 value_arr_entry_t *val_arr, *val_arr_pred;
1324 ent_leaves_t *value_leaves;
1325 ir_node *pred, *pred_blk, *leave;
1330 val_arr = get_irn_link(irn);
1332 for( i = get_Block_n_cfgpreds(irn) - 1; i >= 0; i--) {
1333 /* We analyze the predecessors of this block to see if its access
 * information must be updated. */
1335 pred = get_Block_cfgpred(irn, i);
1336 pred_blk = get_nodes_block(pred);
1338 val_arr_pred = get_irn_link(pred_blk);
1340 for(value_leaves = set_first(env->set_ent); value_leaves; value_leaves = set_next(env->set_ent)) {
1341 vnum = GET_ENT_VNUM(value_leaves->ent);
1343 if((get_Block_n_cfgpreds(irn) > 1) && (val_arr[vnum].access_type > 3))
1344 env->changes = set_block_dominated_first_access(irn, vnum, val_arr[vnum].access_type);
1346 if((val_arr_pred[vnum].access_type > 3) && (val_arr[vnum].access_type < 3)) {
1347 /* We have found a block whose access and value number information must be updated. */
1348 val_arr[vnum].access_type = val_arr_pred[vnum].access_type;
1349 /* We update the access information of all leaves that belong to
 * this entity. */
1352 for(leave = pset_first(value_leaves->leaves); leave; leave = pset_next(value_leaves->leaves))
1353 val_arr[GET_IRN_VNUM(leave)].access_type = val_arr[vnum].access_type;
1355 /* In this way the actual number of value numbers can't be obtained.
1356 val_arr[env->vnum_state].access_type = val_arr_pred[env->vnum_state].access_type; */
1362 /* Free the allocated call sets.
1364 * @param irn A block of the ir graph.
1365 * @param ctx The walker environment pointer (env_t).
1367 static void free_call_info(ir_node *irn, void *ctx) {
1371 value_arr_entry_t *val_arr;
1374 val_arr = get_irn_link(irn);
1376 for(i = env->nvals + set_count(env->set_ent); i >= 0; i--) {
1377 if(val_arr[i].calls != NULL)
1379 del_set(val_arr[i].calls);
/** Debug helper: dump the access state of a block — its value number state
 *  and, for every entity, the access type plus all recorded calls.
 *
 *  @param irn A block of the current_ir_graph.
 *  @param ctx The walker environment pointer (env_t). */
1383 static void print_block_state(ir_node *irn, void *ctx) {
1385 value_arr_entry_t *val_arr;
1386 ent_leaves_t *value_leaves;
1387 call_access_t *value_calls;
1392 val_arr = get_irn_link(irn);
1393 ir_printf("\n\nThe actual value number state of this block is: %i \n",
1394 val_arr[env->vnum_state].access_type - 1);
1396 for(value_leaves = set_first(env->set_ent); value_leaves; value_leaves = set_next(env->set_ent)) {
1398 vnum = GET_ENT_VNUM(value_leaves->ent);
1399 ir_printf("The entity %F access type in the block with nr %u is %i \n",
1400 value_leaves->ent, get_irn_node_nr(irn), val_arr[vnum].access_type);
1402 if(val_arr[vnum].calls != NULL)
1403 for(value_calls = set_first(val_arr[vnum].calls); value_calls; value_calls = set_next(val_arr[vnum].calls))
1405 ir_printf("A call with nr %i acess a element of this entity with access %u \n",
1406 get_irn_node_nr(value_calls->call), value_calls->access_type);
1411 /** Optimize the found scalar replacements.
1413 * @param set_ent A set with all entities that
1415 * @param set_sels A set with all Sel nodes
1416 * that belong to our scalars.
1417 * @param vnum The number of scalars.
1419 static void do_data_flow_scalar_replacement(set *set_ent, set *set_sels, int vnum) {
1423 obstack_init(&env.obst);
1424 env.set_ent = set_ent;
1425 env.set_sels = set_sels;
1427 env.fix_phis = NULL;
1428 env.fix_syncs = NULL;
1429 env.gl_mem_vnum = vnum - 2;
1430 env.vnum_state = vnum - 1;
1431 /* nvals is vnum - 1, because nvals denotes the number
1432 * of memory edges we will produce; for vnum_state we don't
1433 * need to produce a memory edge. */
1434 env.nvals = vnum - 1;
1437 /* first step: allocate the value arrays for every block */
1438 irg_block_walk_graph(current_ir_graph, NULL, alloc_value_arr, &env);
1440 /* second step: we analyze all calls that take scalar(s) as parameter.
1441 * We mark the calls that store the address of a scalar, and we
1442 * mark the entity owning such a scalar as not optimizable for now. */
1443 irg_walk_graph(current_ir_graph, NULL, analyse_calls, &env);
1445 while(env.changes) {
1450 * third step: walk over the blocks of the graph and update
1451 * the access information for our scalars (to a fixed point).
1453 irg_block_walk_graph(current_ir_graph, NULL, set_block_access, &env);
1457 // if(get_firm_verbosity())
1458 /* Debug info to check that analyse_calls works properly. */
1459 irg_block_walk_graph(current_ir_graph, NULL, print_block_state, &env);
1462 * fourth step: walk over the graph blockwise in topological order
1463 * and split the memory edge.
1465 inc_irg_block_visited(current_ir_graph);
1466 irg_walk_blkwise_graph(current_ir_graph, NULL, split_memory_edge, &env);
1470 /* fifth step: fix all nodes that have an Unknown predecessor. */
1475 /* sixth step: sync memory edges for the end block. */
1476 sync_mem_edges(&env);
1478 /* seventh step: free the allocated memory */
1479 irg_block_walk_graph(current_ir_graph, NULL, free_call_info, &env);
1480 obstack_free(&env.obst, NULL);
1484 * Find possible scalar replacements
1486 * @param irg The current ir graph.
1488 void data_flow_scalar_replacement_opt(ir_graph *irg) {
1494 ent_leaves_t key_leaves, *value_leaves;
1497 if (! get_opt_scalar_replacement())
1500 set_sels = new_set(sels_cmp, 8);
1501 set_ent = new_set(ent_leaves_t_cmp, 8);
1503 /* Call algorithm that remove the critical edges of a ir graph. */
1504 remove_critical_cf_edges(irg);
1506 /* Call algorithm that computes the out edges.*/
1507 if (get_irg_outs_state(irg) != outs_consistent)
1508 compute_irg_outs(irg);
1510 /* Call algorithm that computes the loop information.*/
1511 compute_loop_info(irg);
1512 /* Call algorithm that computes the dominance information.*/
1515 /* Find possible scalar replacements */
1516 if (find_possible_replacements(irg)) {
1518 /* Insert in set the scalar replacements. */
1519 irg_frame = get_irg_frame(irg);
1521 for (i = 0 ; i < get_irn_n_outs(irg_frame); i++) {
1522 ir_node *succ = get_irn_out(irg_frame, i);
1524 if (get_irn_op(succ) == op_Sel) {
1525 ir_entity *ent = get_Sel_entity(succ);
1527 if (get_entity_link(ent) == NULL || get_entity_link(ent) == ADDRESS_TAKEN)
1529 /* we have found a entity, that have scalars and we insert it to our set_ent*/
1530 key_leaves.ent = ent;
1531 key_leaves.leaves = new_pset(leave_cmp, 8);
1532 value_leaves = set_insert(set_ent, &key_leaves, sizeof(key_leaves), HASH_PTR(ent));
1534 /* We allocate for every leave sel a vnum.*/
1535 vnum = allocate_value_numbers(set_sels, value_leaves->leaves, ent, vnum);
1539 if(get_firm_verbosity())
1540 printf("vnumber in data flow= %i\n", vnum);
1542 /* Allocate value number for the globule memory edge.
1543 * and a value number for the value numbers state.*/
1546 /* Allocate value numbers for the entities .*/
1547 for(i = vnum,value_leaves = set_first(set_ent); value_leaves; i++, value_leaves = set_next(set_ent))
1548 SET_ENT_VNUM(value_leaves->ent, i);
1551 do_data_flow_scalar_replacement(set_ent, set_sels, vnum);
1553 /*free the allocated memory.*/
1554 for(value_leaves = set_first(set_ent); value_leaves; value_leaves = set_next(set_ent))
1555 del_pset(value_leaves->leaves);