Go to:
Gentoo Home
Documentation
Forums
Lists
Bugs
Planet
Store
Wiki
Get Gentoo!
Gentoo's Bugzilla – Attachment 73562 Details for
Bug 113541
New GCC 4.1.0_beta20051123-r1 (pre-release) (unofficial snapshot)- new ebuild + cool fixes and customizations
Home
|
New
–
[Ex]
|
Browse
|
Search
|
Privacy Policy
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
reassoc-20051116-vanilla1.patch
reassoc-20051116-vanilla1.patch (text/plain), 71.21 KB, created by
Alexey Maximov
on 2005-11-25 02:25:02 UTC
(
hide
)
Description:
reassoc-20051116-vanilla1.patch
Filename:
MIME Type:
Creator:
Alexey Maximov
Created:
2005-11-25 02:25:02 UTC
Size:
71.21 KB
patch
obsolete
> gcc/Makefile.in | 3 > gcc/passes.c | 3 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-1.c | 2 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-10.c | 11 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-11.c | 11 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-2.c | 17 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-3.c | 20 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-4.c | 4 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-5.c | 17 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-6.c | 13 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-7.c | 12 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-8.c | 13 > gcc/testsuite/gcc.dg/tree-ssa/reassoc-9.c | 14 > gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-2.c | 5 > gcc/tree-flow.h | 2 > gcc/tree-ssa-dom.c | 47 > gcc/tree-ssa-operands.c | 34 > gcc/tree-ssa-reassoc.c | 1800 +++++++++++++++++++++-------- > 18 files changed, 1526 insertions(+), 502 deletions(-) > >Index: gcc41/gcc/Makefile.in >=================================================================== >--- gcc41.orig/gcc/Makefile.in >+++ gcc41/gcc/Makefile.in >@@ -1932,7 +1932,8 @@ tree-ssa-alias.o : tree-ssa-alias.c $(TR > tree-ssa-reassoc.o : tree-ssa-reassoc.c $(TREE_FLOW_H) $(CONFIG_H) \ > $(SYSTEM_H) $(TREE_H) $(GGC_H) $(DIAGNOSTIC_H) errors.h $(TIMEVAR_H) \ > $(TM_H) coretypes.h $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) tree-iterator.h\ >- $(BASIC_BLOCK_H) $(HASHTAB_H) $(TREE_GIMPLE_H) tree-inline.h >+ $(BASIC_BLOCK_H) $(TREE_GIMPLE_H) tree-inline.h vec.h \ >+ alloc-pool.h > tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ > $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h $(DIAGNOSTIC_H) \ > $(FLAGS_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) toplev.h \ >Index: gcc41/gcc/passes.c >=================================================================== >--- gcc41.orig/gcc/passes.c >+++ gcc41/gcc/passes.c >@@ -535,8 +535,9 @@ init_optimization_passes (void) > which can create arbitrary GIMPLE. */ > NEXT_PASS (pass_may_alias); > NEXT_PASS (pass_cse_reciprocals); >- NEXT_PASS (pass_split_crit_edges); > NEXT_PASS (pass_reassoc); >+ NEXT_PASS (pass_dce); >+ NEXT_PASS (pass_split_crit_edges); > NEXT_PASS (pass_pre); > NEXT_PASS (pass_sink_code); > NEXT_PASS (pass_tree_loop); >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-1.c >=================================================================== >--- gcc41.orig/gcc/testsuite/gcc.dg/tree-ssa/reassoc-1.c >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-1.c >@@ -14,5 +14,5 @@ int main(void) > printf ("%d %d\n", e, f); > } > >-/* { dg-final { scan-tree-dump-times "a \\\+ b" 1 "optimized"} } */ >+/* { dg-final { scan-tree-dump-times "b \\\+ a" 1 "optimized"} } */ > /* { dg-final { cleanup-tree-dump "optimized" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-10.c >=================================================================== >--- /dev/null >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-10.c >@@ -0,0 +1,11 @@ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-optimized" } */ >+int main(int a, int b, int c, int d) >+{ >+ /* Should become just a & b & c & d */ >+ int e = (a & b) & (c & d); >+ int f = (c & a) & (b & d); >+ return e & f; >+} >+/* { dg-final { scan-tree-dump-times "\\\& " 3 "optimized"} } */ >+/* { dg-final { cleanup-tree-dump "optimized" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-11.c >=================================================================== >--- /dev/null >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-11.c >@@ -0,0 +1,11 @@ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-reassoc" } */ >+int main(int a, int b, int c, int d) >+{ >+ /* All the xor's cancel each other out, leaving 0 */ >+ int e = (a ^ b) ^ (c ^ d); >+ int f = (c ^ a) ^ (b ^ d); >+ return e ^ f; >+} >+/* { dg-final { scan-tree-dump-times "= 0" 1 "reassoc"} } */ >+/* { dg-final { cleanup-tree-dump "reassoc" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-2.c >=================================================================== >--- gcc41.orig/gcc/testsuite/gcc.dg/tree-ssa/reassoc-2.c >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-2.c >@@ -1,18 +1,17 @@ > /* { dg-do compile } */ >-/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-reassoc-details" } */ >-extern int a0, a1, a2, a3, a4; >-int f () >+/* { dg-options "-O2 -fdump-tree-optimized" } */ >+int f (int a0,int a1,int a2,int a3,int a4) > { >-int b0, b1, b2, b3, b4; >+int b0, b1, b2, b3, b4,e; > /* this can be optimized to four additions... */ > b4 = a4 + a3 + a2 + a1 + a0; > b3 = a3 + a2 + a1 + a0; > b2 = a2 + a1 + a0; > b1 = a1 + a0; > /* This is actually 0 */ >- return b4 - b3 + b2 - b1 - a4 - a2; >-} >-/* { dg-final { scan-tree-dump-times "Reassociating by rank" 3 "reassoc" } } */ >-/* { dg-final { scan-tree-dump-times "return 0" 1 "optimized" { xfail *-*-* } } } */ >+ e = b4 - b3 + b2 - b1 - a4 - a2; >+ return e; >+} >+ >+/* { dg-final { scan-tree-dump-times "return 0" 1 "optimized" } } */ > /* { dg-final { cleanup-tree-dump "optimized" } } */ >-/* { dg-final { cleanup-tree-dump "reassoc" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-3.c >=================================================================== >--- gcc41.orig/gcc/testsuite/gcc.dg/tree-ssa/reassoc-3.c >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-3.c >@@ -1,18 +1,6 @@ >-/* { dg-do compile } */ >-/* { dg-options "-O2 -fdump-tree-optimized -ffast-math" } */ >-float a, b, c, d; >-extern int printf (const char *, ...); >-int main(void) >+int main(int a, int b, int c, int d) > { >- float e; >- float f; >- /* We should be able to transform these into the same expression, and only have two additions. */ >- e = a + b; >- e = e + c; >- f = c + a; >- f = f + b; >- printf ("%f %f\n", e, f); >+ int e = (a & ~b) & (~c & d); >+ int f = (~c & a) & (b & ~d); >+ return (e & f); > } >- >-/* { dg-final { scan-tree-dump-times "\\\+" 2 "optimized"} } */ >-/* { dg-final { cleanup-tree-dump "optimized" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-4.c >=================================================================== >--- gcc41.orig/gcc/testsuite/gcc.dg/tree-ssa/reassoc-4.c >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-4.c >@@ -1,5 +1,5 @@ > /* { dg-do compile } */ >-/* { dg-options "-O2 -fdump-tree-optimized" } */ >+/* { dg-options "-O2 -fdump-tree-optimized -ffast-math" } */ > float a, b, c, d; > extern int printf (const char *, ...); > int main(void) >@@ -14,5 +14,5 @@ int main(void) > printf ("%f %f\n", e, f); > } > >-/* { dg-final { scan-tree-dump-times "\\\+" 4 "optimized"} } */ >+/* { dg-final { scan-tree-dump-times "\\\+" 2 "optimized"} } */ > /* { dg-final { cleanup-tree-dump "optimized" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-5.c >=================================================================== >--- /dev/null >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-5.c >@@ -0,0 +1,17 @@ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-optimized" } */ >+extern int printf (const char *, ...); >+int main(int argc, int b) >+{ >+ /* We should be able to get rid of the a - i. */ >+ int i; >+ for (i = 0; i < 50; i++) >+ { >+ int a = b + i; >+ int c = a - i; >+ int d = argc + b; >+ printf ("%d %d\n", c,d); >+ } >+} >+/* { dg-final { scan-tree-dump-times "a - i" 0 "optimized"} } */ >+/* { dg-final { cleanup-tree-dump "optimized" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-6.c >=================================================================== >--- /dev/null >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-6.c >@@ -0,0 +1,13 @@ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-reassoc" } */ >+int main(int a, int b, int c, int d) >+{ >+ /* Should be transformed into a + c + 8 */ >+ int e = a + 3; >+ int f = c + 5; >+ int g = e + f; >+ return g; >+} >+ >+/* { dg-final { scan-tree-dump-times "\\\+ 8" 1 "reassoc"} } */ >+/* { dg-final { cleanup-tree-dump "reassoc" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-7.c >=================================================================== >--- /dev/null >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-7.c >@@ -0,0 +1,12 @@ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-reassoc" } */ >+int main(int a, int b, int c, int d, int e, int f, int g, int h) >+{ >+ /* Should be transformed into a + c + d + e + g + 15 */ >+ int i = (a + 9) + (c + d); >+ int j = (e + 4) + (2 + g); >+ e = i + j; >+ return e; >+} >+/* { dg-final { scan-tree-dump-times "\\\+ 15" 1 "reassoc"} } */ >+/* { dg-final { cleanup-tree-dump "reassoc" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-8.c >=================================================================== >--- /dev/null >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-8.c >@@ -0,0 +1,13 @@ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-reassoc" } */ >+ >+int main(int a, int b, int c, int d, int e, int f, int g, int h) >+{ >+ /* e & ~e -> 0 */ >+ int i = (a & 9) & (c & d); >+ int j = (~e & d) & (~c & e); >+ e = i & j; >+ return e; >+} >+/* { dg-final { scan-tree-dump-times "= 0" 1 "reassoc"} } */ >+/* { dg-final { cleanup-tree-dump "reassoc" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-9.c >=================================================================== >--- /dev/null >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-9.c >@@ -0,0 +1,14 @@ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-reassoc" } */ >+ >+int main(int a, int b, int c, int d, int e, int f, int g, int h) >+{ >+ /* Should be transformed into e = 20 */ >+ int i = (a + 9) + (c + 8); >+ int j = (-c + 1) + (-a + 2); >+ >+ e = i + j; >+ return e; >+} >+/* { dg-final { scan-tree-dump-times "= 20" 1 "reassoc"} } */ >+/* { dg-final { cleanup-tree-dump "reassoc" } } */ >Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-2.c >=================================================================== >--- gcc41.orig/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-2.c >+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-2.c >@@ -16,6 +16,7 @@ int motion_test1(int data, int data_0, i > return v * t * u; > } > /* We should eliminate one computation of data_0 + data_3 along the >- main path, causing one reload. */ >-/* { dg-final { scan-tree-dump-times "Eliminated: 1" 1 "pre"} } */ >+ main path, and one computation of v * i along the main path, causing >+ two eliminations. */ >+/* { dg-final { scan-tree-dump-times "Eliminated: 2" 1 "pre"} } */ > /* { dg-final { cleanup-tree-dump "pre" } } */ >Index: gcc41/gcc/tree-flow.h >=================================================================== >--- gcc41.orig/gcc/tree-flow.h >+++ gcc41/gcc/tree-flow.h >@@ -896,4 +896,6 @@ void delete_alias_heapvars (void); > > #include "tree-flow-inline.h" > >+void swap_tree_operands (tree, tree *, tree *); >+ > #endif /* _TREE_FLOW_H */ >Index: gcc41/gcc/tree-ssa-dom.c >=================================================================== >--- gcc41.orig/gcc/tree-ssa-dom.c >+++ gcc41/gcc/tree-ssa-dom.c >@@ -588,6 +588,45 @@ struct tree_opt_pass pass_dominator = > }; > > >+/* Given a stmt CONDSTMT containing a COND_EXPR, canonicalize the >+ COND_EXPR into a canonical form. */ >+ >+static void >+canonicalize_comparison (tree condstmt) >+{ >+ tree cond = COND_EXPR_COND (condstmt); >+ tree op0; >+ tree op1; >+ enum tree_code code = TREE_CODE (cond); >+ >+ if (!COMPARISON_CLASS_P (cond)) >+ return; >+ >+ op0 = TREE_OPERAND (cond, 0); >+ op1 = TREE_OPERAND (cond, 1); >+ >+ /* If it would be profitable to swap the operands, then do so to >+ canonicalize the statement, enabling better optimization. >+ >+ By placing canonicalization of such expressions here we >+ transparently keep statements in canonical form, even >+ when the statement is modified. */ >+ if (tree_swap_operands_p (op0, op1, false)) >+ { >+ /* For relationals we need to swap the operands >+ and change the code. */ >+ if (code == LT_EXPR >+ || code == GT_EXPR >+ || code == LE_EXPR >+ || code == GE_EXPR) >+ { >+ TREE_SET_CODE (cond, swap_tree_comparison (code)); >+ swap_tree_operands (condstmt, >+ &TREE_OPERAND (cond, 0), >+ &TREE_OPERAND (cond, 1)); >+ } >+ } >+} > /* We are exiting E->src, see if E->dest ends with a conditional > jump which has a known value when reached via E. > >@@ -799,7 +838,10 @@ thread_across_edge (struct dom_walk_data > /* Now temporarily cprop the operands and try to find the resulting > expression in the hash tables. */ > if (TREE_CODE (stmt) == COND_EXPR) >- cond = COND_EXPR_COND (stmt); >+ { >+ canonicalize_comparison (stmt); >+ cond = COND_EXPR_COND (stmt); >+ } > else if (TREE_CODE (stmt) == GOTO_EXPR) > cond = GOTO_DESTINATION (stmt); > else >@@ -2919,6 +2961,9 @@ optimize_stmt (struct dom_walk_data *wal > > old_stmt = stmt = bsi_stmt (si); > >+ if (TREE_CODE (stmt) == COND_EXPR) >+ canonicalize_comparison (stmt); >+ > update_stmt_if_modified (stmt); > ann = stmt_ann (stmt); > opt_stats.num_stmts++; >Index: gcc41/gcc/tree-ssa-operands.c >=================================================================== >--- gcc41.orig/gcc/tree-ssa-operands.c >+++ gcc41/gcc/tree-ssa-operands.c >@@ -1047,7 +1047,6 @@ swap_tree_operands (tree stmt, tree *exp > *exp1 = op0; > } > >- > /* Recursively scan the expression pointed to by EXPR_P in statement referred > to by INFO. FLAGS is one of the OPF_* constants modifying how to interpret > the operands found. */ >@@ -1257,39 +1256,6 @@ get_expr_operands (tree stmt, tree *expr > case ASSERT_EXPR: > do_binary: > { >- tree op0 = TREE_OPERAND (expr, 0); >- tree op1 = TREE_OPERAND (expr, 1); >- >- /* If it would be profitable to swap the operands, then do so to >- canonicalize the statement, enabling better optimization. >- >- By placing canonicalization of such expressions here we >- transparently keep statements in canonical form, even >- when the statement is modified. */ >- if (tree_swap_operands_p (op0, op1, false)) >- { >- /* For relationals we need to swap the operands >- and change the code. */ >- if (code == LT_EXPR >- || code == GT_EXPR >- || code == LE_EXPR >- || code == GE_EXPR) >- { >- TREE_SET_CODE (expr, swap_tree_comparison (code)); >- swap_tree_operands (stmt, >- &TREE_OPERAND (expr, 0), >- &TREE_OPERAND (expr, 1)); >- } >- >- /* For a commutative operator we can just swap the operands. */ >- else if (commutative_tree_code (code)) >- { >- swap_tree_operands (stmt, >- &TREE_OPERAND (expr, 0), >- &TREE_OPERAND (expr, 1)); >- } >- } >- > get_expr_operands (stmt, &TREE_OPERAND (expr, 0), flags); > get_expr_operands (stmt, &TREE_OPERAND (expr, 1), flags); > return; >Index: gcc41/gcc/tree-ssa-reassoc.c >=================================================================== >--- gcc41.orig/gcc/tree-ssa-reassoc.c >+++ gcc41/gcc/tree-ssa-reassoc.c >@@ -33,249 +33,220 @@ Boston, MA 02110-1301, USA. */ > #include "tree-gimple.h" > #include "tree-dump.h" > #include "timevar.h" >-#include "hashtab.h" > #include "tree-iterator.h" > #include "tree-pass.h" >+#include "alloc-pool.h" >+#include "vec.h" >+#include "langhooks.h" > >-/* This is a simple global reassociation pass that uses a combination >- of heuristics and a hashtable to try to expose more operations to >- CSE. >- >- The basic idea behind the heuristic is to rank expressions by >- depth of the computation tree and loop depth, and try to produce >- expressions consisting of small rank operations, as they are more >- likely to reoccur. In addition, we use a hashtable to try to see >- if we can transpose an operation into something we have seen >- before. >- >- Note that the way the hashtable is structured will sometimes find >- matches that will not expose additional redundancies, since it is >- not unwound as we traverse back up one branch of the dominator >- tree and down another. However, the cost of improving this is >- probably not worth the additional benefits it will bring. */ >+/* This is a simple global reassociation pass. It is, in part, based >+ on the LLVM pass of the same name (They do some things more/less >+ than we do, in different orders, etc). > >-/* Statistics */ >-static struct >-{ >- int reassociated_by_rank; >- int reassociated_by_match; >-} reassociate_stats; >+ It consists of five steps: > >+ 1. Breaking up subtract operations into addition + negate, where >+ it would promote the reassociation of adds. > >+ 2. Left linearization of the expression trees, so that (A+B)+(C+D) >+ becomes (((A+B)+C)+D), which is easier for us to rewrite later. >+ During linearization, we place the operands of the binary >+ expressions into the a vector of operand_entry_t > >-/* Seen binary operator hashtable. */ >-static htab_t seen_binops; >+ 3. Optimization of the operand lists, eliminating things like a + >+ -a, a & a, etc. > >-/* Binary operator struct. */ >+ 4. Rewrite the expression trees we linearized and optimized so >+ they are in proper rank order. > >-typedef struct seen_binop_d >-{ >- tree op1; >- tree op2; >-} *seen_binop_t; >+ 5. Repropagate negates, as nothing else will clean it up ATM. > >-/* Return a SEEN_BINOP_T if we have seen an associative binary >- operator with OP1 and OP2 in it. */ >+ A bit of theory on #4, since nobody seems to write anything down >+ about why it makes sense to do it the way they do it: > >-static seen_binop_t >-find_seen_binop (tree op1, tree op2) >-{ >- void **slot; >- struct seen_binop_d sbd; >- sbd.op1 = op1; >- sbd.op2 = op2; >- slot = htab_find_slot (seen_binops, &sbd, NO_INSERT); >- if (!slot) >- return NULL; >- return ((seen_binop_t) *slot); >-} >+ We could do this much nicer theoretically, but don't (for reasons >+ explained after how to do it theoretically nice :P). > >-/* Insert a binary operator consisting of OP1 and OP2 into the >- SEEN_BINOP table. */ >+ In order to promote the most redundancy elimination, you want >+ binary expressions whose operands are the same rank (or >+ preferrably, the same value) exposed to the redundancy eliminator, >+ for possible elimination. > >-static void >-insert_seen_binop (tree op1, tree op2) >-{ >- void **slot; >- seen_binop_t new_pair = xmalloc (sizeof (*new_pair)); >- new_pair->op1 = op1; >- new_pair->op2 = op2; >- slot = htab_find_slot (seen_binops, new_pair, INSERT); >- if (*slot != NULL) >- free (*slot); >- *slot = new_pair; >-} >+ So the way to do this if we really cared, is to build the new op >+ tree from the leaves to the roots, merging as you go, and putting the >+ new op on the end of the worklist, until you are left with one >+ thing on the worklist. > >-/* Return the hash value for a seen binop structure pointed to by P. >- Because all the binops we consider are associative, we just add the >- hash value for op1 and op2. */ >+ IE if you have to rewrite the following set of operands (listed with >+ rank in parentheses), with opcode PLUS_EXPR: > >-static hashval_t >-seen_binop_hash (const void *p) >-{ >- const seen_binop_t sb = (seen_binop_t) p; >- return iterative_hash_expr (sb->op1, 0) + iterative_hash_expr (sb->op2, 0); >-} >+ a (1), b (1), c (1), d (2), e (2) > >-/* Return true if two seen binop structures pointed to by P1 and P2 are equal. >- We have to check the operators both ways because we don't know what >- order they appear in the table. */ > >-static int >-seen_binop_eq (const void *p1, const void *p2) >-{ >- const seen_binop_t sb1 = (seen_binop_t) p1; >- const seen_binop_t sb2 = (seen_binop_t) p2; >- return (sb1->op1 == sb2->op1 && sb1->op2 == sb2->op2) >- || (sb1->op2 == sb2->op1 && sb1->op1 == sb2->op2); >-} >+We start with our merge worklist empty, and the ops list with all of >+those on it. >+ >+You want to first merge all leaves of the same rank, as much as >+possible. >+ >+So first build a binary op of >+ >+mergetmp = a + b, and put "mergetmp" on the merge worklist. >+ >+Because there is no three operand form of PLUS_EXPR, c is not going to >+be exposed to redundancy elimination as a rank 1 operand. >+ >+So you might as well throw it on the merge worklist (you could also >+consider it to now be a rank two operand, and merge it with d and e, >+but in this case, you then have evicted e from a binary op. So at >+least in this situation, you can't win.) >+ >+Then build a binary op of d + e >+mergetmp2 = d + e >+ >+and put mergetmp2 on the merge worklist. >+ >+so merge worklist = {mergetmp, c, mergetmp2} >+ >+Continue building binary ops of these operations until you have only >+one operation left on the worklist. >+ >+So we have >+ >+build binary op >+mergetmp3 = mergetmp + c > >-/* Value rank structure. */ >+worklist = {mergetmp2, mergetmp3} > >-typedef struct valrank_d >+mergetmp4 = mergetmp2 + mergetmp3 >+ >+worklist = {mergetmp4} >+ >+because we have one operation left, we can now just set the original >+statement equal to the result of that operation. >+ >+This will at least expose a + b and d + e to redundancy elimination >+as binary operations. >+ >+For extra points, you can reuse the old statements to build the >+mergetmps, since you shouldn't run out. >+ >+ >+So why don't we do this? >+ >+Because it's expensive, and rarely will help. Most trees we are >+reassociating have 3 or less ops. If they have 2 ops, they already >+will be written into a nice single binary op. If you have 3 ops, a >+single simple check suffices to tell you whether the first two are of the >+same rank. If so, you know to order it >+ >+mergetmp = op1 + op2 >+newstmt = mergetmp + op3 >+ >+instead of >+mergetmp = op2 + op3 >+newstmt = mergetmp + op1 >+ >+If all three are of the same rank, you can't expose them all in a >+single binary operator anyway, so the above is *still* the best you >+can do. >+ >+Thus, this is what we do. When we have three ops left, we check to see >+what order to put them in, and call it a day. As a nod to vector sum >+reduction, we check if any of ops are a really a phi node that is a >+destructive update for the associating op, and keep the destructive >+update together for vector sum reduction recognition. */ >+ >+ >+/* Statistics */ >+static struct >+{ >+ int linearized; >+ int constants_eliminated; >+ int ops_eliminated; >+ int rewritten; >+} reassociate_stats; >+ >+/* Operator, rank pair. */ >+typedef struct operand_entry > { >- tree e; >- unsigned int rank; >-} *valrank_t; >+ unsigned int rank; >+ tree op; >+} *operand_entry_t; >+ >+static alloc_pool operand_entry_pool; >+ > > /* Starting rank number for a given basic block, so that we can rank > operations using unmovable instructions in that BB based on the bb > depth. */ > static unsigned int *bb_rank; > >-/* Value rank hashtable. */ >-static htab_t value_rank; >+/* Operand->rank hashtable. */ >+static htab_t operand_rank; > > >-/* Look up the value rank structure for expression E. */ >+/* Look up the operand rank structure for expression E. */ > >-static valrank_t >-find_value_rank (tree e) >+static operand_entry_t >+find_operand_rank (tree e) > { > void **slot; >- struct valrank_d vrd; >- vrd.e = e; >- slot = htab_find_slot (value_rank, &vrd, NO_INSERT); >+ struct operand_entry vrd; >+ >+ vrd.op = e; >+ slot = htab_find_slot (operand_rank, &vrd, NO_INSERT); > if (!slot) > return NULL; >- return ((valrank_t) *slot); >+ return ((operand_entry_t) *slot); > } > >-/* Insert {E,RANK} into the value rank hashtable. */ >+/* Insert {E,RANK} into the operand rank hashtable. */ > > static void >-insert_value_rank (tree e, unsigned int rank) >+insert_operand_rank (tree e, unsigned int rank) > { > void **slot; >- valrank_t new_pair = xmalloc (sizeof (*new_pair)); >- new_pair->e = e; >+ operand_entry_t new_pair = pool_alloc (operand_entry_pool); >+ >+ new_pair->op = e; > new_pair->rank = rank; >- slot = htab_find_slot (value_rank, new_pair, INSERT); >+ slot = htab_find_slot (operand_rank, new_pair, INSERT); > gcc_assert (*slot == NULL); > *slot = new_pair; >- > } > >- >-/* Return the hash value for a value rank structure */ >+/* Return the hash value for a operand rank structure */ > > static hashval_t >-valrank_hash (const void *p) >+operand_entry_hash (const void *p) > { >- const valrank_t vr = (valrank_t) p; >- return iterative_hash_expr (vr->e, 0); >+ const operand_entry_t vr = (operand_entry_t) p; >+ return iterative_hash_expr (vr->op, 0); > } > >-/* Return true if two value rank structures are equal. */ >+/* Return true if two operand rank structures are equal. */ > > static int >-valrank_eq (const void *p1, const void *p2) >-{ >- const valrank_t vr1 = (valrank_t) p1; >- const valrank_t vr2 = (valrank_t) p2; >- return vr1->e == vr2->e; >-} >- >- >-/* Initialize the reassociation pass. */ >- >-static void >-init_reassoc (void) >+operand_entry_eq (const void *p1, const void *p2) > { >- int i; >- unsigned int rank = 2; >- >- tree param; >- int *bbs = xmalloc ((last_basic_block + 1) * sizeof (int)); >- >- memset (&reassociate_stats, 0, sizeof (reassociate_stats)); >- >- /* Reverse RPO (Reverse Post Order) will give us something where >- deeper loops come later. */ >- flow_reverse_top_sort_order_compute (bbs); >- bb_rank = xcalloc (last_basic_block + 1, sizeof (unsigned int)); >- value_rank = htab_create (511, valrank_hash, >- valrank_eq, free); >- seen_binops = htab_create (511, seen_binop_hash, >- seen_binop_eq, free); >- >- /* Give each argument a distinct rank. */ >- for (param = DECL_ARGUMENTS (current_function_decl); >- param; >- param = TREE_CHAIN (param)) >- { >- if (default_def (param) != NULL) >- { >- tree def = default_def (param); >- insert_value_rank (def, ++rank); >- } >- } >- /* Give the chain decl a distinct rank. */ >- if (cfun->static_chain_decl != NULL) >- { >- tree def = default_def (cfun->static_chain_decl); >- if (def != NULL) >- insert_value_rank (def, ++rank); >- } >- >- /* Set up rank for each BB */ >- for (i = 0; i < n_basic_blocks; i++) >- bb_rank[bbs[i]] = ++rank << 16; >- >- free (bbs); >- calculate_dominance_info (CDI_DOMINATORS); >- >+ const operand_entry_t vr1 = (operand_entry_t) p1; >+ const operand_entry_t vr2 = (operand_entry_t) p2; >+ return vr1->op == vr2->op; > } > >-/* Cleanup after the reassociation pass, and print stats if >- requested. */ >- >-static void >-fini_reassoc (void) >-{ >- >- if (dump_file && (dump_flags & TDF_STATS)) >- { >- fprintf (dump_file, "Reassociation stats:\n"); >- fprintf (dump_file, "Reassociated by rank: %d\n", reassociate_stats.reassociated_by_rank); >- fprintf (dump_file, "Reassociated by match: %d\n", reassociate_stats.reassociated_by_match); >- } >- htab_delete (value_rank); >- htab_delete (seen_binops); >- free (bb_rank); >-} > > /* Given an expression E, return the rank of the expression. */ > > static unsigned int > get_rank (tree e) > { >- valrank_t vr; >+ operand_entry_t vr; > >- /* Constants have rank 0. */ >+ /* Constants have rank 0. */ > if (is_gimple_min_invariant (e)) > return 0; >- >+ > /* SSA_NAME's have the rank of the expression they are the result > of. > For globals and uninitialized values, the rank is 0. >@@ -290,24 +261,24 @@ get_rank (tree e) > if (TREE_CODE (e) == SSA_NAME) > { > tree stmt; >- tree rhs; >+ tree rhs; > unsigned int rank, maxrank; > int i; >- >+ > if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL > && e == default_def (SSA_NAME_VAR (e))) >- return find_value_rank (e)->rank; >- >+ return find_operand_rank (e)->rank; >+ > stmt = SSA_NAME_DEF_STMT (e); > if (bb_for_stmt (stmt) == NULL) > return 0; >- >+ > if (TREE_CODE (stmt) != MODIFY_EXPR > || !ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS)) > return bb_rank[bb_for_stmt (stmt)->index]; > > /* If we already have a rank for this expression, use that. */ >- vr = find_value_rank (e); >+ vr = find_operand_rank (e); > if (vr) > return vr->rank; > >@@ -318,24 +289,25 @@ get_rank (tree e) > rhs = TREE_OPERAND (stmt, 1); > if (TREE_CODE_LENGTH (TREE_CODE (rhs)) == 0) > rank = MAX (rank, get_rank (rhs)); >- else >+ else > { >- for (i = 0; >- i < TREE_CODE_LENGTH (TREE_CODE (rhs)) >+ for (i = 0; >+ i < TREE_CODE_LENGTH (TREE_CODE (rhs)) > && TREE_OPERAND (rhs, i) >- && rank != maxrank; i++) >+ && rank != maxrank; >+ i++) > rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i))); > } >- >+ > if (dump_file && (dump_flags & TDF_DETAILS)) > { > fprintf (dump_file, "Rank for "); > print_generic_expr (dump_file, e, 0); > fprintf (dump_file, " is %d\n", (rank + 1)); > } >- >+ > /* Note the rank in the hashtable so we don't recompute it. */ >- insert_value_rank (e, (rank + 1)); >+ insert_operand_rank (e, (rank + 1)); > return (rank + 1); > } > >@@ -343,282 +315,1241 @@ get_rank (tree e) > return 0; > } > >+DEF_VEC_P(operand_entry_t); >+DEF_VEC_ALLOC_P(operand_entry_t, heap); >+ >+/* We want integer ones to end up last no matter what, since they are >+ the ones we can do the most with. */ >+#define INTEGER_CONST_TYPE 1 << 3 >+#define FLOAT_CONST_TYPE 1 << 2 >+#define OTHER_CONST_TYPE 1 << 1 >+ >+/* Classify an invariant tree into integer, float, or other, so that >+ we can sort them to be near other constants of the same type. */ >+static inline int >+constant_type (tree t) >+{ >+ if (INTEGRAL_TYPE_P (TREE_TYPE (t))) >+ return INTEGER_CONST_TYPE; >+ else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) >+ return FLOAT_CONST_TYPE; >+ else >+ return OTHER_CONST_TYPE; >+} >+ >+/* qsort comparison function to sort operand entries PA and PB by rank >+ so that the sorted array is ordered by rank in decreasing order. */ >+static int >+sort_by_operand_rank (const void *pa, const void *pb) >+{ >+ const operand_entry_t oea = *(const operand_entry_t *)pa; >+ const operand_entry_t oeb = *(const operand_entry_t *)pb; >+ >+ /* It's nicer for optimize_expression if constants that are likely >+ to fold when added/multiplied//whatever are put next to each >+ other. Since all constants have rank 0, order them by type. */ >+ if (oeb->rank == 0 && oea->rank == 0) >+ return constant_type (oeb->op) - constant_type (oea->op); >+ >+ /* Lastly, make sure the versions that are the same go next to each >+ other. We use SSA_NAME_VERSION because it's stable. */ >+ if ((oeb->rank - oea->rank == 0) >+ && TREE_CODE (oea->op) == SSA_NAME >+ && TREE_CODE (oeb->op) == SSA_NAME) >+ return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op); >+ >+ return oeb->rank - oea->rank; >+} >+ >+/* Add an operand entry to *OPS for the tree operand OP. */ >+ >+static void >+add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op) >+{ >+ operand_entry_t oe = pool_alloc (operand_entry_pool); >+ >+ oe->op = op; >+ oe->rank = get_rank (op); >+ VEC_safe_push (operand_entry_t, heap, *ops, oe); >+} > >-/* Decide whether we should transpose RHS and some operand of >- LHSDEFOP. >- If yes, then return true and set TAKEOP to the operand number of LHSDEFOP to >- switch RHS for. >- Otherwise, return false. */ >+/* Return true if STMT is reassociable operation containing a binary >+ operation with tree code CODE. */ > > static bool >-should_transpose (tree rhs ATTRIBUTE_UNUSED, >- unsigned int rhsrank, >- tree lhsdefop, unsigned int *takeop) >-{ >- /* Attempt to expose the low ranked >- arguments to CSE if we have something like: >- a = <rank 2> + c (rank 1) >- b = a (rank 3) + d (rank 1) >- We want to transform this into: >- a = c + d >- b = <rank 2> + <rank 3> >- >- The op finding part wouldn't be necessary if >- we could swap the operands above and not have >- update_stmt change them back on us. >- */ >- unsigned int lowrankop; >- unsigned int lowrank; >- unsigned int highrank; >- unsigned int highrankop; >- unsigned int temp; >- >- lowrankop = 0; >- *takeop = 1; >- lowrank = get_rank (TREE_OPERAND (lhsdefop, 0)); >- temp = get_rank (TREE_OPERAND (lhsdefop, 1)); >- highrank = temp; >- highrankop = 1; >- if (temp < lowrank) >- { >- lowrankop = 1; >- highrankop = 0; >- *takeop = 0; >- highrank = lowrank; >- lowrank = temp; >- } >- >- /* If highrank == lowrank, then we had something >- like: >- a = <rank 1> + <rank 1> >- already, so there is no guarantee that >- swapping our argument in is going to be >- better. >- If we run reassoc twice, we could probably >- have a flag that switches this behavior on, >- so that we try once without it, and once with >- it, so that redundancy elimination sees it >- both ways. >- */ >- >- if (lowrank == rhsrank && highrank != lowrank) >+is_reassociable_op (tree stmt, enum tree_code code) >+{ >+ if (!IS_EMPTY_STMT (stmt) >+ && TREE_CODE (stmt) == MODIFY_EXPR >+ && TREE_CODE (TREE_OPERAND (stmt, 1)) == code >+ && has_single_use (TREE_OPERAND (stmt, 0))) > return true; >- >- /* Also, see if the LHS's high ranked op should be switched with our >- RHS simply because it is greater in rank than our current RHS. */ >- if (TREE_CODE (TREE_OPERAND (lhsdefop, highrankop)) == SSA_NAME) >- { >- tree iop = SSA_NAME_DEF_STMT (TREE_OPERAND (lhsdefop, highrankop)); >- if (TREE_CODE (iop) == MODIFY_EXPR) >- iop = TREE_OPERAND (iop, 1); >- if (TREE_CODE (iop) == TREE_CODE (lhsdefop)) >- *takeop = 1; >- if (rhsrank < get_rank (TREE_OPERAND (lhsdefop, *takeop))) >- return true; >- } >- > return false; > } > >-/* Attempt to reassociate the associative binary operator BEXPR, which >- is in the statement pointed to by CURRBSI. Return true if we >- changed the statement. */ >+ >+/* Given NAME, if NAME is defined by a unary operation OPCODE, return the >+ operand of the negate operation. Otherwise, return NULL. */ >+ >+static tree >+get_unary_op (tree name, enum tree_code opcode) >+{ >+ tree stmt = SSA_NAME_DEF_STMT (name); >+ tree rhs; >+ >+ if (TREE_CODE (stmt) != MODIFY_EXPR) >+ return NULL_TREE; >+ >+ rhs = TREE_OPERAND (stmt, 1); >+ if (TREE_CODE (rhs) == opcode) >+ return TREE_OPERAND (rhs, 0); >+ return NULL_TREE; >+} >+ >+/* If CURR and LAST are a pair of ops that OPCODE allows us to >+ eliminate through equivalences, do so, remove them from OPS, and >+ return true. Otherwise, return false. */ > > static bool >-reassociate_expr (tree bexpr, block_stmt_iterator *currbsi) >+eliminate_duplicate_pair (enum tree_code opcode, >+ VEC (operand_entry_t, heap) **ops, >+ bool *all_done, >+ unsigned int i, >+ operand_entry_t curr, >+ operand_entry_t last) > { >- tree lhs = TREE_OPERAND (bexpr, 0); >- tree rhs = TREE_OPERAND (bexpr, 1); >- tree lhsdef; >- tree lhsi; >- bool changed = false; >- unsigned int lhsrank = get_rank (lhs); >- unsigned int rhsrank = get_rank (rhs); >- >- /* If unsafe math optimizations we can do reassociation for non-integral >- types. */ >- if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) >- || !INTEGRAL_TYPE_P (TREE_TYPE (rhs))) >- && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs)) >- || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs)) >- || !flag_unsafe_math_optimizations)) >- return false; >- >- /* We want the greater ranked operand to be our "LHS" for simplicity >- sake. There is no point in actually modifying the expression, as >- update_stmt will simply resort the operands anyway. */ >- if (lhsrank < rhsrank) >+ >+ /* If we have two of the same op, and the opcode is & or |, we can >+ eliminate one of them. >+ If we have two of the same op, and the opcode is ^, we can >+ eliminate both of them. */ >+ >+ if (last && last->op == curr->op) > { >- tree temp; >- unsigned int temp1; >- temp = lhs; >- lhs = rhs; >- rhs = temp; >- temp1 = lhsrank; >- lhsrank = rhsrank; >- rhsrank = temp1; >- } >- >- /* If the high ranked operand is an SSA_NAME, and the binary >- operator is not something we've already seen somewhere else >- (i.e., it may be redundant), attempt to reassociate it. >- >- We can't reassociate expressions unless the expression we are >- going to reassociate with is only used in our current expression, >- or else we may screw up other computations, like so: >- >- a = b + c >- e = a + d >- >- g = a + f >- >- We cannot reassociate and rewrite the "a = ..." , >- because that would change the value of the computation of >- "g = a + f". */ >- if (TREE_CODE (lhs) == SSA_NAME && !find_seen_binop (lhs, rhs)) >- { >- lhsdef = SSA_NAME_DEF_STMT (lhs); >- if (TREE_CODE (lhsdef) == MODIFY_EXPR) >- { >- lhsi = TREE_OPERAND (lhsdef, 1); >- if (TREE_CODE (lhsi) == TREE_CODE (bexpr)) >- { >- use_operand_p use; >- tree usestmt; >- if (single_imm_use (lhs, &use, &usestmt)) >- { >- unsigned int takeop = 0; >- unsigned int otherop = 1; >- bool foundmatch = false; >- bool foundrank = false; >- >- /* If we can easily transpose this into an operation >- we've already seen, let's do that. >- otherwise, let's try to expose low ranked ops to >- CSE. */ >- if (find_seen_binop (TREE_OPERAND (lhsi, 1), rhs)) >- { >- takeop = 0; >- otherop = 1; >- foundmatch = true; >- } >- else if (find_seen_binop (TREE_OPERAND (lhsi, 0), >- rhs)) >- { >- takeop = 1; >- otherop = 0; >- foundmatch = true; >- } >- else if (should_transpose (rhs, rhsrank, lhsi, >- &takeop)) >- { >- foundrank = true; >- } >- if (foundmatch || foundrank) >- { >- block_stmt_iterator lhsbsi = bsi_for_stmt (lhsdef); >- if (dump_file && (dump_flags & TDF_DETAILS)) >- { >- fprintf (dump_file, "Reassociating by %s\n", >- foundmatch ? "match" : "rank"); >- fprintf (dump_file, "Before LHS:"); >- print_generic_stmt (dump_file, lhsi, 0); >- fprintf (dump_file, "Before curr expr:"); >- print_generic_stmt (dump_file, bexpr, 0); >- } >- TREE_OPERAND (bexpr, 0) = TREE_OPERAND (lhsi, takeop); >- TREE_OPERAND (lhsi, takeop) = rhs; >- TREE_OPERAND (bexpr, 1) = TREE_OPERAND (lhsdef, 0); >- if (dump_file && (dump_flags & TDF_DETAILS)) >- { >- fprintf (dump_file, "After LHS:"); >- print_generic_stmt (dump_file, lhsi, 0); >- fprintf (dump_file, "After curr expr:"); >- print_generic_stmt (dump_file, bexpr, 0); >- } >- bsi_move_before (&lhsbsi, currbsi); >- update_stmt (lhsdef); >- update_stmt (bsi_stmt (*currbsi)); >- lhsbsi = bsi_for_stmt (lhsdef); >- update_stmt (bsi_stmt (lhsbsi)); >- >- /* If update_stmt didn't reorder our operands, >- we'd like to recurse on the expression we >- just reassociated and reassociate it >- top-down, exposing further opportunities. >- Unfortunately, update_stmt does reorder them, >- so we can't do this cheaply. */ >- if (!foundmatch) >- reassociate_stats.reassociated_by_rank++; >- else >- reassociate_stats.reassociated_by_match++; >- return true; >- } >- } >+ switch (opcode) >+ { >+ case BIT_IOR_EXPR: >+ case BIT_AND_EXPR: >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, "Equivalence: "); >+ print_generic_expr (dump_file, curr->op, 0); >+ fprintf (dump_file, " [&|] "); >+ print_generic_expr (dump_file, last->op, 0); >+ fprintf (dump_file, " -> "); >+ print_generic_stmt (dump_file, last->op, 0); >+ } >+ >+ VEC_ordered_remove (operand_entry_t, *ops, i); >+ reassociate_stats.ops_eliminated ++; >+ >+ return true; >+ >+ case BIT_XOR_EXPR: >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, "Equivalence: "); >+ print_generic_expr (dump_file, curr->op, 0); >+ fprintf (dump_file, " ^ "); >+ print_generic_expr (dump_file, last->op, 0); >+ fprintf (dump_file, " -> nothing\n"); > } >+ >+ reassociate_stats.ops_eliminated += 2; >+ >+ if (VEC_length (operand_entry_t, *ops) == 2) >+ { >+ VEC_free (operand_entry_t, heap, *ops); >+ *ops = NULL; >+ add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op), >+ integer_zero_node)); >+ *all_done = true; >+ } >+ else >+ { >+ VEC_ordered_remove (operand_entry_t, *ops, i-1); >+ VEC_ordered_remove (operand_entry_t, *ops, i-1); >+ } >+ >+ return true; >+ >+ default: >+ break; > } > } >- return changed; >+ return false; > } > >-/* Reassociate expressions in basic block BB and its dominator as >- children , return true if any >- expressions changed. */ >+/* If OPCODE is PLUS_EXPR, CURR->OP is really a negate expression, >+ look in OPS for a corresponding positive operation to cancel it >+ out. If we find one, remove the other from OPS, replace >+ OPS[CURRINDEX] with 0, and return true. Otherwise, return >+ false. */ > > static bool >-reassociate_bb (basic_block bb) >+eliminate_plus_minus_pair (enum tree_code opcode, >+ VEC (operand_entry_t, heap) **ops, >+ unsigned int currindex, >+ operand_entry_t curr) > { >- bool changed = false; >- block_stmt_iterator bsi; >- basic_block son; >+ tree negateop; >+ unsigned int i; >+ operand_entry_t oe; > >- for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) >+ if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) >+ return false; >+ >+ negateop = get_unary_op (curr->op, NEGATE_EXPR); >+ if (negateop == NULL_TREE) >+ return false; >+ >+ /* Any non-negated version will have a rank that is one less than >+ the current rank. So once we hit those ranks, if we don't find >+ one, we can stop. */ >+ >+ for (i = currindex; >+ VEC_iterate (operand_entry_t, *ops, i, oe) && oe->rank >= curr->rank - 1 ; >+ i++) > { >- tree stmt = bsi_stmt (bsi); >- >- if (TREE_CODE (stmt) == MODIFY_EXPR) >+ if (oe->op == negateop && i != currindex) > { >- tree rhs = TREE_OPERAND (stmt, 1); >- if (associative_tree_code (TREE_CODE (rhs))) >+ >+ if (dump_file && (dump_flags & TDF_DETAILS)) > { >- if (reassociate_expr (rhs, &bsi)) >- { >- changed = true; >- update_stmt (stmt); >- } >- insert_seen_binop (TREE_OPERAND (rhs, 0), >- TREE_OPERAND (rhs, 1)); >+ fprintf (dump_file, "Equivalence: "); >+ print_generic_expr (dump_file, negateop, 0); >+ fprintf (dump_file, " + -"); >+ print_generic_expr (dump_file, oe->op, 0); >+ fprintf (dump_file, " -> 0\n"); > } >+ >+ VEC_ordered_remove (operand_entry_t, *ops, i); >+ add_to_ops_vec (ops, fold_convert(TREE_TYPE (oe->op), >+ integer_zero_node)); >+ VEC_ordered_remove (operand_entry_t, *ops, currindex); >+ reassociate_stats.ops_eliminated ++; >+ >+ return true; > } > } >- for (son = first_dom_son (CDI_DOMINATORS, bb); >- son; >- son = next_dom_son (CDI_DOMINATORS, son)) >- { >- changed |= reassociate_bb (son); >- } >- return changed; >+ >+ return false; > } > >- >+/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a >+ bitwise not expression, look in OPS for a corresponding operand to >+ cancel it out. If we find one, remove the other from OPS, replace >+ OPS[CURRINDEX] with 0, and return true. Otherwise, return >+ false. */ >+ > static bool >-do_reassoc (void) >-{ >- bool changed = false; >- >- changed = reassociate_bb (ENTRY_BLOCK_PTR); >+eliminate_not_pairs (enum tree_code opcode, >+ VEC (operand_entry_t, heap) **ops, >+ unsigned int currindex, >+ operand_entry_t curr) >+{ >+ tree notop; >+ unsigned int i; >+ operand_entry_t oe; >+ >+ if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) >+ || TREE_CODE (curr->op) != SSA_NAME) >+ return false; >+ >+ notop = get_unary_op (curr->op, BIT_NOT_EXPR); >+ if (notop == NULL_TREE) >+ return false; >+ >+ /* Any non-not version will have a rank that is one less than >+ the current rank. So once we hit those ranks, if we don't find >+ one, we can stop. */ >+ >+ for (i = currindex; >+ VEC_iterate (operand_entry_t, *ops, i, oe) && oe->rank >= curr->rank - 1 ; >+ i++) >+ { >+ if (oe->op == notop && i != currindex) >+ { >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, "Equivalence: "); >+ print_generic_expr (dump_file, notop, 0); >+ if (opcode == BIT_AND_EXPR) >+ fprintf (dump_file, " & ~"); >+ else if (opcode == BIT_IOR_EXPR) >+ fprintf (dump_file, " | ~"); >+ print_generic_expr (dump_file, oe->op, 0); >+ if (opcode == BIT_AND_EXPR) >+ fprintf (dump_file, " -> 0\n"); >+ else if (opcode == BIT_IOR_EXPR) >+ fprintf (dump_file, " -> -1\n"); >+ } >+ >+ if (opcode == BIT_AND_EXPR) >+ oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node); >+ else if (opcode == BIT_IOR_EXPR) >+ oe->op = build_low_bits_mask (TREE_TYPE (oe->op), >+ TYPE_PRECISION (TREE_TYPE (oe->op))); >+ >+ reassociate_stats.ops_eliminated += VEC_length (operand_entry_t, *ops) - 1; >+ VEC_free (operand_entry_t, heap, *ops); >+ *ops = NULL; >+ VEC_safe_push (operand_entry_t, heap, *ops, oe); >+ return true; >+ } >+ } > >- return changed; >+ return false; > } > >+/* Use constant value that may be present in OPS to try to eliminate >+ operands. Note that this function is only really used when we've >+ eliminated ops for other reasons, or merged constants. Across >+ single statements, fold already does all of this, plus more. There >+ is little point in duplicating logic, so I've only included the >+ identities that I could ever construct testcases to trigger. */ > >-/* Gate and execute functions for Reassociation. */ >+static void >+eliminate_using_constants (enum tree_code opcode, >+ VEC(operand_entry_t, heap) **ops) >+{ >+ operand_entry_t oelast = VEC_last (operand_entry_t, *ops); >+ >+ if (oelast->rank == 0 && INTEGRAL_TYPE_P (TREE_TYPE (oelast->op))) >+ { >+ switch (opcode) >+ { >+ case BIT_AND_EXPR: >+ if (integer_zerop (oelast->op)) >+ { >+ if (VEC_length (operand_entry_t, *ops) != 1) >+ { >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ fprintf (dump_file, "Found & 0, removing all other ops\n"); >+ reassociate_stats.ops_eliminated += VEC_length (operand_entry_t, *ops) - 1; >+ VEC_free (operand_entry_t, heap, *ops); >+ *ops = NULL; >+ VEC_safe_push (operand_entry_t, heap, *ops, oelast); >+ return; >+ } >+ } >+ /* FALLTHRU */ >+ case BIT_IOR_EXPR: >+ if (integer_all_onesp (oelast->op)) >+ { >+ if (VEC_length (operand_entry_t, *ops) != 1) >+ { >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ fprintf (dump_file, "Found [&|] -1, removing\n"); >+ VEC_pop (operand_entry_t, *ops); >+ reassociate_stats.ops_eliminated++; >+ } >+ } >+ break; >+ case MULT_EXPR: >+ if (integer_zerop (oelast->op)) >+ { >+ if (VEC_length (operand_entry_t, *ops) != 1) >+ { >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ fprintf (dump_file, "Found * 0, removing all other ops\n"); >+ reassociate_stats.ops_eliminated += VEC_length (operand_entry_t, *ops) - 1; >+ VEC_free (operand_entry_t, heap, *ops); >+ *ops = NULL; >+ VEC_safe_push (operand_entry_t, heap, *ops, oelast); >+ return; >+ } >+ } >+ else if (integer_onep (oelast->op)) >+ { >+ if (VEC_length (operand_entry_t, *ops) != 1) >+ { >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ fprintf (dump_file, "Found * 1, removing\n"); >+ VEC_pop (operand_entry_t, *ops); >+ reassociate_stats.ops_eliminated++; >+ return; >+ } >+ } >+ break; >+ case BIT_XOR_EXPR: >+ case PLUS_EXPR: >+ case MINUS_EXPR: >+ if (integer_zerop (oelast->op)) >+ { >+ if (VEC_length (operand_entry_t, *ops) != 1) >+ { >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ fprintf (dump_file, "Found [|^+] 0, removing\n"); >+ VEC_pop (operand_entry_t, *ops); >+ reassociate_stats.ops_eliminated++; >+ return; >+ } >+ } >+ break; >+ default: >+ break; >+ } >+ } >+} >+ >+/* Perform various identities and other optimizations on the list of >+ operand entries, stored in OPS. The tree code for the binary >+ operation between all the operands is OPCODE. */ > > static void >-execute_reassoc (void) >+optimize_ops_list (enum tree_code opcode, >+ VEC (operand_entry_t, heap) **ops) > { >- init_reassoc (); >- do_reassoc (); >+ unsigned int length = VEC_length (operand_entry_t, *ops); >+ unsigned int i; >+ operand_entry_t oe; >+ int fconstcount = 0; >+ int iconstcount = 0; >+ int oconstcount = 0; >+ operand_entry_t oelast = NULL; >+ bool iterate = false; >+ >+ if (length == 1) >+ return; >+ >+ oelast = VEC_last (operand_entry_t, *ops); >+ >+ /* If the last two are constants, pop the constants off, merge them >+ and try the next two. */ >+ if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) >+ { >+ operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2); >+ >+ if (oelm1->rank == 0 >+ && is_gimple_min_invariant (oelm1->op) >+ && lang_hooks.types_compatible_p (TREE_TYPE (oelm1->op), >+ TREE_TYPE (oelast->op))) >+ { >+ tree folded = fold_build2 (opcode, TREE_TYPE (oelm1->op), >+ oelm1->op, oelast->op); >+ >+ if (is_gimple_min_invariant (folded)) >+ { >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ fprintf (dump_file, "Merging constants\n"); >+ >+ VEC_pop (operand_entry_t, *ops); >+ VEC_pop (operand_entry_t, *ops); >+ >+ add_to_ops_vec (ops, folded); >+ reassociate_stats.constants_eliminated++; >+ >+ optimize_ops_list (opcode, ops); >+ return; >+ } >+ } >+ } >+ >+ eliminate_using_constants (opcode, ops); >+ oelast = NULL; >+ >+ for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);) >+ { >+ bool done = false; >+ >+ if (eliminate_not_pairs (opcode, ops, i, oe)) >+ return; >+ if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) >+ || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))) >+ { >+ if (done) >+ return; >+ iterate = true; >+ oelast = NULL; >+ continue; >+ } >+ if (oe->rank == 0 && is_gimple_min_invariant (oe->op)) >+ { >+ switch (constant_type (oe->op)) >+ { >+ case INTEGER_CONST_TYPE: >+ iconstcount++; >+ break; >+ >+ case FLOAT_CONST_TYPE: >+ fconstcount++; >+ break; >+ case OTHER_CONST_TYPE: >+ oconstcount++; >+ break; >+ >+ default: >+ break; >+ } >+ } >+ oelast = oe; >+ i++; >+ } >+ if (iconstcount > 0 && dump_file && (dump_flags & TDF_DETAILS)) >+ fprintf (dump_file, "iconstcount is %d\n", iconstcount); >+ if (fconstcount > 0 && dump_file && (dump_flags & TDF_DETAILS)) >+ fprintf (dump_file, "fconstcount is %d\n", fconstcount); >+ if (oconstcount> 0 && dump_file && (dump_flags & TDF_DETAILS)) >+ fprintf (dump_file, "oconstcount is %d\n", oconstcount); >+ >+ length = VEC_length (operand_entry_t, *ops); >+ oelast = VEC_last (operand_entry_t, *ops); >+ >+ if (iterate) >+ optimize_ops_list (opcode, ops); >+} >+ >+#if 0 >+static tree >+merge_two_ops (enum tree_code code, tree op1, tree op2) >+{ >+ tree temp; >+ tree name; >+ tree newexpr; >+ >+ temp = create_tmp_var (TREE_TYPE (op1), "mergetmp"); >+ add_referenced_tmp_var (temp); >+ newexpr = fold_build2 (code, TREE_TYPE (op1), op1, op2); >+ newexpr = build (MODIFY_EXPR, TREE_TYPE (op1), temp, newexpr); >+ name = make_ssa_name (temp, newexpr); >+ TREE_OPERAND (newexpr, 0) = name; >+ return name; >+} >+ >+static void >+merge_leaves_of_rank (enum tree_code opcode, VEC (tree, heap) *worklist, >+ VEC (tree, heap) **results) >+{ >+ unsigned int length = VEC_length (tree, worklist); >+ if (length % 2 == 1) >+ { >+ VEC_safe_push (tree, heap, *results, VEC_pop (tree, worklist)); >+ length--; >+ } >+ while (length != 0) >+ { >+ tree oe1 = VEC_pop (tree, worklist); >+ tree oe2 = VEC_pop (tree, worklist); >+ length -= 2; >+ VEC_safe_push (tree, heap, *results, merge_two_ops (opcode, oe1, oe2)); >+ } >+ VEC_free (tree, heap, worklist); >+ >+} >+ >+static void >+rewrite_expr_tree_new (tree stmt, enum tree_code opcode, >+ VEC (operand_entry_t, heap) **ops) >+{ >+ VEC(tree, heap) *initial_merge_worklist = NULL; >+ VEC(tree, heap) *merge_worklist = NULL; >+ VEC(tree, heap) *stmtsforsale = NULL; >+ operand_entry_t oe; >+ unsigned int opslength = VEC_length (operand_entry_t, *ops); >+ >+ if (opslength < 4) >+ return; >+ >+ /* Initialize the worklist */ >+ while (VEC_length (operand_entry_t, *ops) != 0) >+ { >+ initial_merge_worklist = NULL; >+ oe = VEC_pop (operand_entry_t, *ops); >+ while (!VEC_empty (operand_entry_t, *ops) >+ && VEC_last (operand_entry_t, *ops)->rank == oe->rank) >+ { >+ VEC_safe_push (tree, heap, initial_merge_worklist, oe->op); >+ oe = VEC_pop (operand_entry_t, *ops); >+ } >+ VEC_safe_push (tree, heap, initial_merge_worklist, oe->op); >+ merge_leaves_of_rank (opcode, initial_merge_worklist, &merge_worklist); >+ } >+ >+ gcc_assert (VEC_length (tree, merge_worklist) <= opslength); >+ >+ while (VEC_length (tree, merge_worklist) != 0) >+ add_to_ops_vec (ops, VEC_pop (tree, merge_worklist)); >+} >+#endif >+ >+static bool >+is_phi_for_stmt (tree stmt, tree operand) >+{ >+ tree def_stmt; >+ tree lhs = TREE_OPERAND (stmt, 0); >+ use_operand_p arg_p; >+ ssa_op_iter i; >+ >+ if (TREE_CODE (operand) != SSA_NAME) >+ return false; >+ >+ def_stmt = SSA_NAME_DEF_STMT (operand); >+ if (TREE_CODE (def_stmt) != PHI_NODE) >+ return false; >+ >+ FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE) >+ if (lhs == USE_FROM_PTR (arg_p)) >+ return true; >+ return false; >+} >+ >+/* Recursively rewrite our linearized statements so that the operators >+ match those in OPS[OPINDEX], putting the computation in rank >+ order. */ >+ >+static void >+rewrite_expr_tree (tree stmt, unsigned int opindex, >+ VEC(operand_entry_t, heap) * ops) >+{ >+ tree rhs = TREE_OPERAND (stmt, 1); >+ operand_entry_t oe; >+ >+ /* If we have three operands left, then we want to make sure the one >+ that gets the double binary op are the ones with the same rank. >+ >+ The alternative we try is to see if this is a destructive >+ update style statement, which is like: >+ b = phi (a, ...) >+ a = c + b; >+ In that case, we want to use the destructive update form to >+ expose the possible vectorizer sum reduction opportunity. >+ In that case, the third operand will be the phi node. >+ >+ We could, of course, try to be better as noted above, and do a >+ lot of work to try to find these opportunities in >3 operand >+ cases, but it is unlikely to be worth it. */ >+ if (opindex + 3 == VEC_length (operand_entry_t, ops)) >+ { >+ operand_entry_t oe1, oe2, oe3; >+ >+ oe1 = VEC_index (operand_entry_t, ops, opindex); >+ oe2 = VEC_index (operand_entry_t, ops, opindex + 1); >+ oe3 = VEC_index (operand_entry_t, ops, opindex + 2); >+ >+ if ((oe1->rank == oe2->rank >+ && oe2->rank != oe3->rank) >+ || (is_phi_for_stmt (stmt, oe3->op) >+ && !is_phi_for_stmt (stmt, oe1->op) >+ && !is_phi_for_stmt (stmt, oe2->op))) >+ { >+ struct operand_entry temp = *oe3; >+ oe3->op = oe1->op; >+ oe3->rank = oe1->rank; >+ oe1->op = temp.op; >+ oe1->rank= temp.rank; >+ } >+ } >+ >+ /* The final recursion case for this function is that you have >+ exactly two operations left. >+ If we had one exactly one op in the entire list to start with, we >+ would have never called this function, and the tail recursion >+ rewrites them one at a time. */ >+ if (opindex + 2 == VEC_length (operand_entry_t, ops)) >+ { >+ operand_entry_t oe1, oe2; >+ >+ oe1 = VEC_index (operand_entry_t, ops, opindex); >+ oe2 = VEC_index (operand_entry_t, ops, opindex + 1); >+ >+ if (TREE_OPERAND (rhs, 0) != oe1->op >+ || TREE_OPERAND (rhs, 1) != oe2->op) >+ { >+ >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, "Transforming "); >+ print_generic_expr (dump_file, rhs, 0); >+ } >+ >+ TREE_OPERAND (rhs, 0) = oe1->op; >+ TREE_OPERAND (rhs, 1) = oe2->op; >+ update_stmt (stmt); >+ >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, " into "); >+ print_generic_stmt (dump_file, rhs, 0); >+ } >+ >+ } >+ return; >+ } >+ >+ /* If we hit here, we should have 3 or more ops left. */ >+ gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops)); >+ >+ /* Rewrite the next operator. */ >+ oe = VEC_index (operand_entry_t, ops, opindex); >+ >+ if (oe->op != TREE_OPERAND (rhs, 1)) >+ { >+ >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, "Transforming "); >+ print_generic_expr (dump_file, rhs, 0); >+ } >+ >+ TREE_OPERAND (rhs, 1) = oe->op; >+ update_stmt (stmt); >+ >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, " into "); >+ print_generic_stmt (dump_file, rhs, 0); >+ } >+ } >+ /* Recurse on the LHS of the binary operator, which is guaranteed to >+ be the non-leaf side. */ >+ rewrite_expr_tree (SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0)), >+ opindex + 1, ops); >+} >+ >+/* Transform STMT, which is really (A +B) + (C + D) into the left >+ linear form, ((A+B)+C)+D. >+ Recurse on D if necessary. */ >+ >+static void >+linearize_expr (tree stmt) >+{ >+ block_stmt_iterator bsinow, bsirhs; >+ tree rhs = TREE_OPERAND (stmt, 1); >+ enum tree_code rhscode = TREE_CODE (rhs); >+ tree binrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1)); >+ tree binlhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0)); >+ tree newbinrhs = NULL_TREE; >+ >+ gcc_assert (is_reassociable_op (binlhs, TREE_CODE (rhs)) >+ && is_reassociable_op (binrhs, TREE_CODE (rhs))); >+ >+ bsinow = bsi_for_stmt (stmt); >+ bsirhs = bsi_for_stmt (binrhs); >+ bsi_move_before (&bsirhs, &bsinow); >+ >+ TREE_OPERAND (rhs, 1) = TREE_OPERAND (TREE_OPERAND (binrhs, 1), 0); >+ if (TREE_CODE (TREE_OPERAND (rhs, 1)) == SSA_NAME) >+ newbinrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1)); >+ TREE_OPERAND (TREE_OPERAND (binrhs, 1), 0) = TREE_OPERAND (binlhs, 0); >+ TREE_OPERAND (rhs, 0) = TREE_OPERAND (binrhs, 0); >+ >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, "Linearized: "); >+ print_generic_stmt (dump_file, rhs, 0); >+ } >+ >+ reassociate_stats.linearized++; >+ update_stmt (binrhs); >+ update_stmt (binlhs); >+ update_stmt (stmt); >+ TREE_VISITED (binrhs) = 1; >+ TREE_VISITED (binlhs) = 1; >+ TREE_VISITED (stmt) = 1; >+ >+ /* Tail recurse on the new rhs if it still needs reassociation. */ >+ if (newbinrhs && is_reassociable_op (newbinrhs, rhscode)) >+ linearize_expr (stmt); >+ >+} >+ >+/* If LHS has a single immediate use that is a MODIFY_EXPR, return >+ it. Otherwise, return NULL. */ >+ >+static tree >+get_single_immediate_use (tree lhs) >+{ >+ use_operand_p immuse; >+ tree immusestmt; >+ >+ if (TREE_CODE (lhs) == SSA_NAME >+ && single_imm_use (lhs, &immuse, &immusestmt)) >+ { >+ if (TREE_CODE (immusestmt) == RETURN_EXPR) >+ immusestmt = TREE_OPERAND (immusestmt, 0); >+ if (TREE_CODE (immusestmt) == MODIFY_EXPR) >+ return immusestmt; >+ } >+ return NULL_TREE; >+} >+static VEC(tree, heap) *broken_up_subtracts; >+ >+ >+/* Recursively negate the value of TONEGATE, and return the SSA_NAME >+ representing the negated value. Insertions of any necessary >+ instructions go before BSI. >+ This function is recursive in that, if you hand it "a_5" as the >+ value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will >+ transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ >+ >+static tree >+negate_value (tree tonegate, block_stmt_iterator *bsi) >+{ >+ tree negatedef = tonegate; >+ tree resultofnegate; >+ >+ if (TREE_CODE (tonegate) == SSA_NAME) >+ negatedef = SSA_NAME_DEF_STMT (tonegate); >+ >+ /* If we are trying to negate a name, defined by an add, negate the >+ add operands instead. */ >+ if (TREE_CODE (tonegate) == SSA_NAME >+ && TREE_CODE (negatedef) == MODIFY_EXPR >+ && TREE_CODE (TREE_OPERAND (negatedef, 0)) == SSA_NAME >+ && num_imm_uses (TREE_OPERAND (negatedef, 0)) == 1 >+ && TREE_CODE (TREE_OPERAND (negatedef, 1)) == PLUS_EXPR) >+ { >+ block_stmt_iterator bsi; >+ tree binop = TREE_OPERAND (negatedef, 1); >+ >+ bsi = bsi_for_stmt (negatedef); >+ TREE_OPERAND (binop, 0) = negate_value (TREE_OPERAND (binop, 0), >+ &bsi); >+ bsi = bsi_for_stmt (negatedef); >+ TREE_OPERAND (binop, 1) = negate_value (TREE_OPERAND (binop, 1), >+ &bsi); >+ update_stmt (negatedef); >+ return TREE_OPERAND (negatedef, 0); >+ } >+ >+ tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); >+ resultofnegate = force_gimple_operand_bsi (bsi, tonegate, true, >+ NULL_TREE); >+ VEC_safe_push (tree, heap, broken_up_subtracts, resultofnegate); >+ return resultofnegate; >+ >+} >+ >+/* Return true if we should break up the subtract in STMT into an add >+ with negate. This is true when we the subtract operands are really >+ adds, or the subtract itself is used in an add expression. In >+ either case, breaking up the subtract into an add with negate >+ exposes the adds to reassociation. */ >+ >+static bool >+should_break_up_subtract (tree stmt) >+{ >+ >+ tree lhs = TREE_OPERAND (stmt, 0); >+ tree rhs = TREE_OPERAND (stmt, 1); >+ tree binlhs = TREE_OPERAND (rhs, 0); >+ tree binrhs = TREE_OPERAND (rhs, 1); >+ tree immusestmt; >+ >+ if (TREE_CODE (binlhs) == SSA_NAME >+ && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR)) >+ return true; >+ >+ if (TREE_CODE (binrhs) == SSA_NAME >+ && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR)) >+ return true; >+ >+ if (TREE_CODE (lhs) == SSA_NAME >+ && (immusestmt = get_single_immediate_use (lhs)) >+ && TREE_CODE (TREE_OPERAND (immusestmt, 1)) == PLUS_EXPR) >+ return true; >+ return false; >+ >+} >+ >+/* Transform STMT from A - B into A + -B. */ >+ >+static void >+break_up_subtract (tree stmt, block_stmt_iterator *bsi) >+{ >+ tree rhs = TREE_OPERAND (stmt, 1); >+ >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, "Breaking up subtract "); >+ print_generic_stmt (dump_file, stmt, 0); >+ } >+ >+ TREE_SET_CODE (TREE_OPERAND (stmt, 1), PLUS_EXPR); >+ TREE_OPERAND (rhs, 1) = negate_value (TREE_OPERAND (rhs, 1), bsi); >+ >+ update_stmt (stmt); >+} >+ >+/* Recursively linearize a binary expression that is the RHS of STMT. >+ Place the operands of the expression tree in the vector named OPS. */ >+ >+static void >+linearize_expr_tree (VEC(operand_entry_t, heap) **ops, tree stmt) >+{ >+ block_stmt_iterator bsinow, bsilhs; >+ tree rhs = TREE_OPERAND (stmt, 1); >+ tree binrhs = TREE_OPERAND (rhs, 1); >+ tree binlhs = TREE_OPERAND (rhs, 0); >+ tree binlhsdef, binrhsdef; >+ bool binlhsisreassoc = false; >+ bool binrhsisreassoc = false; >+ enum tree_code rhscode = TREE_CODE (rhs); >+ >+ TREE_VISITED (stmt) = 1; >+ >+ if (TREE_CODE (binlhs) == SSA_NAME) >+ { >+ binlhsdef = SSA_NAME_DEF_STMT (binlhs); >+ binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode); >+ } >+ >+ if (TREE_CODE (binrhs) == SSA_NAME) >+ { >+ binrhsdef = SSA_NAME_DEF_STMT (binrhs); >+ binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode); >+ } >+ >+ /* If the LHS is not reassociable, but the RHS is, we need to swap >+ them. If neither is reassociable, there is nothing we can do, so >+ just put them in the ops vector. If the LHS is reassociable, >+ linearize it. If both are reassociable, then linearize the RHS >+ and the LHS. */ >+ >+ if (!binlhsisreassoc) >+ { >+ tree temp; >+ >+ if (!binrhsisreassoc) >+ { >+ add_to_ops_vec (ops, binrhs); >+ add_to_ops_vec (ops, binlhs); >+ return; >+ } >+ >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, "swapping operands of "); >+ print_generic_expr (dump_file, stmt, 0); >+ } >+ >+ swap_tree_operands (stmt, &TREE_OPERAND (rhs, 0), >+ &TREE_OPERAND (rhs, 1)); >+ update_stmt (stmt); >+ >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, " is now "); >+ print_generic_stmt (dump_file, stmt, 0); >+ } >+ >+ /* We want to make it so the lhs is always the reassociative op, >+ so swap. */ >+ temp = binlhs; >+ binlhs = binrhs; >+ binrhs = temp; >+ } >+ else if (binrhsisreassoc) >+ { >+ linearize_expr (stmt); >+ gcc_assert (rhs == TREE_OPERAND (stmt, 1)); >+ binlhs = TREE_OPERAND (rhs, 0); >+ binrhs = TREE_OPERAND (rhs, 1); >+ } >+ >+ gcc_assert (TREE_CODE (binrhs) != SSA_NAME >+ || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), rhscode)); >+ bsinow = bsi_for_stmt (stmt); >+ bsilhs = bsi_for_stmt (SSA_NAME_DEF_STMT (binlhs)); >+ bsi_move_before (&bsilhs, &bsinow); >+ linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs)); >+ add_to_ops_vec (ops, binrhs); >+} >+ >+/* Repropagate the negates back into subtracts, since no other pass >+ currently does it. */ >+ >+static void >+repropagate_negates (void) >+{ >+ unsigned int i = 0; >+ tree negate; >+ >+ for (i = 0; VEC_iterate (tree, broken_up_subtracts, i, negate); i++) >+ { >+ tree user = get_single_immediate_use (negate); >+ >+ /* Due to linearization, the negate operand should now be an RHS >+ leaf of some PLUS expression. I.E. >+ >+ d = -c >+ e = a + d >+ >+ So just repropagate it, transforming the PLUS_EXPR back into >+ a MINUS_EXPR. */ >+ >+ if (user >+ && TREE_CODE (user) == MODIFY_EXPR >+ && TREE_CODE (TREE_OPERAND (user, 1)) == PLUS_EXPR >+ && TREE_OPERAND (TREE_OPERAND (user, 1), 1) == negate) >+ { >+ tree rhs = TREE_OPERAND (user, 1); >+ TREE_SET_CODE (rhs, MINUS_EXPR); >+ TREE_OPERAND (rhs, 1) = get_unary_op (negate, NEGATE_EXPR); >+ update_stmt (user); >+ } >+ } >+} >+ >+/* Break up subtract operations in block BB. >+ >+ We do this top down because we don't know whether the subtract is >+ part of a possible chain of reassociation except at the top. >+ >+ IE given >+ d = f + g >+ c = a + e >+ b = c - d >+ q = b - r >+ k = t - q >+ >+ we want to break up k = t - q, but we won't until we've transformed q >+ = b - r, which won't be broken up until we transform b = c - d. */ >+ >+static void >+break_up_subtract_bb (basic_block bb) >+{ >+ block_stmt_iterator bsi; >+ basic_block son; >+ >+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) >+ { >+ tree stmt = bsi_stmt (bsi); >+ >+ if (TREE_CODE (stmt) == MODIFY_EXPR) >+ { >+ tree lhs = TREE_OPERAND (stmt, 0); >+ tree rhs = TREE_OPERAND (stmt, 1); >+ >+ TREE_VISITED (stmt) = 0; >+ /* If unsafe math optimizations we can do reassociation for >+ non-integral types. */ >+ if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) >+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs))) >+ && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs)) >+ || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs)) >+ || !flag_unsafe_math_optimizations)) >+ continue; >+ >+ /* Check for a subtract used only in an addition. If this >+ is the case, transform it into add of a negate for better >+ reassociation. IE transform C = A-B into C = A + -B if C >+ is only used in an addition. */ >+ if (TREE_CODE (rhs) == MINUS_EXPR) >+ if (should_break_up_subtract (stmt)) >+ break_up_subtract (stmt, &bsi); >+ } >+ } >+ for (son = first_dom_son (CDI_DOMINATORS, bb); >+ son; >+ son = next_dom_son (CDI_DOMINATORS, son)) >+ break_up_subtract_bb (son); >+} >+ >+/* Reassociate expressions in basic block BB and its post-dominator as >+ children. */ >+ >+static void >+reassociate_bb (basic_block bb) >+{ >+ block_stmt_iterator bsi; >+ basic_block son; >+ >+ for (bsi = bsi_last (bb); !bsi_end_p (bsi); bsi_prev (&bsi)) >+ { >+ tree stmt = bsi_stmt (bsi); >+ >+ if (TREE_CODE (stmt) == MODIFY_EXPR) >+ { >+ tree lhs = TREE_OPERAND (stmt, 0); >+ tree rhs = TREE_OPERAND (stmt, 1); >+ >+ /* If this was part of an already processed tree, we don't >+ need to touch it again. */ >+ if (TREE_VISITED (stmt)) >+ continue; >+ >+ /* If unsafe math optimizations we can do reassociation for >+ non-integral types. */ >+ if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) >+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs))) >+ && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs)) >+ || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs)) >+ || !flag_unsafe_math_optimizations)) >+ continue; >+ >+ if (associative_tree_code (TREE_CODE (rhs))) >+ { >+ VEC(operand_entry_t, heap) *ops = NULL; >+ >+ /* There may be no immediate uses left by the time we >+ get here because we may have eliminated them all. */ >+ if (TREE_CODE (lhs) == SSA_NAME && num_imm_uses (lhs) == 0) >+ continue; >+ >+ TREE_VISITED (stmt) = 1; >+ linearize_expr_tree (&ops, stmt); >+ qsort (VEC_address (operand_entry_t, ops), >+ VEC_length (operand_entry_t, ops), >+ sizeof (operand_entry_t), >+ sort_by_operand_rank); >+ optimize_ops_list (TREE_CODE (rhs), &ops); >+ >+ if (VEC_length (operand_entry_t, ops) == 1) >+ { >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, "Transforming "); >+ print_generic_expr (dump_file, rhs, 0); >+ } >+ TREE_OPERAND (stmt, 1) = VEC_last (operand_entry_t, ops)->op; >+ update_stmt (stmt); >+ >+ if (dump_file && (dump_flags & TDF_DETAILS)) >+ { >+ fprintf (dump_file, " into "); >+ print_generic_stmt (dump_file, >+ TREE_OPERAND (stmt, 1), 0); >+ } >+ } >+ else >+ { >+ /*rewrite_expr_tree_new (stmt, TREE_CODE (rhs), &ops);*/ >+ rewrite_expr_tree (stmt, 0, ops); >+ } >+ >+ VEC_free (operand_entry_t, heap, ops); >+ } >+ } >+ } >+ for (son = first_dom_son (CDI_POST_DOMINATORS, bb); >+ son; >+ son = next_dom_son (CDI_POST_DOMINATORS, son)) >+ reassociate_bb (son); >+} >+ >+void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops); >+void debug_ops_vector (VEC (operand_entry_t, heap) *ops); >+ >+/* Dump the operand entry vector OPS to FILE. */ >+ >+void >+dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops) >+{ >+ operand_entry_t oe; >+ unsigned int i; >+ >+ for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++) >+ { >+ fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); >+ print_generic_stmt (file, oe->op, 0); >+ } >+} >+ >+/* Dump the operand entry vector OPS to STDERR. */ >+ >+void >+debug_ops_vector (VEC (operand_entry_t, heap) *ops) >+{ >+ dump_ops_vector (stderr, ops); >+} >+ >+static void >+do_reassoc (void) >+{ >+ break_up_subtract_bb (ENTRY_BLOCK_PTR); >+ reassociate_bb (EXIT_BLOCK_PTR); >+} >+ >+/* Initialize the reassociation pass. */ >+ >+static void >+init_reassoc (void) >+{ >+ int i; >+ unsigned int rank = 2; >+ tree param; >+ int *bbs = xmalloc ((last_basic_block + 1) * sizeof (int)); >+ >+ memset (&reassociate_stats, 0, sizeof (reassociate_stats)); >+ >+ operand_entry_pool = create_alloc_pool ("operand entry pool", >+ sizeof (struct operand_entry), 30); >+ >+ /* Reverse RPO (Reverse Post Order) will give us something where >+ deeper loops come later. */ >+ flow_depth_first_order_compute (NULL, bbs); >+ bb_rank = xcalloc (last_basic_block + 1, sizeof (unsigned int)); >+ >+ operand_rank = htab_create (511, operand_entry_hash, >+ operand_entry_eq, 0); >+ >+ /* Give each argument a distinct rank. */ >+ for (param = DECL_ARGUMENTS (current_function_decl); >+ param; >+ param = TREE_CHAIN (param)) >+ { >+ if (default_def (param) != NULL) >+ { >+ tree def = default_def (param); >+ insert_operand_rank (def, ++rank); >+ } >+ } >+ >+ /* Give the chain decl a distinct rank. */ >+ if (cfun->static_chain_decl != NULL) >+ { >+ tree def = default_def (cfun->static_chain_decl); >+ if (def != NULL) >+ insert_operand_rank (def, ++rank); >+ } >+ >+ /* Set up rank for each BB */ >+ for (i = 0; i < n_basic_blocks; i++) >+ bb_rank[bbs[i]] = ++rank << 16; >+ >+ free (bbs); >+ calculate_dominance_info (CDI_DOMINATORS); >+ calculate_dominance_info (CDI_POST_DOMINATORS); >+ broken_up_subtracts = NULL; >+} >+ >+/* Cleanup after the reassociation pass, and print stats if >+ requested. */ >+ >+static void >+fini_reassoc (void) >+{ >+ >+ if (dump_file && (dump_flags & TDF_STATS)) >+ { >+ fprintf (dump_file, "Reassociation stats:\n"); >+ fprintf (dump_file, "Linearized: %d\n", reassociate_stats.linearized); >+ fprintf (dump_file, "Constants eliminated: %d\n", >+ reassociate_stats.constants_eliminated); >+ fprintf (dump_file, "Ops eliminated: %d\n", >+ reassociate_stats.ops_eliminated); >+ fprintf (dump_file, "Statements rewritten: %d\n", >+ reassociate_stats.rewritten); >+ } >+ htab_delete (operand_rank); >+ >+ free_alloc_pool (operand_entry_pool); >+ free (bb_rank); >+ VEC_free (tree, heap, broken_up_subtracts); >+} >+ >+/* Gate and execute functions for Reassociation. */ >+ >+static void >+execute_reassoc (void) >+{ >+ init_reassoc (); >+ >+ do_reassoc (); >+ repropagate_negates (); >+ > fini_reassoc (); > } > >@@ -635,7 +1566,6 @@ struct tree_opt_pass pass_reassoc = > 0, /* properties_provided */ > 0, /* properties_destroyed */ > 0, /* todo_flags_start */ >- TODO_update_ssa | TODO_dump_func >- | TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */ >+ TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */ > 0 /* letter */ > };
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 113541
:
73558
|
73559
|
73560
|
73561
| 73562