Attachment 73562 Details for Bug 113541 – reassoc-20051116-vanilla1.patch

[patch] reassoc-20051116-vanilla1.patch

reassoc-20051116-vanilla1.patch (text/plain), 71.21 KB, created by Alexey Maximov on 2005-11-25 02:25:02 UTC

(hide)

Description:

Filename:

MIME Type:

Creator: Alexey Maximov

Created: 2005-11-25 02:25:02 UTC

Size: 71.21 KB

patch

obsolete

> gcc/Makefile.in                            |    3 
> gcc/passes.c                               |    3 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-1.c  |    2 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-10.c |   11 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-11.c |   11 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-2.c  |   17 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-3.c  |   20 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-4.c  |    4 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-5.c  |   17 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-6.c  |   13 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-7.c  |   12 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-8.c  |   13 
> gcc/testsuite/gcc.dg/tree-ssa/reassoc-9.c  |   14 
> gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-2.c  |    5 
> gcc/tree-flow.h                            |    2 
> gcc/tree-ssa-dom.c                         |   47 
> gcc/tree-ssa-operands.c                    |   34 
> gcc/tree-ssa-reassoc.c                     | 1800 +++++++++++++++++++++--------
> 18 files changed, 1526 insertions(+), 502 deletions(-)
>
>Index: gcc41/gcc/Makefile.in
>===================================================================
>--- gcc41.orig/gcc/Makefile.in
>+++ gcc41/gcc/Makefile.in
>@@ -1932,7 +1932,8 @@ tree-ssa-alias.o : tree-ssa-alias.c $(TR
> tree-ssa-reassoc.o : tree-ssa-reassoc.c $(TREE_FLOW_H) $(CONFIG_H) \
>    $(SYSTEM_H) $(TREE_H) $(GGC_H) $(DIAGNOSTIC_H) errors.h $(TIMEVAR_H) \
>    $(TM_H) coretypes.h $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) tree-iterator.h\
>-   $(BASIC_BLOCK_H) $(HASHTAB_H) $(TREE_GIMPLE_H) tree-inline.h
>+   $(BASIC_BLOCK_H) $(TREE_GIMPLE_H) tree-inline.h vec.h \
>+   alloc-pool.h
> tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
>    $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h $(DIAGNOSTIC_H) \
>    $(FLAGS_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) toplev.h \
>Index: gcc41/gcc/passes.c
>===================================================================
>--- gcc41.orig/gcc/passes.c
>+++ gcc41/gcc/passes.c
>@@ -535,8 +535,9 @@ init_optimization_passes (void)
>      which can create arbitrary GIMPLE.  */
>   NEXT_PASS (pass_may_alias);
>   NEXT_PASS (pass_cse_reciprocals);
>-  NEXT_PASS (pass_split_crit_edges);
>   NEXT_PASS (pass_reassoc);
>+  NEXT_PASS (pass_dce);
>+  NEXT_PASS (pass_split_crit_edges);
>   NEXT_PASS (pass_pre);
>   NEXT_PASS (pass_sink_code);
>   NEXT_PASS (pass_tree_loop);
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-1.c
>===================================================================
>--- gcc41.orig/gcc/testsuite/gcc.dg/tree-ssa/reassoc-1.c
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-1.c
>@@ -14,5 +14,5 @@ int main(void)
>   printf ("%d %d\n", e, f);
> }
> 
>-/* { dg-final { scan-tree-dump-times "a \\\+ b" 1 "optimized"} } */
>+/* { dg-final { scan-tree-dump-times "b \\\+ a" 1 "optimized"} } */
> /* { dg-final { cleanup-tree-dump "optimized" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-10.c
>===================================================================
>--- /dev/null
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-10.c
>@@ -0,0 +1,11 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+int main(int a, int b, int c, int d)
>+{
>+  /* Should become just a & b & c & d */
>+  int e = (a & b) & (c & d);
>+  int f = (c & a) & (b & d);
>+  return e & f;
>+}
>+/* { dg-final { scan-tree-dump-times "\\\& " 3 "optimized"} } */
>+/* { dg-final { cleanup-tree-dump "optimized" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-11.c
>===================================================================
>--- /dev/null
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-11.c
>@@ -0,0 +1,11 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-reassoc" } */
>+int main(int a, int b, int c, int d)
>+{
>+  /* All the xor's cancel each other out, leaving 0  */
>+  int e = (a ^ b) ^ (c ^ d);
>+  int f = (c ^ a) ^ (b ^ d);
>+  return e ^ f;
>+}
>+/* { dg-final { scan-tree-dump-times "= 0" 1 "reassoc"} } */
>+/* { dg-final { cleanup-tree-dump "reassoc" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-2.c
>===================================================================
>--- gcc41.orig/gcc/testsuite/gcc.dg/tree-ssa/reassoc-2.c
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-2.c
>@@ -1,18 +1,17 @@
> /* { dg-do compile } */
>-/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-reassoc-details" } */
>-extern int a0, a1, a2, a3, a4; 
>-int f () 
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+int f (int a0,int a1,int a2,int a3,int a4)
> { 
>-int b0, b1, b2, b3, b4; 
>+int b0, b1, b2, b3, b4,e;
>   /* this can be optimized to four additions... */ 
>   b4 = a4 + a3 + a2 + a1 + a0; 
>   b3 = a3 + a2 + a1 + a0; 
>   b2 = a2 + a1 + a0; 
>   b1 = a1 + a0; 
>   /* This is actually 0 */
>-  return b4 - b3 + b2 - b1 - a4 - a2;
>-} 
>-/* { dg-final { scan-tree-dump-times "Reassociating by rank" 3 "reassoc" } } */
>-/* { dg-final { scan-tree-dump-times "return 0" 1 "optimized" { xfail *-*-* } } } */
>+  e = b4 - b3 + b2 - b1 - a4 - a2;
>+  return e;
>+}
>+
>+/* { dg-final { scan-tree-dump-times "return 0" 1 "optimized" } } */
> /* { dg-final { cleanup-tree-dump "optimized" } } */
>-/* { dg-final { cleanup-tree-dump "reassoc" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-3.c
>===================================================================
>--- gcc41.orig/gcc/testsuite/gcc.dg/tree-ssa/reassoc-3.c
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-3.c
>@@ -1,18 +1,6 @@
>-/* { dg-do compile } */ 
>-/* { dg-options "-O2 -fdump-tree-optimized -ffast-math" } */
>-float a, b, c, d;
>-extern int printf (const char *, ...);
>-int main(void)
>+int main(int a, int b, int c, int d)
> {
>-  float e;
>-  float f;
>-  /* We should be able to transform these into the same expression, and only have two additions.  */
>-  e = a + b;
>-  e = e + c;
>-  f = c + a;
>-  f = f + b;
>-  printf ("%f %f\n", e, f);
>+  int e = (a & ~b) & (~c & d);
>+  int f = (~c & a) & (b & ~d);
>+ return (e & f);
> }
>-
>-/* { dg-final { scan-tree-dump-times "\\\+" 2 "optimized"} } */
>-/* { dg-final { cleanup-tree-dump "optimized" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-4.c
>===================================================================
>--- gcc41.orig/gcc/testsuite/gcc.dg/tree-ssa/reassoc-4.c
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-4.c
>@@ -1,5 +1,5 @@
> /* { dg-do compile } */ 
>-/* { dg-options "-O2 -fdump-tree-optimized" } */
>+/* { dg-options "-O2 -fdump-tree-optimized -ffast-math" } */
> float a, b, c, d;
> extern int printf (const char *, ...);
> int main(void)
>@@ -14,5 +14,5 @@ int main(void)
>   printf ("%f %f\n", e, f);
> }
> 
>-/* { dg-final { scan-tree-dump-times "\\\+" 4 "optimized"} } */
>+/* { dg-final { scan-tree-dump-times "\\\+" 2 "optimized"} } */
> /* { dg-final { cleanup-tree-dump "optimized" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-5.c
>===================================================================
>--- /dev/null
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-5.c
>@@ -0,0 +1,17 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+extern int printf (const char *, ...);
>+int main(int argc, int b)
>+{
>+  /* We should be able to get rid of the a - i.  */
>+  int i;
>+  for (i = 0; i < 50; i++)
>+    {
>+      int a = b + i;
>+      int c = a - i;
>+      int d = argc + b;
>+      printf ("%d %d\n", c,d);
>+    }
>+}
>+/* { dg-final { scan-tree-dump-times "a - i" 0 "optimized"} } */
>+/* { dg-final { cleanup-tree-dump "optimized" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-6.c
>===================================================================
>--- /dev/null
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-6.c
>@@ -0,0 +1,13 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-reassoc" } */
>+int main(int a, int b, int c, int d)
>+{
>+  /* Should be transformed into a + c + 8 */
>+  int e = a + 3;
>+  int f = c + 5;
>+  int g = e + f;
>+  return g;
>+}
>+
>+/* { dg-final { scan-tree-dump-times "\\\+ 8" 1 "reassoc"} } */
>+/* { dg-final { cleanup-tree-dump "reassoc" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-7.c
>===================================================================
>--- /dev/null
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-7.c
>@@ -0,0 +1,12 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-reassoc" } */
>+int main(int a, int b, int c, int d, int e, int f, int g, int h)
>+{
>+  /* Should be transformed into a + c + d + e + g + 15 */
>+  int i = (a + 9) + (c + d);
>+  int j = (e + 4) + (2 + g);
>+  e = i + j;
>+  return e;
>+}
>+/* { dg-final { scan-tree-dump-times "\\\+ 15" 1 "reassoc"} } */
>+/* { dg-final { cleanup-tree-dump "reassoc" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-8.c
>===================================================================
>--- /dev/null
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-8.c
>@@ -0,0 +1,13 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-reassoc" } */
>+
>+int main(int a, int b, int c, int d, int e, int f, int g, int h)
>+{
>+  /* e & ~e -> 0 */
>+  int i = (a & 9) & (c & d);
>+  int j = (~e & d) & (~c & e);
>+  e = i & j;
>+  return e;
>+}
>+/* { dg-final { scan-tree-dump-times "= 0" 1 "reassoc"} } */
>+/* { dg-final { cleanup-tree-dump "reassoc" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-9.c
>===================================================================
>--- /dev/null
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/reassoc-9.c
>@@ -0,0 +1,14 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-reassoc" } */
>+
>+int main(int a, int b, int c, int d, int e, int f, int g, int h)
>+{
>+  /* Should be transformed into e = 20 */
>+  int i = (a + 9) + (c + 8);
>+  int j = (-c + 1) + (-a + 2);
>+
>+  e = i + j;
>+  return e;
>+}
>+/* { dg-final { scan-tree-dump-times "= 20" 1 "reassoc"} } */
>+/* { dg-final { cleanup-tree-dump "reassoc" } } */
>Index: gcc41/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-2.c
>===================================================================
>--- gcc41.orig/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-2.c
>+++ gcc41/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-2.c
>@@ -16,6 +16,7 @@ int motion_test1(int data, int data_0, i
> 	return v * t * u;
> }
> /* We should eliminate one computation of data_0 + data_3 along the 
>-   main path, causing one reload. */
>-/* { dg-final { scan-tree-dump-times "Eliminated: 1" 1 "pre"} } */
>+   main path, and one computation of v * i along the main path, causing
>+   two eliminations. */
>+/* { dg-final { scan-tree-dump-times "Eliminated: 2" 1 "pre"} } */
> /* { dg-final { cleanup-tree-dump "pre" } } */
>Index: gcc41/gcc/tree-flow.h
>===================================================================
>--- gcc41.orig/gcc/tree-flow.h
>+++ gcc41/gcc/tree-flow.h
>@@ -896,4 +896,6 @@ void delete_alias_heapvars (void);
> 
> #include "tree-flow-inline.h"
> 
>+void swap_tree_operands (tree, tree *, tree *);
>+
> #endif /* _TREE_FLOW_H  */
>Index: gcc41/gcc/tree-ssa-dom.c
>===================================================================
>--- gcc41.orig/gcc/tree-ssa-dom.c
>+++ gcc41/gcc/tree-ssa-dom.c
>@@ -588,6 +588,45 @@ struct tree_opt_pass pass_dominator = 
> };
> 
> 
>+/* Given a stmt CONDSTMT containing a COND_EXPR, canonicalize the
>+   COND_EXPR into a canonical form.  */
>+
>+static void
>+canonicalize_comparison (tree condstmt)
>+{
>+  tree cond = COND_EXPR_COND (condstmt);
>+  tree op0;
>+  tree op1;
>+  enum tree_code code = TREE_CODE (cond);
>+
>+  if (!COMPARISON_CLASS_P (cond))
>+    return;
>+
>+  op0 = TREE_OPERAND (cond, 0);
>+  op1 = TREE_OPERAND (cond, 1);
>+
>+  /* If it would be profitable to swap the operands, then do so to
>+     canonicalize the statement, enabling better optimization.
>+
>+     By placing canonicalization of such expressions here we
>+     transparently keep statements in canonical form, even
>+     when the statement is modified.  */
>+  if (tree_swap_operands_p (op0, op1, false))
>+    {
>+      /* For relationals we need to swap the operands
>+	 and change the code.  */
>+      if (code == LT_EXPR
>+	  || code == GT_EXPR
>+	  || code == LE_EXPR
>+	  || code == GE_EXPR)
>+	{
>+	  TREE_SET_CODE (cond, swap_tree_comparison (code));
>+	  swap_tree_operands (condstmt,
>+			      &TREE_OPERAND (cond, 0),
>+			      &TREE_OPERAND (cond, 1));
>+	}
>+    }
>+}
> /* We are exiting E->src, see if E->dest ends with a conditional
>    jump which has a known value when reached via E. 
> 
>@@ -799,7 +838,10 @@ thread_across_edge (struct dom_walk_data
>       /* Now temporarily cprop the operands and try to find the resulting
> 	 expression in the hash tables.  */
>       if (TREE_CODE (stmt) == COND_EXPR)
>-	cond = COND_EXPR_COND (stmt);
>+	{
>+	  canonicalize_comparison (stmt);
>+	  cond = COND_EXPR_COND (stmt);
>+	}
>       else if (TREE_CODE (stmt) == GOTO_EXPR)
> 	cond = GOTO_DESTINATION (stmt);
>       else
>@@ -2919,6 +2961,9 @@ optimize_stmt (struct dom_walk_data *wal
> 
>   old_stmt = stmt = bsi_stmt (si);
> 
>+  if (TREE_CODE (stmt) == COND_EXPR)
>+    canonicalize_comparison (stmt);
>+
>   update_stmt_if_modified (stmt);
>   ann = stmt_ann (stmt);
>   opt_stats.num_stmts++;
>Index: gcc41/gcc/tree-ssa-operands.c
>===================================================================
>--- gcc41.orig/gcc/tree-ssa-operands.c
>+++ gcc41/gcc/tree-ssa-operands.c
>@@ -1047,7 +1047,6 @@ swap_tree_operands (tree stmt, tree *exp
>   *exp1 = op0;
> }
> 
>-
> /* Recursively scan the expression pointed to by EXPR_P in statement referred
>    to by INFO.  FLAGS is one of the OPF_* constants modifying how to interpret
>    the operands found.  */
>@@ -1257,39 +1256,6 @@ get_expr_operands (tree stmt, tree *expr
>     case ASSERT_EXPR:
>     do_binary:
>       {
>-	tree op0 = TREE_OPERAND (expr, 0);
>-	tree op1 = TREE_OPERAND (expr, 1);
>-
>-	/* If it would be profitable to swap the operands, then do so to
>-	   canonicalize the statement, enabling better optimization.
>-
>-	   By placing canonicalization of such expressions here we
>-	   transparently keep statements in canonical form, even
>-	   when the statement is modified.  */
>-	if (tree_swap_operands_p (op0, op1, false))
>-	  {
>-	    /* For relationals we need to swap the operands
>-	       and change the code.  */
>-	    if (code == LT_EXPR
>-		|| code == GT_EXPR
>-		|| code == LE_EXPR
>-		|| code == GE_EXPR)
>-	      {
>-		TREE_SET_CODE (expr, swap_tree_comparison (code));
>-		swap_tree_operands (stmt,
>-				    &TREE_OPERAND (expr, 0),			
>-				    &TREE_OPERAND (expr, 1));
>-	      }
>-	  
>-	    /* For a commutative operator we can just swap the operands.  */
>-	    else if (commutative_tree_code (code))
>-	      {
>-		swap_tree_operands (stmt,
>-				    &TREE_OPERAND (expr, 0),			
>-				    &TREE_OPERAND (expr, 1));
>-	      }
>-	  }
>-
> 	get_expr_operands (stmt, &TREE_OPERAND (expr, 0), flags);
> 	get_expr_operands (stmt, &TREE_OPERAND (expr, 1), flags);
> 	return;
>Index: gcc41/gcc/tree-ssa-reassoc.c
>===================================================================
>--- gcc41.orig/gcc/tree-ssa-reassoc.c
>+++ gcc41/gcc/tree-ssa-reassoc.c
>@@ -33,249 +33,220 @@ Boston, MA 02110-1301, USA.  */
> #include "tree-gimple.h"
> #include "tree-dump.h"
> #include "timevar.h"
>-#include "hashtab.h"
> #include "tree-iterator.h"
> #include "tree-pass.h"
>+#include "alloc-pool.h"
>+#include "vec.h"
>+#include "langhooks.h"
> 
>-/*  This is a simple global reassociation pass that uses a combination
>-    of heuristics and a hashtable to try to expose more operations to
>-    CSE.  
>-
>-    The basic idea behind the heuristic is to rank expressions by
>-    depth of the computation tree and loop depth, and try to produce
>-    expressions consisting of small rank operations, as they are more
>-    likely to reoccur.  In addition, we use a hashtable to try to see
>-    if we can transpose an operation into something we have seen
>-    before.
>-
>-    Note that the way the hashtable is structured will sometimes find
>-    matches that will not expose additional redundancies, since it is
>-    not unwound as we traverse back up one branch of the dominator
>-    tree and down another.  However, the cost of improving this is
>-    probably not worth the additional benefits it will bring.  */
>+/*  This is a simple global reassociation pass.  It is, in part, based
>+    on the LLVM pass of the same name (They do some things more/less
>+    than we do, in different orders, etc).
> 
>-/* Statistics */
>-static struct
>-{
>-  int reassociated_by_rank;
>-  int reassociated_by_match;
>-} reassociate_stats;
>+    It consists of five steps:
> 
>+    1. Breaking up subtract operations into addition + negate, where
>+    it would promote the reassociation of adds.
> 
>+    2. Left linearization of the expression trees, so that (A+B)+(C+D)
>+    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
>+    During linearization, we place the operands of the binary
>+    expressions into the a vector of operand_entry_t
> 
>-/* Seen binary operator hashtable.  */
>-static htab_t seen_binops;
>+    3. Optimization of the operand lists, eliminating things like a +
>+    -a, a & a, etc.
> 
>-/* Binary operator struct. */
>+    4. Rewrite the expression trees we linearized and optimized so
>+    they are in proper rank order.
> 
>-typedef struct seen_binop_d
>-{
>-  tree op1;
>-  tree op2;
>-} *seen_binop_t;
>+    5. Repropagate negates, as nothing else will clean it up ATM.
> 
>-/* Return a SEEN_BINOP_T if we have seen an associative binary
>-   operator with OP1 and OP2 in it.  */
>+    A bit of theory on #4, since nobody seems to write anything down
>+    about why it makes sense to do it the way they do it:
> 
>-static seen_binop_t
>-find_seen_binop (tree op1, tree op2)
>-{
>-  void **slot;
>-  struct seen_binop_d sbd;
>-  sbd.op1 = op1;
>-  sbd.op2 = op2;
>-  slot = htab_find_slot (seen_binops, &sbd, NO_INSERT);
>-  if (!slot)
>-    return NULL;
>-  return ((seen_binop_t) *slot);
>-}
>+    We could do this much nicer theoretically, but don't (for reasons
>+    explained after how to do it theoretically nice :P).
> 
>-/* Insert a binary operator consisting of OP1 and OP2 into the
>-   SEEN_BINOP table.  */
>+    In order to promote the most redundancy elimination, you want
>+    binary expressions whose operands are the same rank (or
>+    preferrably, the same value) exposed to the redundancy eliminator,
>+    for possible elimination.
> 
>-static void
>-insert_seen_binop (tree op1, tree op2)
>-{
>-  void **slot;
>-  seen_binop_t new_pair = xmalloc (sizeof (*new_pair));
>-  new_pair->op1 = op1;
>-  new_pair->op2 = op2;
>-  slot = htab_find_slot (seen_binops, new_pair, INSERT);
>-  if (*slot != NULL)
>-    free (*slot);
>-  *slot = new_pair;
>-}
>+    So the way to do this if we really cared, is to build the new op
>+    tree from the leaves to the roots, merging as you go, and putting the
>+    new op on the end of the worklist, until you are left with one
>+    thing on the worklist.
> 
>-/* Return the hash value for a seen binop structure pointed to by P.
>-   Because all the binops we consider are associative, we just add the
>-   hash value for op1 and op2.  */
>+  IE if you have to rewrite the following set of operands (listed with
>+  rank in parentheses), with opcode PLUS_EXPR:
> 
>-static hashval_t
>-seen_binop_hash (const void *p)
>-{
>-  const seen_binop_t sb = (seen_binop_t) p;
>-  return iterative_hash_expr (sb->op1, 0) + iterative_hash_expr (sb->op2, 0);
>-}
>+  a (1),  b (1),  c (1),  d (2), e (2)
> 
>-/* Return true if two seen binop structures pointed to by P1 and P2 are equal.
>-   We have to check the operators both ways because we don't know what
>-   order they appear in the table.  */
> 
>-static int
>-seen_binop_eq (const void *p1, const void *p2)
>-{
>-  const seen_binop_t sb1 = (seen_binop_t) p1;
>-  const seen_binop_t sb2 = (seen_binop_t) p2;
>-  return (sb1->op1 == sb2->op1 && sb1->op2 == sb2->op2)
>-    || (sb1->op2 == sb2->op1 && sb1->op1 == sb2->op2);
>-}
>+We start with our merge worklist empty, and the ops list with all of
>+those on it.
>+
>+You want to first merge all leaves of the same rank, as much as
>+possible.
>+
>+So first build a binary op of
>+
>+mergetmp = a + b, and put "mergetmp" on the merge worklist.
>+
>+Because there is no three operand form of PLUS_EXPR, c is not going to
>+be exposed to redundancy elimination as a rank 1 operand.
>+
>+So you might as well throw it on the merge worklist (you could also
>+consider it to now be a rank two operand, and merge it with d and e,
>+but in this case, you then have evicted e from a binary op. So at
>+least in this situation, you can't win.)
>+
>+Then build a binary op of d + e
>+mergetmp2 = d + e
>+
>+and put mergetmp2 on the merge worklist.
>+
>+so merge worklist = {mergetmp, c, mergetmp2}
>+
>+Continue building binary ops of these operations until you have only
>+one operation left on the worklist.
>+
>+So we have
>+
>+build binary op
>+mergetmp3 = mergetmp + c
> 
>-/* Value rank structure.  */
>+worklist = {mergetmp2, mergetmp3}
> 
>-typedef struct valrank_d
>+mergetmp4 = mergetmp2 + mergetmp3
>+
>+worklist = {mergetmp4}
>+
>+because we have one operation left, we can now just set the original
>+statement equal to the result of that operation.
>+
>+This will at least expose a + b  and d + e to redundancy elimination
>+as binary operations.
>+
>+For extra points, you can reuse the old statements to build the
>+mergetmps, since you shouldn't run out.
>+
>+
>+So why don't we do this?
>+
>+Because it's expensive, and rarely will help.  Most trees we are
>+reassociating have 3 or less ops.  If they have 2 ops, they already
>+will be written into a nice single binary op.  If you have 3 ops, a
>+single simple check suffices to tell you whether the first two are of the
>+same rank.  If so, you know to order it
>+
>+mergetmp = op1 + op2
>+newstmt = mergetmp + op3
>+
>+instead of
>+mergetmp = op2 + op3
>+newstmt = mergetmp + op1
>+
>+If all three are of the same rank, you can't expose them all in a
>+single binary operator anyway, so the above is *still* the best you
>+can do.
>+
>+Thus, this is what we do.  When we have three ops left, we check to see
>+what order to put them in, and call it a day.  As a nod to vector sum
>+reduction, we check if any of ops are a really a phi node that is a
>+destructive update for the associating op, and keep the destructive
>+update together for vector sum reduction recognition.  */
>+
>+
>+/* Statistics */
>+static struct
>+{
>+  int linearized;
>+  int constants_eliminated;
>+  int ops_eliminated;
>+  int rewritten;
>+} reassociate_stats;
>+
>+/* Operator, rank pair.  */
>+typedef struct operand_entry
> {
>-  tree e;   
>-  unsigned int rank;  
>-} *valrank_t;
>+  unsigned int rank;
>+  tree op;
>+} *operand_entry_t;
>+
>+static alloc_pool operand_entry_pool;
>+
> 
> /* Starting rank number for a given basic block, so that we can rank
>    operations using unmovable instructions in that BB based on the bb
>    depth.  */
> static unsigned int *bb_rank;
> 
>-/* Value rank hashtable.  */
>-static htab_t value_rank;
>+/* Operand->rank hashtable.  */
>+static htab_t operand_rank;
> 
> 
>-/* Look up the value rank structure for expression E.  */
>+/* Look up the operand rank structure for expression E.  */
> 
>-static valrank_t
>-find_value_rank (tree e)
>+static operand_entry_t
>+find_operand_rank (tree e)
> {
>   void **slot;
>-  struct valrank_d vrd;
>-  vrd.e = e;
>-  slot = htab_find_slot (value_rank, &vrd, NO_INSERT);
>+  struct operand_entry vrd;
>+
>+  vrd.op = e;
>+  slot = htab_find_slot (operand_rank, &vrd, NO_INSERT);
>   if (!slot)
>     return NULL;
>-  return ((valrank_t) *slot);
>+  return ((operand_entry_t) *slot);
> }
> 
>-/* Insert {E,RANK} into the value rank hashtable.  */
>+/* Insert {E,RANK} into the operand rank hashtable.  */
> 
> static void
>-insert_value_rank (tree e, unsigned int rank)
>+insert_operand_rank (tree e, unsigned int rank)
> {
>   void **slot;
>-  valrank_t new_pair = xmalloc (sizeof (*new_pair));
>-  new_pair->e = e;
>+  operand_entry_t new_pair = pool_alloc (operand_entry_pool);
>+
>+  new_pair->op = e;
>   new_pair->rank = rank;
>-  slot = htab_find_slot (value_rank, new_pair, INSERT);
>+  slot = htab_find_slot (operand_rank, new_pair, INSERT);
>   gcc_assert (*slot == NULL);
>   *slot = new_pair;
>-
> }
> 
>-
>-/* Return the hash value for a value rank structure  */
>+/* Return the hash value for a operand rank structure  */
> 
> static hashval_t
>-valrank_hash (const void *p)
>+operand_entry_hash (const void *p)
> {
>-  const valrank_t vr = (valrank_t) p;
>-  return iterative_hash_expr (vr->e, 0);
>+  const operand_entry_t vr = (operand_entry_t) p;
>+  return iterative_hash_expr (vr->op, 0);
> }
> 
>-/* Return true if two value rank structures are equal.  */
>+/* Return true if two operand rank structures are equal.  */
> 
> static int
>-valrank_eq (const void *p1, const void *p2)
>-{
>-  const valrank_t vr1 = (valrank_t) p1;
>-  const valrank_t vr2 = (valrank_t) p2;
>-  return vr1->e == vr2->e;
>-}
>-
>-
>-/* Initialize the reassociation pass.  */
>-
>-static void
>-init_reassoc (void)
>+operand_entry_eq (const void *p1, const void *p2)
> {
>-  int i;
>-  unsigned int rank = 2;
>-  
>-  tree param;
>-  int *bbs = xmalloc ((last_basic_block + 1) * sizeof (int));
>-  
>-  memset (&reassociate_stats, 0, sizeof (reassociate_stats));
>-
>-  /* Reverse RPO (Reverse Post Order) will give us something where
>-     deeper loops come later.  */
>-  flow_reverse_top_sort_order_compute (bbs);
>-  bb_rank = xcalloc (last_basic_block + 1, sizeof (unsigned int));
>-  value_rank = htab_create (511, valrank_hash,
>-			    valrank_eq, free);
>-  seen_binops = htab_create (511, seen_binop_hash,
>-			     seen_binop_eq, free);
>-
>-  /* Give each argument a distinct rank.   */
>-  for (param = DECL_ARGUMENTS (current_function_decl);
>-       param;
>-       param = TREE_CHAIN (param))
>-    {
>-      if (default_def (param) != NULL)
>-	{
>-	  tree def = default_def (param);
>-	  insert_value_rank (def, ++rank);
>-	}
>-    }
>-  /* Give the chain decl a distinct rank. */
>-  if (cfun->static_chain_decl != NULL)
>-    {
>-      tree def = default_def (cfun->static_chain_decl);
>-      if (def != NULL)
>-        insert_value_rank (def, ++rank);
>-    }
>-  
>-  /* Set up rank for each BB  */
>-  for (i = 0; i < n_basic_blocks; i++)
>-    bb_rank[bbs[i]] = ++rank  << 16;
>-
>-  free (bbs);
>-  calculate_dominance_info (CDI_DOMINATORS);
>-
>+  const operand_entry_t vr1 = (operand_entry_t) p1;
>+  const operand_entry_t vr2 = (operand_entry_t) p2;
>+  return vr1->op == vr2->op;
> }
> 
>-/* Cleanup after the reassociation pass, and print stats if
>-   requested.  */
>-
>-static void
>-fini_reassoc (void)
>-{
>-
>-  if (dump_file && (dump_flags & TDF_STATS))
>-    {
>-      fprintf (dump_file, "Reassociation stats:\n");
>-      fprintf (dump_file, "Reassociated by rank: %d\n", reassociate_stats.reassociated_by_rank);
>-      fprintf (dump_file, "Reassociated by match: %d\n", reassociate_stats.reassociated_by_match);
>-    }
>-  htab_delete (value_rank);
>-  htab_delete (seen_binops);
>-  free (bb_rank);
>-}
> 
> /* Given an expression E, return the rank of the expression.  */
> 
> static unsigned int
> get_rank (tree e)
> {
>-  valrank_t vr;
>+  operand_entry_t vr;
> 
>-  /* Constants have rank 0.  */  
>+  /* Constants have rank 0.  */
>   if (is_gimple_min_invariant (e))
>     return 0;
>-  
>+
>   /* SSA_NAME's have the rank of the expression they are the result
>      of.
>      For globals and uninitialized values, the rank is 0.
>@@ -290,24 +261,24 @@ get_rank (tree e)
>   if (TREE_CODE (e) == SSA_NAME)
>     {
>       tree stmt;
>-      tree rhs;      
>+      tree rhs;
>       unsigned int rank, maxrank;
>       int i;
>-      
>+
>       if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
> 	  && e == default_def (SSA_NAME_VAR (e)))
>-	return find_value_rank (e)->rank;
>-      
>+	return find_operand_rank (e)->rank;
>+
>       stmt = SSA_NAME_DEF_STMT (e);
>       if (bb_for_stmt (stmt) == NULL)
> 	return 0;
>-      
>+
>       if (TREE_CODE (stmt) != MODIFY_EXPR
> 	  || !ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
> 	return bb_rank[bb_for_stmt (stmt)->index];
> 
>       /* If we already have a rank for this expression, use that.  */
>-      vr = find_value_rank (e);
>+      vr = find_operand_rank (e);
>       if (vr)
> 	return vr->rank;
> 
>@@ -318,24 +289,25 @@ get_rank (tree e)
>       rhs = TREE_OPERAND (stmt, 1);
>       if (TREE_CODE_LENGTH (TREE_CODE (rhs)) == 0)
> 	rank = MAX (rank, get_rank (rhs));
>-      else 
>+      else
> 	{
>-	  for (i = 0; 
>-	       i < TREE_CODE_LENGTH (TREE_CODE (rhs)) 
>+	  for (i = 0;
>+	       i < TREE_CODE_LENGTH (TREE_CODE (rhs))
> 		 && TREE_OPERAND (rhs, i)
>-		 && rank != maxrank; i++)
>+		 && rank != maxrank;
>+	       i++)
> 	    rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
> 	}
>-      
>+
>       if (dump_file && (dump_flags & TDF_DETAILS))
> 	{
> 	  fprintf (dump_file, "Rank for ");
> 	  print_generic_expr (dump_file, e, 0);
> 	  fprintf (dump_file, " is %d\n", (rank + 1));
> 	}
>-      
>+
>       /* Note the rank in the hashtable so we don't recompute it.  */
>-      insert_value_rank (e, (rank + 1));
>+      insert_operand_rank (e, (rank + 1));
>       return (rank + 1);
>     }
> 
>@@ -343,282 +315,1241 @@ get_rank (tree e)
>   return 0;
> }
> 
>+DEF_VEC_P(operand_entry_t);
>+DEF_VEC_ALLOC_P(operand_entry_t, heap);
>+
>+/* We want integer ones to end up last no matter what, since they are
>+   the ones we can do the most with.  */
>+#define INTEGER_CONST_TYPE 1 << 3
>+#define FLOAT_CONST_TYPE 1 << 2
>+#define OTHER_CONST_TYPE 1 << 1
>+
>+/* Classify an invariant tree into integer, float, or other, so that
>+   we can sort them to be near other constants of the same type.  */
>+static inline int
>+constant_type (tree t)
>+{
>+  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
>+    return INTEGER_CONST_TYPE;
>+  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
>+    return FLOAT_CONST_TYPE;
>+  else
>+    return OTHER_CONST_TYPE;
>+}
>+
>+/* qsort comparison function to sort operand entries PA and PB by rank
>+   so that the sorted array is ordered by rank in decreasing order.  */
>+static int
>+sort_by_operand_rank (const void *pa, const void *pb)
>+{
>+  const operand_entry_t oea = *(const operand_entry_t *)pa;
>+  const operand_entry_t oeb = *(const operand_entry_t *)pb;
>+
>+  /* It's nicer for optimize_expression if constants that are likely
>+     to fold when added/multiplied//whatever are put next to each
>+     other.  Since all constants have rank 0, order them by type.  */
>+  if (oeb->rank == 0 &&  oea->rank == 0)
>+    return constant_type (oeb->op) - constant_type (oea->op);
>+
>+  /* Lastly, make sure the versions that are the same go next to each
>+     other.  We use SSA_NAME_VERSION because it's stable.  */
>+  if ((oeb->rank - oea->rank == 0)
>+      && TREE_CODE (oea->op) == SSA_NAME
>+      && TREE_CODE (oeb->op) == SSA_NAME)
>+    return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
>+
>+  return oeb->rank - oea->rank;
>+}
>+
>+/* Add an operand entry to *OPS for the tree operand OP.  */
>+
>+static void
>+add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
>+{
>+  operand_entry_t oe = pool_alloc (operand_entry_pool);
>+
>+  oe->op = op;
>+  oe->rank = get_rank (op);
>+  VEC_safe_push (operand_entry_t, heap, *ops, oe);
>+}
> 
>-/* Decide whether we should transpose RHS and some operand of
>-   LHSDEFOP.
>-   If yes, then return true and set TAKEOP to the operand number of LHSDEFOP to
>-   switch RHS for.
>-   Otherwise, return false.  */
>+/* Return true if STMT is reassociable operation containing a binary
>+   operation with tree code CODE.  */
> 
> static bool
>-should_transpose (tree rhs ATTRIBUTE_UNUSED, 
>-		  unsigned int rhsrank,
>-		  tree lhsdefop, unsigned int *takeop)
>-{
>-  /* Attempt to expose the low ranked
>-     arguments to CSE if we have something like:
>-     a = <rank 2> + c (rank 1)
>-     b = a (rank 3) + d (rank 1)
>-     We want to transform this into:
>-     a = c + d
>-     b = <rank 2> + <rank 3>
>-     
>-     The op finding part wouldn't be necessary if
>-			 we could swap the operands above and not have
>-			 update_stmt change them back on us.
>-  */
>-  unsigned int lowrankop;
>-  unsigned int lowrank;
>-  unsigned int highrank;
>-  unsigned int highrankop;
>-  unsigned int temp;
>-  
>-  lowrankop = 0;
>-  *takeop = 1;
>-  lowrank = get_rank (TREE_OPERAND (lhsdefop, 0));
>-  temp = get_rank (TREE_OPERAND (lhsdefop, 1));
>-  highrank = temp;
>-  highrankop = 1;
>-  if (temp < lowrank)
>-    {
>-      lowrankop = 1;
>-      highrankop = 0;
>-      *takeop = 0;
>-      highrank = lowrank;
>-      lowrank = temp;
>-    }
>-  
>-  /* If highrank == lowrank, then we had something
>-     like:
>-     a = <rank 1> + <rank 1> 
>-     already, so there is no guarantee that
>-     swapping our argument in is going to be
>-     better.
>-     If we run reassoc twice, we could probably
>-     have a flag that switches this behavior on,
>-     so that we try once without it, and once with
>-     it, so that redundancy elimination sees it
>-     both ways.
>-  */		      
>-  
>-  if (lowrank == rhsrank && highrank != lowrank)
>+is_reassociable_op (tree stmt, enum tree_code code)
>+{
>+  if (!IS_EMPTY_STMT (stmt)
>+      && TREE_CODE (stmt) == MODIFY_EXPR
>+      && TREE_CODE (TREE_OPERAND (stmt, 1)) == code
>+      && has_single_use (TREE_OPERAND (stmt, 0)))
>     return true;
>-
>-  /* Also, see if the LHS's high ranked op should be switched with our
>-     RHS simply because it is greater in rank than our current RHS.  */
>-  if (TREE_CODE (TREE_OPERAND (lhsdefop, highrankop)) == SSA_NAME)
>-    {
>-      tree iop = SSA_NAME_DEF_STMT (TREE_OPERAND (lhsdefop, highrankop));
>-      if (TREE_CODE (iop) == MODIFY_EXPR)
>-	iop = TREE_OPERAND (iop, 1);
>-      if (TREE_CODE (iop) == TREE_CODE (lhsdefop))
>-	*takeop = 1;
>-      if (rhsrank < get_rank (TREE_OPERAND (lhsdefop, *takeop)))
>-	return true;
>-    }		  
>-  
>   return false;
> }
> 
>-/* Attempt to reassociate the associative binary operator BEXPR, which
>-   is in the statement pointed to by CURRBSI.  Return true if we
>-   changed the statement.  */
>+
>+/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
>+   operand of the negate operation.  Otherwise, return NULL.  */
>+
>+static tree
>+get_unary_op (tree name, enum tree_code opcode)
>+{
>+  tree stmt = SSA_NAME_DEF_STMT (name);
>+  tree rhs;
>+
>+  if (TREE_CODE (stmt) != MODIFY_EXPR)
>+    return NULL_TREE;
>+
>+  rhs = TREE_OPERAND (stmt, 1);
>+  if (TREE_CODE (rhs) == opcode)
>+    return TREE_OPERAND (rhs, 0);
>+  return NULL_TREE;
>+}
>+
>+/* If CURR and LAST are a pair of ops that OPCODE allows us to
>+   eliminate through equivalences, do so, remove them from OPS, and
>+   return true.  Otherwise, return false.  */
> 
> static bool
>-reassociate_expr (tree bexpr, block_stmt_iterator *currbsi)
>+eliminate_duplicate_pair (enum tree_code opcode,
>+			  VEC (operand_entry_t, heap) **ops,
>+			  bool *all_done,
>+			  unsigned int i,
>+			  operand_entry_t curr,
>+			  operand_entry_t last)
> {
>-  tree lhs = TREE_OPERAND (bexpr, 0);
>-  tree rhs = TREE_OPERAND (bexpr, 1);
>-  tree lhsdef;
>-  tree lhsi;
>-  bool changed = false;
>-  unsigned int lhsrank = get_rank (lhs);
>-  unsigned int rhsrank = get_rank (rhs);
>-
>-  /* If unsafe math optimizations we can do reassociation for non-integral
>-     types.  */
>-  if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>-       || !INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
>-      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs))
>-	  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs))
>-	  || !flag_unsafe_math_optimizations))
>-    return false;
>-    
>-  /* We want the greater ranked operand to be our "LHS" for simplicity
>-     sake.  There is no point in actually modifying the expression, as
>-     update_stmt will simply resort the operands anyway. */
>-  if (lhsrank < rhsrank)
>+
>+  /* If we have two of the same op, and the opcode is & or |, we can
>+     eliminate one of them.
>+     If we have two of the same op, and the opcode is ^, we can
>+     eliminate both of them.  */
>+
>+  if (last && last->op == curr->op)
>     {
>-      tree temp;
>-      unsigned int temp1;
>-      temp = lhs;
>-      lhs = rhs;
>-      rhs = temp;
>-      temp1 = lhsrank;
>-      lhsrank = rhsrank;
>-      rhsrank = temp1;
>-    }
>-
>-  /* If the high ranked operand is an SSA_NAME, and the binary
>-     operator is not something we've already seen somewhere else
>-     (i.e., it may be redundant), attempt to reassociate it.
>-     
>-     We can't reassociate expressions unless the expression we are
>-     going to reassociate with is only used in our current expression,
>-     or else we may screw up other computations, like so:
>-
>-     a = b + c
>-     e = a + d
>-     
>-     g = a + f
>-     
>-     We cannot reassociate and rewrite the "a = ..." , 
>-     because that would change the value of the computation of 
>-     "g = a + f".  */
>-  if (TREE_CODE (lhs) == SSA_NAME && !find_seen_binop (lhs, rhs))
>-    {
>-      lhsdef = SSA_NAME_DEF_STMT (lhs);
>-      if (TREE_CODE (lhsdef) == MODIFY_EXPR)
>-	{
>-	  lhsi = TREE_OPERAND (lhsdef, 1);
>-	  if (TREE_CODE (lhsi) == TREE_CODE (bexpr))
>-	    {
>-	      use_operand_p use;
>-	      tree usestmt;
>-	      if (single_imm_use (lhs, &use, &usestmt))
>-		{
>-		  unsigned int takeop = 0;
>-		  unsigned int otherop = 1;
>-		  bool foundmatch = false;
>-		  bool foundrank = false;
>-
>-		  /* If we can easily transpose this into an operation
>-		     we've already seen, let's do that.
>-		     otherwise, let's try to expose low ranked ops to
>-		     CSE.  */
>-		  if (find_seen_binop (TREE_OPERAND (lhsi, 1), rhs))
>-		    {
>-		      takeop = 0;
>-		      otherop = 1;
>-		      foundmatch = true;
>-		    }
>-		  else if (find_seen_binop (TREE_OPERAND (lhsi, 0),
>-					    rhs))
>-		    {
>-		      takeop = 1;
>-		      otherop = 0;
>-		      foundmatch = true;
>-		    }
>-		  else if (should_transpose (rhs, rhsrank, lhsi,
>-					     &takeop))
>-		    {
>-		      foundrank = true;
>-		    }		  
>-		  if (foundmatch || foundrank)
>-		    {
>-		      block_stmt_iterator lhsbsi = bsi_for_stmt (lhsdef);
>-		      if (dump_file && (dump_flags & TDF_DETAILS))
>-			{
>-			  fprintf (dump_file, "Reassociating by %s\n",
>-				   foundmatch ? "match" : "rank");
>-			  fprintf (dump_file, "Before LHS:");
>-			  print_generic_stmt (dump_file, lhsi, 0);
>-			  fprintf (dump_file, "Before curr expr:");
>-			  print_generic_stmt (dump_file, bexpr, 0);
>-			}
>-		      TREE_OPERAND (bexpr, 0) = TREE_OPERAND (lhsi, takeop);
>-		      TREE_OPERAND (lhsi, takeop) = rhs;
>-		      TREE_OPERAND (bexpr, 1) = TREE_OPERAND (lhsdef, 0);
>-		      if (dump_file && (dump_flags & TDF_DETAILS))
>-			{
>-			  fprintf (dump_file, "After LHS:");
>-			  print_generic_stmt (dump_file, lhsi, 0);
>-			  fprintf (dump_file, "After curr expr:");
>-			  print_generic_stmt (dump_file, bexpr, 0);
>-			}
>-		      bsi_move_before (&lhsbsi, currbsi);
>-		      update_stmt (lhsdef);
>-		      update_stmt (bsi_stmt (*currbsi));
>-		      lhsbsi = bsi_for_stmt (lhsdef);
>-		      update_stmt (bsi_stmt (lhsbsi));
>-
>-		      /* If update_stmt didn't reorder our operands,
>-			 we'd like to recurse on the expression we
>-			 just reassociated and reassociate it
>-			 top-down, exposing further opportunities.
>-			 Unfortunately, update_stmt does reorder them,
>-			 so we can't do this cheaply.  */
>-		      if (!foundmatch)
>-			reassociate_stats.reassociated_by_rank++;
>-		      else
>-			reassociate_stats.reassociated_by_match++;
>-		      return true;
>-		    }
>-		}
>+      switch (opcode)
>+	{
>+	case BIT_IOR_EXPR:
>+	case BIT_AND_EXPR:
>+	  if (dump_file && (dump_flags & TDF_DETAILS))
>+	    {
>+	      fprintf (dump_file, "Equivalence: ");
>+	      print_generic_expr (dump_file, curr->op, 0);
>+	      fprintf (dump_file, " [&|] ");
>+	      print_generic_expr (dump_file, last->op, 0);
>+	      fprintf (dump_file, " -> ");
>+	      print_generic_stmt (dump_file, last->op, 0);
>+	    }
>+
>+	  VEC_ordered_remove (operand_entry_t, *ops, i);
>+	  reassociate_stats.ops_eliminated ++;
>+
>+	  return true;
>+
>+	case BIT_XOR_EXPR:
>+	  if (dump_file && (dump_flags & TDF_DETAILS))
>+	    {
>+	      fprintf (dump_file, "Equivalence: ");
>+	      print_generic_expr (dump_file, curr->op, 0);
>+	      fprintf (dump_file, " ^ ");
>+	      print_generic_expr (dump_file, last->op, 0);
>+	      fprintf (dump_file, " -> nothing\n");
> 	    }
>+
>+	  reassociate_stats.ops_eliminated += 2;
>+
>+	  if (VEC_length (operand_entry_t, *ops) == 2)
>+	    {
>+	      VEC_free (operand_entry_t, heap, *ops);
>+	      *ops = NULL;
>+	      add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op),
>+						 integer_zero_node));
>+	      *all_done = true;
>+	    }
>+	  else
>+	    {
>+	      VEC_ordered_remove (operand_entry_t, *ops, i-1);
>+	      VEC_ordered_remove (operand_entry_t, *ops, i-1);
>+	    }
>+
>+	  return true;
>+
>+	default:
>+	  break;
> 	}
>     }
>-  return changed;
>+  return false;
> }
> 
>-/* Reassociate expressions in basic block BB and its dominator as
>-   children , return true if any
>-   expressions changed.  */
>+/* If OPCODE is PLUS_EXPR, CURR->OP is really a negate expression,
>+   look in OPS for a corresponding positive operation to cancel it
>+   out.  If we find one, remove the other from OPS, replace
>+   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
>+   false. */
> 
> static bool
>-reassociate_bb (basic_block bb)
>+eliminate_plus_minus_pair (enum tree_code opcode,
>+			   VEC (operand_entry_t, heap) **ops,
>+			   unsigned int currindex,
>+			   operand_entry_t curr)
> {
>-  bool changed = false;
>-  block_stmt_iterator bsi;
>-  basic_block son;
>+  tree negateop;
>+  unsigned int i;
>+  operand_entry_t oe;
> 
>-  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
>+  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
>+    return false;
>+
>+  negateop = get_unary_op (curr->op, NEGATE_EXPR);
>+  if (negateop == NULL_TREE)
>+    return false;
>+
>+  /* Any non-negated version will have a rank that is one less than
>+     the current rank.  So once we hit those ranks, if we don't find
>+     one, we can stop.  */
>+
>+  for (i = currindex;
>+       VEC_iterate (operand_entry_t, *ops, i, oe)  && oe->rank >= curr->rank - 1 ;
>+       i++)
>     {
>-      tree stmt = bsi_stmt (bsi);
>-      
>-      if (TREE_CODE (stmt) == MODIFY_EXPR)
>+      if (oe->op == negateop && i != currindex)
> 	{
>-	  tree rhs = TREE_OPERAND (stmt, 1);
>-	  if (associative_tree_code (TREE_CODE (rhs)))
>+
>+	  if (dump_file && (dump_flags & TDF_DETAILS))
> 	    {
>-	      if (reassociate_expr (rhs, &bsi))
>-		{
>-		  changed = true;
>-		  update_stmt (stmt);		  
>-		}
>-	      insert_seen_binop (TREE_OPERAND (rhs, 0),
>-				 TREE_OPERAND (rhs, 1));
>+	      fprintf (dump_file, "Equivalence: ");
>+	      print_generic_expr (dump_file, negateop, 0);
>+	      fprintf (dump_file, " + -");
>+	      print_generic_expr (dump_file, oe->op, 0);
>+	      fprintf (dump_file, " -> 0\n");
> 	    }
>+
>+	  VEC_ordered_remove (operand_entry_t, *ops, i);
>+	  add_to_ops_vec (ops, fold_convert(TREE_TYPE (oe->op),
>+					    integer_zero_node));
>+	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
>+	  reassociate_stats.ops_eliminated ++;
>+
>+	  return true;
> 	}
>     }
>-  for (son = first_dom_son (CDI_DOMINATORS, bb);
>-       son;
>-       son = next_dom_son (CDI_DOMINATORS, son))
>-    {
>-      changed |= reassociate_bb (son);
>-    }
>-  return changed;  
>+
>+  return false;
> }
> 
>-	
>+/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
>+   bitwise not expression, look in OPS for a corresponding operand to
>+   cancel it out.  If we find one, remove the other from OPS, replace
>+   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
>+   false. */
>+
> static bool
>-do_reassoc (void)
>-{  
>-  bool changed = false;
>-  
>-  changed = reassociate_bb (ENTRY_BLOCK_PTR);
>+eliminate_not_pairs (enum tree_code opcode,
>+		     VEC (operand_entry_t, heap) **ops,
>+		     unsigned int currindex,
>+		     operand_entry_t curr)
>+{
>+  tree notop;
>+  unsigned int i;
>+  operand_entry_t oe;
>+
>+  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
>+      || TREE_CODE (curr->op) != SSA_NAME)
>+    return false;
>+
>+  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
>+  if (notop == NULL_TREE)
>+    return false;
>+
>+  /* Any non-not version will have a rank that is one less than
>+     the current rank.  So once we hit those ranks, if we don't find
>+     one, we can stop.  */
>+
>+  for (i = currindex;
>+       VEC_iterate (operand_entry_t, *ops, i, oe)  && oe->rank >= curr->rank - 1 ;
>+       i++)
>+    {
>+      if (oe->op == notop && i != currindex)
>+	{
>+	  if (dump_file && (dump_flags & TDF_DETAILS))
>+	    {
>+	      fprintf (dump_file, "Equivalence: ");
>+	      print_generic_expr (dump_file, notop, 0);
>+	      if (opcode == BIT_AND_EXPR)
>+		fprintf (dump_file, " & ~");
>+	      else if (opcode == BIT_IOR_EXPR)
>+		fprintf (dump_file, " | ~");
>+	      print_generic_expr (dump_file, oe->op, 0);
>+	      if (opcode == BIT_AND_EXPR)
>+		fprintf (dump_file, " -> 0\n");
>+	      else if (opcode == BIT_IOR_EXPR)
>+		fprintf (dump_file, " -> -1\n");
>+	    }
>+
>+	  if (opcode == BIT_AND_EXPR)
>+	    oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node);
>+	  else if (opcode == BIT_IOR_EXPR)
>+	    oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
>+					  TYPE_PRECISION (TREE_TYPE (oe->op)));
>+
>+	  reassociate_stats.ops_eliminated += VEC_length (operand_entry_t, *ops) - 1;
>+	  VEC_free (operand_entry_t, heap, *ops);
>+	  *ops = NULL;
>+	  VEC_safe_push (operand_entry_t, heap, *ops, oe);
>+	  return true;
>+	}
>+    }
> 
>-  return changed;  
>+  return false;
> }
> 
>+/* Use constant value that may be present in OPS to try to eliminate
>+   operands.  Note that this function is only really used when we've
>+   eliminated ops for other reasons, or merged constants.  Across
>+   single statements, fold already does all of this, plus more.  There
>+   is little point in duplicating logic, so I've only included the
>+   identities that I could ever construct testcases to trigger.  */
> 
>-/* Gate and execute functions for Reassociation.  */
>+static void
>+eliminate_using_constants (enum tree_code opcode,
>+			   VEC(operand_entry_t, heap) **ops)
>+{
>+  operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
>+
>+  if (oelast->rank == 0 && INTEGRAL_TYPE_P (TREE_TYPE (oelast->op)))
>+    {
>+      switch (opcode)
>+	{
>+	case BIT_AND_EXPR:
>+	  if (integer_zerop (oelast->op))
>+	    {
>+	      if (VEC_length (operand_entry_t, *ops) != 1)
>+		{
>+		  if (dump_file && (dump_flags & TDF_DETAILS))
>+		    fprintf (dump_file, "Found & 0, removing all other ops\n");
>+		  reassociate_stats.ops_eliminated += VEC_length (operand_entry_t, *ops) - 1;
>+		  VEC_free (operand_entry_t, heap, *ops);
>+		  *ops = NULL;
>+		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
>+		  return;
>+		}
>+	    }
>+	  /* FALLTHRU */
>+	case BIT_IOR_EXPR:
>+	  if (integer_all_onesp (oelast->op))
>+	    {
>+	      if (VEC_length (operand_entry_t, *ops) != 1)
>+		{
>+		  if (dump_file && (dump_flags & TDF_DETAILS))
>+		    fprintf (dump_file, "Found [&|] -1, removing\n");
>+		  VEC_pop (operand_entry_t, *ops);
>+		  reassociate_stats.ops_eliminated++;
>+		}
>+	    }
>+	  break;
>+	case MULT_EXPR:
>+	  if (integer_zerop (oelast->op))
>+	    {
>+	      if (VEC_length (operand_entry_t, *ops) != 1)
>+		{
>+		  if (dump_file && (dump_flags & TDF_DETAILS))
>+		    fprintf (dump_file, "Found * 0, removing all other ops\n");
>+		  reassociate_stats.ops_eliminated += VEC_length (operand_entry_t, *ops) - 1;
>+		  VEC_free (operand_entry_t, heap, *ops);
>+		  *ops = NULL;
>+		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
>+		  return;
>+		}
>+	    }
>+	  else if (integer_onep (oelast->op))
>+	    {
>+	      if (VEC_length (operand_entry_t, *ops) != 1)
>+		{
>+		  if (dump_file && (dump_flags & TDF_DETAILS))
>+		    fprintf (dump_file, "Found * 1, removing\n");
>+		  VEC_pop (operand_entry_t, *ops);
>+		  reassociate_stats.ops_eliminated++;
>+		  return;
>+		}
>+	    }
>+	  break;
>+	case BIT_XOR_EXPR:
>+	case PLUS_EXPR:
>+	case MINUS_EXPR:
>+	  if (integer_zerop (oelast->op))
>+	    {
>+	      if (VEC_length (operand_entry_t, *ops) != 1)
>+		{
>+		  if (dump_file && (dump_flags & TDF_DETAILS))
>+		    fprintf (dump_file, "Found [|^+] 0, removing\n");
>+		  VEC_pop (operand_entry_t, *ops);
>+		  reassociate_stats.ops_eliminated++;
>+		  return;
>+		}
>+	    }
>+	  break;
>+	default:
>+	  break;
>+	}
>+    }
>+}
>+
>+/* Perform various identities and other optimizations on the list of
>+   operand entries, stored in OPS.  The tree code for the binary
>+   operation between all the operands is OPCODE.  */
> 
> static void
>-execute_reassoc (void)
>+optimize_ops_list (enum tree_code opcode,
>+		   VEC (operand_entry_t, heap) **ops)
> {
>-  init_reassoc ();
>-  do_reassoc ();
>+  unsigned int length = VEC_length (operand_entry_t, *ops);
>+  unsigned int i;
>+  operand_entry_t oe;
>+  int fconstcount = 0;
>+  int iconstcount = 0;
>+  int oconstcount = 0;
>+  operand_entry_t oelast = NULL;
>+  bool iterate = false;
>+
>+  if (length == 1)
>+    return;
>+
>+  oelast = VEC_last (operand_entry_t, *ops);
>+
>+  /* If the last two are constants, pop the constants off, merge them
>+     and try the next two.  */
>+  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
>+    {
>+      operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
>+
>+      if (oelm1->rank == 0
>+	  && is_gimple_min_invariant (oelm1->op)
>+	  && lang_hooks.types_compatible_p (TREE_TYPE (oelm1->op),
>+					    TREE_TYPE (oelast->op)))
>+	{
>+	  tree folded = fold_build2 (opcode, TREE_TYPE (oelm1->op),
>+				     oelm1->op, oelast->op);
>+
>+	  if (is_gimple_min_invariant (folded))
>+	    {
>+	      if (dump_file && (dump_flags & TDF_DETAILS))
>+		fprintf (dump_file, "Merging constants\n");
>+
>+	      VEC_pop (operand_entry_t, *ops);
>+	      VEC_pop (operand_entry_t, *ops);
>+
>+	      add_to_ops_vec (ops, folded);
>+	      reassociate_stats.constants_eliminated++;
>+
>+	      optimize_ops_list (opcode, ops);
>+	      return;
>+	    }
>+	}
>+    }
>+
>+  eliminate_using_constants (opcode, ops);
>+  oelast = NULL;
>+
>+  for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
>+    {
>+      bool done = false;
>+
>+      if (eliminate_not_pairs (opcode, ops, i, oe))
>+	return;
>+      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
>+	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)))
>+	{
>+	  if (done)
>+	    return;
>+	  iterate = true;
>+	  oelast = NULL;
>+	  continue;
>+	}
>+      if (oe->rank == 0 && is_gimple_min_invariant (oe->op))
>+	{
>+	  switch (constant_type (oe->op))
>+	    {
>+	    case INTEGER_CONST_TYPE:
>+	      iconstcount++;
>+	      break;
>+
>+	    case FLOAT_CONST_TYPE:
>+	      fconstcount++;
>+	      break;
>+	    case OTHER_CONST_TYPE:
>+	      oconstcount++;
>+	      break;
>+
>+	    default:
>+	      break;
>+	    }
>+	}
>+      oelast = oe;
>+      i++;
>+    }
>+  if (iconstcount > 0 && dump_file && (dump_flags & TDF_DETAILS))
>+    fprintf (dump_file, "iconstcount is %d\n", iconstcount);
>+  if (fconstcount > 0 && dump_file && (dump_flags & TDF_DETAILS))
>+    fprintf (dump_file, "fconstcount is %d\n", fconstcount);
>+  if (oconstcount> 0 && dump_file && (dump_flags & TDF_DETAILS))
>+    fprintf (dump_file, "oconstcount is %d\n", oconstcount);
>+
>+  length  = VEC_length (operand_entry_t, *ops);
>+  oelast = VEC_last (operand_entry_t, *ops);
>+
>+  if (iterate)
>+    optimize_ops_list (opcode, ops);
>+}
>+
>+#if 0
>+static tree
>+merge_two_ops (enum tree_code code, tree op1, tree op2)
>+{
>+  tree temp;
>+  tree name;
>+  tree newexpr;
>+
>+  temp = create_tmp_var (TREE_TYPE (op1), "mergetmp");
>+  add_referenced_tmp_var (temp);
>+  newexpr = fold_build2 (code, TREE_TYPE (op1), op1, op2);
>+  newexpr = build (MODIFY_EXPR, TREE_TYPE (op1), temp, newexpr);
>+  name = make_ssa_name (temp, newexpr);
>+  TREE_OPERAND (newexpr, 0) = name;
>+  return name;
>+}
>+
>+static void
>+merge_leaves_of_rank (enum tree_code opcode, VEC (tree, heap) *worklist,
>+		      VEC (tree, heap) **results)
>+{
>+  unsigned int length = VEC_length (tree, worklist);
>+  if (length % 2 ==  1)
>+    {
>+      VEC_safe_push (tree, heap, *results, VEC_pop (tree, worklist));
>+      length--;
>+    }
>+  while (length != 0)
>+    {
>+      tree oe1 = VEC_pop (tree, worklist);
>+      tree oe2 = VEC_pop (tree, worklist);
>+      length -= 2;
>+      VEC_safe_push (tree, heap, *results, merge_two_ops (opcode, oe1, oe2));
>+    }
>+  VEC_free (tree, heap, worklist);
>+
>+}
>+
>+static void
>+rewrite_expr_tree_new (tree stmt, enum tree_code opcode,
>+		       VEC (operand_entry_t, heap) **ops)
>+{
>+  VEC(tree, heap) *initial_merge_worklist = NULL;
>+  VEC(tree, heap) *merge_worklist = NULL;
>+  VEC(tree, heap) *stmtsforsale = NULL;
>+  operand_entry_t oe;
>+  unsigned int opslength = VEC_length (operand_entry_t, *ops);
>+
>+  if (opslength < 4)
>+    return;
>+
>+  /* Initialize the worklist */
>+  while (VEC_length (operand_entry_t, *ops) != 0)
>+    {
>+      initial_merge_worklist = NULL;
>+      oe = VEC_pop (operand_entry_t, *ops);
>+      while (!VEC_empty (operand_entry_t, *ops)
>+	     && VEC_last (operand_entry_t, *ops)->rank == oe->rank)
>+	{
>+	  VEC_safe_push (tree, heap, initial_merge_worklist, oe->op);
>+	  oe = VEC_pop (operand_entry_t, *ops);
>+	}
>+      VEC_safe_push (tree, heap, initial_merge_worklist, oe->op);
>+      merge_leaves_of_rank (opcode, initial_merge_worklist, &merge_worklist);
>+    }
>+
>+  gcc_assert (VEC_length (tree, merge_worklist) <= opslength);
>+
>+  while (VEC_length (tree, merge_worklist) != 0)
>+    add_to_ops_vec (ops, VEC_pop (tree, merge_worklist));
>+}
>+#endif
>+
>+static bool
>+is_phi_for_stmt (tree stmt, tree operand)
>+{
>+  tree def_stmt;
>+  tree lhs = TREE_OPERAND (stmt, 0);
>+  use_operand_p arg_p;
>+  ssa_op_iter i;
>+
>+  if (TREE_CODE (operand) != SSA_NAME)
>+    return false;
>+
>+  def_stmt = SSA_NAME_DEF_STMT (operand);
>+  if (TREE_CODE (def_stmt) != PHI_NODE)
>+    return false;
>+
>+  FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
>+    if (lhs == USE_FROM_PTR (arg_p))
>+      return true;
>+  return false;
>+}
>+
>+/* Recursively rewrite our linearized statements so that the operators
>+   match those in OPS[OPINDEX], putting the computation in rank
>+   order.  */
>+
>+static void
>+rewrite_expr_tree (tree stmt, unsigned int opindex,
>+		   VEC(operand_entry_t, heap) * ops)
>+{
>+  tree rhs = TREE_OPERAND (stmt, 1);
>+  operand_entry_t oe;
>+
>+  /* If we have three operands left, then we want to make sure the one
>+     that gets the double binary op are the ones with the same rank.
>+
>+     The alternative we try is to see if this is a destructive
>+     update style statement, which is like:
>+     b = phi (a, ...)
>+     a = c + b;
>+     In that case, we want to use the destructive update form to
>+     expose the possible vectorizer sum reduction opportunity.
>+     In that case, the third operand will be the phi node.
>+
>+     We could, of course, try to be better as noted above, and do a
>+     lot of work to try to find these opportunities in >3 operand
>+     cases, but it is unlikely to be worth it.  */
>+  if (opindex + 3 == VEC_length (operand_entry_t, ops))
>+    {
>+      operand_entry_t oe1, oe2, oe3;
>+
>+      oe1 = VEC_index (operand_entry_t, ops, opindex);
>+      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
>+      oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
>+
>+      if ((oe1->rank == oe2->rank
>+	   && oe2->rank != oe3->rank)
>+	  || (is_phi_for_stmt (stmt, oe3->op)
>+	      && !is_phi_for_stmt (stmt, oe1->op)
>+	      && !is_phi_for_stmt (stmt, oe2->op)))
>+	{
>+	  struct operand_entry temp = *oe3;
>+	  oe3->op = oe1->op;
>+	  oe3->rank = oe1->rank;
>+	  oe1->op = temp.op;
>+	  oe1->rank= temp.rank;
>+	}
>+    }
>+
>+  /* The final recursion case for this function is that you have
>+     exactly two operations left.
>+     If we had one exactly one op in the entire list to start with, we
>+     would have never called this function, and the tail recursion
>+     rewrites them one at a time.  */
>+  if (opindex + 2 == VEC_length (operand_entry_t, ops))
>+    {
>+      operand_entry_t oe1, oe2;
>+
>+      oe1 = VEC_index (operand_entry_t, ops, opindex);
>+      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
>+
>+      if (TREE_OPERAND (rhs, 0) != oe1->op
>+	  || TREE_OPERAND (rhs, 1) != oe2->op)
>+	{
>+
>+	  if (dump_file && (dump_flags & TDF_DETAILS))
>+	    {
>+	      fprintf (dump_file, "Transforming ");
>+	      print_generic_expr (dump_file, rhs, 0);
>+	    }
>+
>+	  TREE_OPERAND (rhs, 0) = oe1->op;
>+	  TREE_OPERAND (rhs, 1) = oe2->op;
>+	  update_stmt (stmt);
>+
>+	  if (dump_file && (dump_flags & TDF_DETAILS))
>+	    {
>+	      fprintf (dump_file, " into ");
>+	      print_generic_stmt (dump_file, rhs, 0);
>+	    }
>+
>+	}
>+      return;
>+    }
>+
>+  /* If we hit here, we should have 3 or more ops left.  */
>+  gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
>+
>+  /* Rewrite the next operator.  */
>+  oe = VEC_index (operand_entry_t, ops, opindex);
>+
>+  if (oe->op != TREE_OPERAND (rhs, 1))
>+    {
>+
>+      if (dump_file && (dump_flags & TDF_DETAILS))
>+	{
>+	  fprintf (dump_file, "Transforming ");
>+	  print_generic_expr (dump_file, rhs, 0);
>+	}
>+
>+      TREE_OPERAND (rhs, 1) = oe->op;
>+      update_stmt (stmt);
>+
>+      if (dump_file && (dump_flags & TDF_DETAILS))
>+	{
>+	  fprintf (dump_file, " into ");
>+	  print_generic_stmt (dump_file, rhs, 0);
>+	}
>+    }
>+  /* Recurse on the LHS of the binary operator, which is guaranteed to
>+     be the non-leaf side.  */
>+  rewrite_expr_tree (SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0)),
>+		     opindex + 1, ops);
>+}
>+
>+/* Transform STMT, which is really (A +B) + (C + D) into the left
>+   linear form, ((A+B)+C)+D.
>+   Recurse on D if necessary.  */
>+
>+static void
>+linearize_expr (tree stmt)
>+{
>+  block_stmt_iterator bsinow, bsirhs;
>+  tree rhs = TREE_OPERAND (stmt, 1);
>+  enum tree_code rhscode = TREE_CODE (rhs);
>+  tree binrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1));
>+  tree binlhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0));
>+  tree newbinrhs = NULL_TREE;
>+
>+  gcc_assert (is_reassociable_op (binlhs, TREE_CODE (rhs))
>+	      && is_reassociable_op (binrhs, TREE_CODE (rhs)));
>+
>+  bsinow = bsi_for_stmt (stmt);
>+  bsirhs = bsi_for_stmt (binrhs);
>+  bsi_move_before (&bsirhs, &bsinow);
>+
>+  TREE_OPERAND (rhs, 1) = TREE_OPERAND (TREE_OPERAND (binrhs, 1), 0);
>+  if (TREE_CODE (TREE_OPERAND (rhs, 1)) == SSA_NAME)
>+    newbinrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1));
>+  TREE_OPERAND (TREE_OPERAND (binrhs, 1), 0) = TREE_OPERAND (binlhs, 0);
>+  TREE_OPERAND (rhs, 0) = TREE_OPERAND (binrhs, 0);
>+
>+  if (dump_file && (dump_flags & TDF_DETAILS))
>+    {
>+      fprintf (dump_file, "Linearized: ");
>+      print_generic_stmt (dump_file, rhs, 0);
>+    }
>+
>+  reassociate_stats.linearized++;
>+  update_stmt (binrhs);
>+  update_stmt (binlhs);
>+  update_stmt (stmt);
>+  TREE_VISITED (binrhs) = 1;
>+  TREE_VISITED (binlhs) = 1;
>+  TREE_VISITED (stmt) = 1;
>+
>+  /* Tail recurse on the new rhs if it still needs reassociation.  */
>+  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode))
>+    linearize_expr (stmt);
>+
>+}
>+
>+/* If LHS has a single immediate use that is a MODIFY_EXPR, return
>+   it.  Otherwise, return NULL.  */
>+
>+static tree
>+get_single_immediate_use (tree lhs)
>+{
>+  use_operand_p immuse;
>+  tree immusestmt;
>+
>+  if (TREE_CODE (lhs) == SSA_NAME
>+      && single_imm_use (lhs, &immuse, &immusestmt))
>+    {
>+      if (TREE_CODE (immusestmt) == RETURN_EXPR)
>+	immusestmt = TREE_OPERAND (immusestmt, 0);
>+      if (TREE_CODE (immusestmt) == MODIFY_EXPR)
>+	return immusestmt;
>+    }
>+  return NULL_TREE;
>+}
>+static VEC(tree, heap) *broken_up_subtracts;
>+
>+
>+/* Recursively negate the value of TONEGATE, and return the SSA_NAME
>+   representing the negated value.  Insertions of any necessary
>+   instructions go before BSI.
>+   This function is recursive in that, if you hand it "a_5" as the
>+   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
>+   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
>+
>+static tree
>+negate_value (tree tonegate, block_stmt_iterator *bsi)
>+{
>+  tree negatedef = tonegate;
>+  tree resultofnegate;
>+
>+  if (TREE_CODE (tonegate) == SSA_NAME)
>+    negatedef = SSA_NAME_DEF_STMT (tonegate);
>+
>+  /* If we are trying to negate a name, defined by an add, negate the
>+     add operands instead.  */
>+  if (TREE_CODE (tonegate) == SSA_NAME
>+      && TREE_CODE (negatedef) == MODIFY_EXPR
>+      && TREE_CODE (TREE_OPERAND (negatedef, 0)) == SSA_NAME
>+      && num_imm_uses (TREE_OPERAND (negatedef, 0)) == 1
>+      && TREE_CODE (TREE_OPERAND (negatedef, 1)) == PLUS_EXPR)
>+    {
>+      block_stmt_iterator bsi;
>+      tree binop = TREE_OPERAND (negatedef, 1);
>+
>+      bsi = bsi_for_stmt (negatedef);
>+      TREE_OPERAND (binop, 0) = negate_value (TREE_OPERAND (binop, 0),
>+					      &bsi);
>+      bsi = bsi_for_stmt (negatedef);
>+      TREE_OPERAND (binop, 1) = negate_value (TREE_OPERAND (binop, 1),
>+					      &bsi);
>+      update_stmt (negatedef);
>+      return TREE_OPERAND (negatedef, 0);
>+    }
>+
>+  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
>+  resultofnegate = force_gimple_operand_bsi (bsi, tonegate, true,
>+					     NULL_TREE);
>+  VEC_safe_push (tree, heap, broken_up_subtracts, resultofnegate);
>+  return resultofnegate;
>+
>+}
>+
>+/* Return true if we should break up the subtract in STMT into an add
>+   with negate.  This is true when we the subtract operands are really
>+   adds, or the subtract itself is used in an add expression.  In
>+   either case, breaking up the subtract into an add with negate
>+   exposes the adds to reassociation.  */
>+
>+static bool
>+should_break_up_subtract (tree stmt)
>+{
>+
>+  tree lhs = TREE_OPERAND (stmt, 0);
>+  tree rhs = TREE_OPERAND (stmt, 1);
>+  tree binlhs = TREE_OPERAND (rhs, 0);
>+  tree binrhs = TREE_OPERAND (rhs, 1);
>+  tree immusestmt;
>+
>+  if (TREE_CODE (binlhs) == SSA_NAME
>+      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR))
>+    return true;
>+
>+  if (TREE_CODE (binrhs) == SSA_NAME
>+      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR))
>+    return true;
>+
>+  if (TREE_CODE (lhs) == SSA_NAME
>+      && (immusestmt = get_single_immediate_use (lhs))
>+      && TREE_CODE (TREE_OPERAND (immusestmt, 1)) == PLUS_EXPR)
>+    return true;
>+  return false;
>+
>+}
>+
>+/* Transform STMT from A - B into A + -B.  */
>+
>+static void
>+break_up_subtract (tree stmt, block_stmt_iterator *bsi)
>+{
>+  tree rhs = TREE_OPERAND (stmt, 1);
>+
>+  if (dump_file && (dump_flags & TDF_DETAILS))
>+    {
>+      fprintf (dump_file, "Breaking up subtract ");
>+      print_generic_stmt (dump_file, stmt, 0);
>+    }
>+
>+  TREE_SET_CODE (TREE_OPERAND (stmt, 1), PLUS_EXPR);
>+  TREE_OPERAND (rhs, 1) = negate_value (TREE_OPERAND (rhs, 1), bsi);
>+
>+  update_stmt (stmt);
>+}
>+
>+/* Recursively linearize a binary expression that is the RHS of STMT.
>+   Place the operands of the expression tree in the vector named OPS.  */
>+
>+static void
>+linearize_expr_tree (VEC(operand_entry_t, heap) **ops, tree stmt)
>+{
>+  block_stmt_iterator bsinow, bsilhs;
>+  tree rhs = TREE_OPERAND (stmt, 1);
>+  tree binrhs = TREE_OPERAND (rhs, 1);
>+  tree binlhs = TREE_OPERAND (rhs, 0);
>+  tree binlhsdef, binrhsdef;
>+  bool binlhsisreassoc = false;
>+  bool binrhsisreassoc = false;
>+  enum tree_code rhscode = TREE_CODE (rhs);
>+
>+  TREE_VISITED (stmt) = 1;
>+
>+  if (TREE_CODE (binlhs) == SSA_NAME)
>+    {
>+      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
>+      binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode);
>+    }
>+
>+  if (TREE_CODE (binrhs) == SSA_NAME)
>+    {
>+      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
>+      binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode);
>+    }
>+
>+  /* If the LHS is not reassociable, but the RHS is, we need to swap
>+     them.  If neither is reassociable, there is nothing we can do, so
>+     just put them in the ops vector.  If the LHS is reassociable,
>+     linearize it.  If both are reassociable, then linearize the RHS
>+     and the LHS.  */
>+
>+  if (!binlhsisreassoc)
>+    {
>+      tree temp;
>+
>+      if (!binrhsisreassoc)
>+	{
>+	  add_to_ops_vec (ops, binrhs);
>+	  add_to_ops_vec (ops, binlhs);
>+	  return;
>+	}
>+
>+      if (dump_file && (dump_flags & TDF_DETAILS))
>+	{
>+	  fprintf (dump_file, "swapping operands of ");
>+	  print_generic_expr (dump_file, stmt, 0);
>+	}
>+
>+      swap_tree_operands (stmt, &TREE_OPERAND (rhs, 0),
>+			  &TREE_OPERAND (rhs, 1));
>+      update_stmt (stmt);
>+
>+      if (dump_file && (dump_flags & TDF_DETAILS))
>+	{
>+	  fprintf (dump_file, " is now ");
>+	  print_generic_stmt (dump_file, stmt, 0);
>+	}
>+
>+      /* We want to make it so the lhs is always the reassociative op,
>+	 so swap.  */
>+      temp = binlhs;
>+      binlhs = binrhs;
>+      binrhs = temp;
>+    }
>+  else if (binrhsisreassoc)
>+    {
>+      linearize_expr (stmt);
>+      gcc_assert (rhs == TREE_OPERAND (stmt, 1));
>+      binlhs = TREE_OPERAND (rhs, 0);
>+      binrhs = TREE_OPERAND (rhs, 1);
>+    }
>+
>+  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
>+	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), rhscode));
>+  bsinow = bsi_for_stmt (stmt);
>+  bsilhs = bsi_for_stmt (SSA_NAME_DEF_STMT (binlhs));
>+  bsi_move_before (&bsilhs, &bsinow);
>+  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs));
>+  add_to_ops_vec (ops, binrhs);
>+}
>+
>+/* Repropagate the negates back into subtracts, since no other pass
>+   currently does it.  */
>+
>+static void
>+repropagate_negates (void)
>+{
>+  unsigned int i = 0;
>+  tree negate;
>+
>+  for (i = 0; VEC_iterate (tree, broken_up_subtracts, i, negate); i++)
>+    {
>+      tree user = get_single_immediate_use (negate);
>+
>+      /* Due to linearization, the negate operand should now be an RHS
>+	 leaf of some PLUS expression.  I.E.
>+
>+	 d = -c
>+	 e = a + d
>+
>+	 So just repropagate it, transforming the PLUS_EXPR back into
>+	 a MINUS_EXPR.  */
>+
>+      if (user
>+	  && TREE_CODE (user) == MODIFY_EXPR
>+	  && TREE_CODE (TREE_OPERAND (user, 1)) == PLUS_EXPR
>+	  && TREE_OPERAND (TREE_OPERAND (user, 1), 1) == negate)
>+	{
>+	  tree rhs = TREE_OPERAND (user, 1);
>+	  TREE_SET_CODE (rhs, MINUS_EXPR);
>+	  TREE_OPERAND (rhs, 1) = get_unary_op (negate, NEGATE_EXPR);
>+	  update_stmt (user);
>+	}
>+    }
>+}
>+
>+/* Break up subtract operations in block BB.
>+
>+   We do this top down because we don't know whether the subtract is
>+   part of a possible chain of reassociation except at the top.
>+
>+   IE given
>+   d = f + g
>+   c = a + e
>+   b = c - d
>+   q = b - r
>+   k = t - q
>+
>+   we want to break up k = t - q, but we won't until we've transformed q
>+   = b - r, which won't be broken up until we transform b = c - d.  */
>+
>+static void
>+break_up_subtract_bb (basic_block bb)
>+{
>+  block_stmt_iterator bsi;
>+  basic_block son;
>+
>+  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
>+    {
>+      tree stmt = bsi_stmt (bsi);
>+
>+      if (TREE_CODE (stmt) == MODIFY_EXPR)
>+	{
>+	  tree lhs = TREE_OPERAND (stmt, 0);
>+	  tree rhs = TREE_OPERAND (stmt, 1);
>+
>+	  TREE_VISITED (stmt) = 0;
>+	  /* If unsafe math optimizations we can do reassociation for
>+	     non-integral types.  */
>+	  if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>+	       || !INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
>+	      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs))
>+		  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs))
>+		  || !flag_unsafe_math_optimizations))
>+	    continue;
>+
>+	  /* Check for a subtract used only in an addition.  If this
>+	     is the case, transform it into add of a negate for better
>+	     reassociation.  IE transform C = A-B into C = A + -B if C
>+	     is only used in an addition.  */
>+	  if (TREE_CODE (rhs) == MINUS_EXPR)
>+	    if (should_break_up_subtract (stmt))
>+	      break_up_subtract (stmt, &bsi);
>+	}
>+    }
>+  for (son = first_dom_son (CDI_DOMINATORS, bb);
>+       son;
>+       son = next_dom_son (CDI_DOMINATORS, son))
>+    break_up_subtract_bb (son);
>+}
>+
>+/* Reassociate expressions in basic block BB and its post-dominator as
>+   children.  */
>+
>+static void
>+reassociate_bb (basic_block bb)
>+{
>+  block_stmt_iterator bsi;
>+  basic_block son;
>+
>+  for (bsi = bsi_last (bb); !bsi_end_p (bsi); bsi_prev (&bsi))
>+    {
>+      tree stmt = bsi_stmt (bsi);
>+
>+      if (TREE_CODE (stmt) == MODIFY_EXPR)
>+	{
>+	  tree lhs = TREE_OPERAND (stmt, 0);
>+	  tree rhs = TREE_OPERAND (stmt, 1);
>+
>+	  /* If this was part of an already processed tree, we don't
>+	     need to touch it again. */
>+	  if (TREE_VISITED (stmt))
>+	    continue;
>+
>+	  /* If unsafe math optimizations we can do reassociation for
>+	     non-integral types.  */
>+	  if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>+	       || !INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
>+	      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs))
>+		  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs))
>+		  || !flag_unsafe_math_optimizations))
>+	    continue;
>+
>+	  if (associative_tree_code (TREE_CODE (rhs)))
>+	    {
>+	      VEC(operand_entry_t, heap) *ops = NULL;
>+
>+	      /* There may be no immediate uses left by the time we
>+		 get here because we may have eliminated them all.  */
>+	      if (TREE_CODE (lhs) == SSA_NAME && num_imm_uses (lhs) == 0)
>+		continue;
>+
>+	      TREE_VISITED (stmt) = 1;
>+	      linearize_expr_tree (&ops, stmt);
>+	      qsort (VEC_address (operand_entry_t, ops),
>+		     VEC_length (operand_entry_t, ops),
>+		     sizeof (operand_entry_t),
>+		     sort_by_operand_rank);
>+	      optimize_ops_list (TREE_CODE (rhs), &ops);
>+
>+	      if (VEC_length (operand_entry_t, ops) == 1)
>+		{
>+		  if (dump_file && (dump_flags & TDF_DETAILS))
>+		    {
>+		      fprintf (dump_file, "Transforming ");
>+		      print_generic_expr (dump_file, rhs, 0);
>+		    }
>+		  TREE_OPERAND (stmt, 1) = VEC_last (operand_entry_t, ops)->op;
>+		  update_stmt (stmt);
>+
>+		  if (dump_file && (dump_flags & TDF_DETAILS))
>+		    {
>+		      fprintf (dump_file, " into ");
>+		      print_generic_stmt (dump_file,
>+					  TREE_OPERAND (stmt, 1), 0);
>+		    }
>+		}
>+	      else
>+		{
>+		  /*rewrite_expr_tree_new (stmt, TREE_CODE (rhs), &ops);*/
>+		  rewrite_expr_tree (stmt, 0, ops);
>+		}
>+
>+	      VEC_free (operand_entry_t, heap, ops);
>+	    }
>+	}
>+    }
>+  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
>+       son;
>+       son = next_dom_son (CDI_POST_DOMINATORS, son))
>+    reassociate_bb (son);
>+}
>+
>+void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
>+void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
>+
>+/* Dump the operand entry vector OPS to FILE.  */
>+
>+void
>+dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
>+{
>+  operand_entry_t oe;
>+  unsigned int i;
>+
>+  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
>+    {
>+      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
>+      print_generic_stmt (file, oe->op, 0);
>+    }
>+}
>+
>+/* Dump the operand entry vector OPS to STDERR.  */
>+
>+void
>+debug_ops_vector (VEC (operand_entry_t, heap) *ops)
>+{
>+  dump_ops_vector (stderr, ops);
>+}
>+
>+static void
>+do_reassoc (void)
>+{
>+  break_up_subtract_bb (ENTRY_BLOCK_PTR);
>+  reassociate_bb (EXIT_BLOCK_PTR);
>+}
>+
>+/* Initialize the reassociation pass.  */
>+
>+static void
>+init_reassoc (void)
>+{
>+  int i;
>+  unsigned int rank = 2;
>+  tree param;
>+  int *bbs = xmalloc ((last_basic_block + 1) * sizeof (int));
>+
>+  memset (&reassociate_stats, 0, sizeof (reassociate_stats));
>+
>+  operand_entry_pool = create_alloc_pool ("operand entry pool",
>+					  sizeof (struct operand_entry), 30);
>+
>+  /* Reverse RPO (Reverse Post Order) will give us something where
>+     deeper loops come later.  */
>+  flow_depth_first_order_compute (NULL, bbs);
>+  bb_rank = xcalloc (last_basic_block + 1, sizeof (unsigned int));
>+
>+  operand_rank = htab_create (511, operand_entry_hash,
>+			      operand_entry_eq, 0);
>+
>+  /* Give each argument a distinct rank.   */
>+  for (param = DECL_ARGUMENTS (current_function_decl);
>+       param;
>+       param = TREE_CHAIN (param))
>+    {
>+      if (default_def (param) != NULL)
>+	{
>+	  tree def = default_def (param);
>+	  insert_operand_rank (def, ++rank);
>+	}
>+    }
>+
>+  /* Give the chain decl a distinct rank. */
>+  if (cfun->static_chain_decl != NULL)
>+    {
>+      tree def = default_def (cfun->static_chain_decl);
>+      if (def != NULL)
>+	insert_operand_rank (def, ++rank);
>+    }
>+
>+  /* Set up rank for each BB  */
>+  for (i = 0; i < n_basic_blocks; i++)
>+    bb_rank[bbs[i]] = ++rank  << 16;
>+
>+  free (bbs);
>+  calculate_dominance_info (CDI_DOMINATORS);
>+  calculate_dominance_info (CDI_POST_DOMINATORS);
>+  broken_up_subtracts = NULL;
>+}
>+
>+/* Cleanup after the reassociation pass, and print stats if
>+   requested.  */
>+
>+static void
>+fini_reassoc (void)
>+{
>+
>+  if (dump_file && (dump_flags & TDF_STATS))
>+    {
>+      fprintf (dump_file, "Reassociation stats:\n");
>+      fprintf (dump_file, "Linearized: %d\n", reassociate_stats.linearized);
>+      fprintf (dump_file, "Constants eliminated: %d\n",
>+	       reassociate_stats.constants_eliminated);
>+      fprintf (dump_file, "Ops eliminated: %d\n",
>+	       reassociate_stats.ops_eliminated);
>+      fprintf (dump_file, "Statements rewritten: %d\n",
>+	       reassociate_stats.rewritten);
>+    }
>+  htab_delete (operand_rank);
>+
>+  free_alloc_pool (operand_entry_pool);
>+  free (bb_rank);
>+  VEC_free (tree, heap, broken_up_subtracts);
>+}
>+
>+/* Gate and execute functions for Reassociation.  */
>+
>+static void
>+execute_reassoc (void)
>+{
>+  init_reassoc ();
>+
>+  do_reassoc ();
>+  repropagate_negates ();
>+
>   fini_reassoc ();
> }
> 
>@@ -635,7 +1566,6 @@ struct tree_opt_pass pass_reassoc =
>   0,					/* properties_provided */
>   0,					/* properties_destroyed */
>   0,					/* todo_flags_start */
>-  TODO_update_ssa | TODO_dump_func 
>-  | TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */
>+  TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */
>   0					/* letter */
> };

Actions: View | Diff

Attachments on bug 113541: 73558 | 73559 | 73560 | 73561 | 73562