rsha Jagasia <harsha.jagasia@amd.com>

Jan Sjodin <jan.sjodin@amd.com>

* tree-vect-analyze.c (vect_analyze_operations): Change comparison of loop iterations with threshold to less than or equal to instead of less than. Reduce min_scalar_loop_bound by one.
* tree-vect-transform.c (vect_estimate_min_profitable_iters): Change prologue and epilogue iterations estimate to vf/2, when unknown at compile-time. Change versioning guard cost to taken_branch_cost. If peeling for alignment is unknown at compile-time, change peel guard costs to one taken branch and one not-taken branch per peeled loop. If peeling for alignment is known but number of scalar loop iterations is unknown at compile-time, change peel guard costs to one taken branch per peeled loop. Change the cost model equation to consider vector iterations as the loop iterations less the prologue and epilogue iterations. Change outside vector cost check to less than or equal to zero instead of equal to zero.
(vect_do_peeling_for_loop_bound): Reduce min_scalar_loop_bound by one.
* tree-vectorizer.h: Add TARG_COND_TAKEN_BRANCH_COST and TARG_COND_NOT_TAKEN_BRANCH_COST.
* config/i386/i386.h (processor_costs): Add scalar_stmt_cost, scalar_load_cost, scalar_store_cost, vec_stmt_cost, vec_to_scalar_cost, scalar_to_vec_cost, vec_align_load_cost, vect_unalign_load_cost, vec_store_cost, cond_taken_branch_cost, cond_not_taken_branch_cost. Define macros for x86 costs.
* config/i386/i386.c: (size_cost): Set scalar_stmt_cost, scalar_load_cost, scalar_store_cost, vec_stmt_cost, vec_to_scalar_cost, scalar_to_vec_cost, vec_align_load_cost, vect_unalign_load_cost, vec_store_cost, cond_taken_branch_cost, cond_not_taken_branch_cost to one.
(i386_cost, i486_cost, pentium_cost, pentiumpro_cost, geode_cost, k6_cost, athlon_cost, pentium4_cost, nocona_cost, core2_cost, generic64_cost, generic32_cost): Set to default untuned costs.
(k8_cost, amdfam10_cost): Costs for vectorization tuned.
(x86_builtin_vectorization_cost): New.

2007-09-10 Harsha Jagasia <harsha.jagasia@amd.com>

* gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: Change dg-final to expect 1 non-profitable loop and 3 profitable loops.
* gcc.dg/vect/costmodel/x86-64/costmodel-vect-31.c: Change dg-final to expect 1 non-profitable loop and 3 profitable loops.
* gcc.dg/vect/costmodel/x86-64/costmodel-fast-math-vect-pr29925.c: Change dg-final to expect 1 profitable loop.
* gcc.dg/vect/costmodel/i386/costmodel-fast-math-vect-pr29925.c: Change dg-final to expect 1 profitable loop.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@128353 138bc75d-0d04-0410-961f-82ee72b054a4
author: hjagasia <hjagasia@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-09-11 00:13:47 +0000
committer: hjagasia <hjagasia@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-09-11 00:13:47 +0000
commit: 6202d4dbe64fab3147f67e6c836249f7e31ddd6c (patch)
tree: ac4783aa576a9af7b40d78f139e042b91e781996 /gcc/tree-vect-transform.c
parent: c8ac5d9a0464767d7091606c4d55aaaf8edc511a (diff)
download: gcc-6202d4dbe64fab3147f67e6c836249f7e31ddd6c.tar.gz
1 files changed, 37 insertions, 42 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 30dbf712e55..e2ee92b0d0e 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -124,6 +124,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
   int nbbs = loop->num_nodes;
   int byte_misalign;
+  int peel_guard_costs = 0;
   int innerloop_iters = 0, factor;
   VEC (slp_instance, heap) *slp_instances;
   slp_instance instance;
@@ -141,7 +142,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
 
   if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
     {
-      vec_outside_cost += TARG_COND_BRANCH_COST;
+      vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "cost model: Adding cost of checks for loop "
                  "versioning.\n");
@@ -188,7 +189,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
      loop.
 
      FORNOW: If we dont know the value of peel_iters for prologue or epilogue
-     at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1).
+     at compile-time - we assume it's vf/2 (the worst would be vf-1).
 
      TODO: Build an expression that represents peel_iters for prologue and
      epilogue to be used in a run-time test.  */
@@ -197,18 +198,26 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
 
   if (byte_misalign < 0)
     {
-      peel_iters_prologue = (vf - 1)/2;
+      peel_iters_prologue = vf/2;
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "cost model: "
-                 "prologue peel iters set to (vf-1)/2.");
+                 "prologue peel iters set to vf/2.");
 
       /* If peeling for alignment is unknown, loop bound of main loop becomes
          unknown.  */
-      peel_iters_epilogue = (vf - 1)/2;
+      peel_iters_epilogue = vf/2;
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "cost model: "
-                 "epilogue peel iters set to (vf-1)/2 because "
+                 "epilogue peel iters set to vf/2 because "
                  "peeling for alignment is unknown .");
+
+      /* If peeled iterations are unknown, count a taken branch and a not taken
+	 branch per peeled loop. Even if scalar loop iterations are known, 
+	 vector iterations are not known since peeled prologue iterations are
+	 not known. Hence guards remain the same.  */
+      peel_guard_costs +=  2 * (TARG_COND_TAKEN_BRANCH_COST
+			       + TARG_COND_NOT_TAKEN_BRANCH_COST);
+
     }
   else 
     {
@@ -226,11 +235,16 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
 
       if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
         {
-          peel_iters_epilogue = (vf - 1)/2;
+          peel_iters_epilogue = vf/2;
           if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "cost model: "
-                     "epilogue peel iters set to (vf-1)/2 because "
+                     "epilogue peel iters set to vf/2 because "
                      "loop iterations are unknown .");
+
+	  /* If peeled iterations are known but number of scalar loop
+	     iterations are unknown, count a taken branch per peeled loop.  */
+	  peel_guard_costs +=  2 * TARG_COND_TAKEN_BRANCH_COST;
+
         }
       else      
 	{
@@ -241,33 +255,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
 	}
     }
 
-  /* Requires a prologue loop when peeling to handle misalignment. Add cost of
-     two guards, one for the peeled loop and one for the vector loop.  */
-
-  if (peel_iters_prologue)
-    {
-      vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "cost model: Adding cost of checks for "
-                 "prologue.\n");
-    }
-
- /* Requires an epilogue loop to finish up remaining iterations after vector
-    loop. Add cost of two guards, one for the peeled loop and one for the
-    vector loop.  */
-
-  if (peel_iters_epilogue
-      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
-    {
-      vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "cost model : Adding cost of checks for "
-                 "epilogue.\n");
-    }
-
   vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
-                      + (peel_iters_epilogue * scalar_single_iter_cost);
+                      + (peel_iters_epilogue * scalar_single_iter_cost)
+                      + peel_guard_costs;
 
   /* Allow targets add additional (outside-of-loop) costs. FORNOW, the only
      information we provide for the target is whether testing against the
@@ -305,11 +295,13 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
 
   if ((scalar_single_iter_cost * vf) > vec_inside_cost)
     {
-      if (vec_outside_cost == 0)
+      if (vec_outside_cost <= 0)
         min_profitable_iters = 1;
       else
         {
-          min_profitable_iters = (vec_outside_cost * vf)
+          min_profitable_iters = (vec_outside_cost * vf 
+                                  - vec_inside_cost * peel_iters_prologue
+                                  - vec_inside_cost * peel_iters_epilogue)
                                  / ((scalar_single_iter_cost * vf)
                                     - vec_inside_cost);
 
@@ -344,8 +336,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
                peel_iters_epilogue);
       fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
 	       min_profitable_iters);
-      fprintf (vect_dump, "  Actual minimum iters for profitability: %d\n",
-	       min_profitable_iters < vf ? vf : min_profitable_iters);
     }
 
   min_profitable_iters = 
@@ -355,6 +345,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
      if (niters <= min_profitable_iters)
        then skip the vectorized loop.  */
   min_profitable_iters--;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "  Profitability threshold = %d\n",
+	     min_profitable_iters);
+    
   return min_profitable_iters;
 }
 
@@ -6452,8 +6447,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
 
   /* Analyze cost to set threshhold for vectorized loop.  */
   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
-  min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
-                          * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
+			    * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
 
   /* Use the cost model only if it is more conservative than user specified
      threshold.  */
@@ -6464,8 +6459,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
           || min_profitable_iters > min_scalar_loop_bound))
     th = (unsigned) min_profitable_iters;
 
-  if (min_profitable_iters
-      && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+  if (((LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
+      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
       && vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "vectorization may not be profitable.");
author	hjagasia <hjagasia@138bc75d-0d04-0410-961f-82ee72b054a4>	2007-09-11 00:13:47 +0000
committer	hjagasia <hjagasia@138bc75d-0d04-0410-961f-82ee72b054a4>	2007-09-11 00:13:47 +0000
commit	6202d4dbe64fab3147f67e6c836249f7e31ddd6c (patch)
tree	ac4783aa576a9af7b40d78f139e042b91e781996 /gcc/tree-vect-transform.c
parent	c8ac5d9a0464767d7091606c4d55aaaf8edc511a (diff)
download	gcc-6202d4dbe64fab3147f67e6c836249f7e31ddd6c.tar.gz