14 files changed, 242 insertions, 28 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fda8cb53ddf..6e1633b4571 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,32 @@
+2003-02-26  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>
+
+	* cse.c (count_reg_usage): Fix handling of REG_EQUAL notes.
+
+	* Makefile.in (loop-unroll.o): New.
+	* cfgloop.h (UAP_PEEL, UAP_UNROLL, UAP_UNROLL_ALL): New.
+	(unroll_and_peel_loops): Declare.
+	* alias.c (init_alias_analysis): Flag_unroll_loops renamed to
+	flag_old_unroll_loops.
+	* loop.c (loop_invariant_p): Ditto.
+	* unroll.c (unroll_loop): Flag_unroll_all_loops renamed to
+	flag_old_unroll_all_loops.
+	* flags.h (flag_unroll_loops): Renamed to flag_old_unroll_loops.
+	(flag_unroll_all_loops): Renamed to flag_old_unroll_all_loops.
+	* params.def (PARAM_MAX_UNROLLED_INSNS): Default value changed.
+	(PARAM_MAX_AVERAGE_UNROLLED_INSNS, PARAM_MAX_UNROLL_TIMES,
+	PARAM_MAX_PEELED_INSNS, PARAM_MAX_PEEL_TIMES,
+	PARAM_MAX_COMPLETELY_PEELED_INSNS, PARAM_MAX_COMPLETELY_PEEL_TIMES,
+	PARAM_MAX_ONCE_PEELED_INSNS): New.
+	* toplev.c (flag_old_unroll_loops, flag_old_unroll_all_loops): New.
+	(flag_unroll_loops, flag_unroll_all_loops): Used for new unroller
+	instead of old one.
+	(flag_peel_loops): New.
+	(lang_independent_options): The new flags added.
+	(rest_of_compilation): Call new unroller.
+	(process_options): Setup flags for coexistence of old and new unroller.
+	* doc/invoke.texi: Document new options.
+	* doc/passes.texi: Document new unroller pass.
+
 2003-02-26  David Billinghurst <David.Billinghurst@riotinto.com>
 
 	* fixinc/fixincl.x: Regenerate
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index ceb594bf1ee..494a14b48d4 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -74,7 +74,7 @@ XCFLAGS =
 TCFLAGS =
 CFLAGS = -g
 STAGE1_CFLAGS = -g @stage1_cflags@
-BOOT_CFLAGS = -g -O2
+BOOT_CFLAGS = -g -O2
 
 # Flags to determine code coverage. When coverage is disabled, this will
 # contain the optimization flags, as you normally want code coverage
@@ -768,7 +768,7 @@ C_OBJS = c-parse.o c-lang.o c-pretty-print.o $(C_AND_OBJC_OBJS)
 
 OBJS = alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	   \
  cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
- cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o		   \
+ cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o	   \
  cfgrtl.o combine.o conflict.o convert.o cse.o cselib.o dbxout.o	   \
  debug.o df.o diagnostic.o doloop.o dominance.o		                   \
  dwarf2asm.o dwarf2out.o dwarfout.o emit-rtl.o except.o explow.o	   \
@@ -1612,6 +1612,9 @@ loop-init.o : loop-init.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) gcov-io.h \
 loop-unswitch.o : loop-unswitch.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
    $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h params.h \
    output.h $(EXPR_H) coretypes.h $(TM_H)
+loop-unroll.o : loop-unroll.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
+   $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h params.h \
+   output.h $(EXPR_H) coretypes.h
 dominance.o : dominance.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    hard-reg-set.h $(BASIC_BLOCK_H) et-forest.h
 et-forest.o : et-forest.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) et-forest.h alloc-pool.h
diff --git a/gcc/alias.c b/gcc/alias.c
index 23f41341844..de8d8d7c221 100644
--- a/gcc/alias.c
+++ b/gcc/alias.c
@@ -2763,7 +2763,7 @@ init_alias_analysis ()
 
   new_reg_base_value = (rtx *) xmalloc (reg_base_value_size * sizeof (rtx));
   reg_seen = (char *) xmalloc (reg_base_value_size);
-  if (! reload_completed && flag_unroll_loops)
+  if (! reload_completed && flag_old_unroll_loops)
     {
       /* ??? Why are we realloc'ing if we're just going to zero it?  */
       alias_invariant = (rtx *)xrealloc (alias_invariant,
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index c46602f790a..ae1e5290e40 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -337,3 +337,11 @@ extern void loop_optimizer_finalize	PARAMS ((struct loops *, FILE *));
 /* Optimization passes.  */
 extern void unswitch_loops		PARAMS ((struct loops *));
 
+enum
+{
+  UAP_PEEL = 1,		/* Enables loop peeling.  */
+  UAP_UNROLL = 2,	/* Enables unrolling of loops if it seems profitable.  */
+  UAP_UNROLL_ALL = 4	/* Enables unrolling of all loops.  */
+};
+
+extern void unroll_and_peel_loops	PARAMS ((struct loops *, int));
diff --git a/gcc/cse.c b/gcc/cse.c
index 6c763f48792..6321c7c6dd9 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -7459,6 +7459,7 @@ count_reg_usage (x, counts, dest, incr)
      int incr;
 {
   enum rtx_code code;
+  rtx note;
   const char *fmt;
   int i, j;
 
@@ -7516,16 +7517,13 @@ count_reg_usage (x, counts, dest, incr)
       /* Things used in a REG_EQUAL note aren't dead since loop may try to
 	 use them.  */
 
-      count_reg_usage (REG_NOTES (x), counts, NULL_RTX, incr);
+      note = find_reg_equal_equiv_note (x);
+      if (note)
+        count_reg_usage (XEXP (note, 0), counts, NULL_RTX, incr);
       return;
 
-    case EXPR_LIST:
     case INSN_LIST:
-      if (REG_NOTE_KIND (x) == REG_EQUAL
-	  || (REG_NOTE_KIND (x) != REG_NONNEG && GET_CODE (XEXP (x,0)) == USE))
-	count_reg_usage (XEXP (x, 0), counts, NULL_RTX, incr);
-      count_reg_usage (XEXP (x, 1), counts, NULL_RTX, incr);
-      return;
+      abort ();
 
     default:
       break;
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ff9a9a7642e..747f0036957 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -291,7 +291,8 @@ in the following sections.
 -fsched2-use-traces  -fsignaling-nans @gol
 -fsingle-precision-constant  -fssa -fssa-ccp -fssa-dce @gol
 -fstrength-reduce  -fstrict-aliasing  -ftracer -fthread-jumps @gol
--funit-at-a-time -funroll-all-loops  -funroll-loops  -funswitch-loops @gol
+-funit-at-a-time  -funroll-all-loops  -funroll-loops  -fpeel-loops @gol
+-funswitch-loops  -fold-unroll-loops  -fold-unroll-all-loops @gol
 --param @var{name}=@var{value}
 -O  -O0  -O1  -O2  -O3  -Os}
 
@@ -4292,17 +4293,50 @@ extra optimizations to take place but consumes more memory.
 
 @item -funroll-loops
 @opindex funroll-loops
-Unroll loops whose number of iterations can be determined at compile
-time or upon entry to the loop.  @option{-funroll-loops} implies both
-@option{-fstrength-reduce} and @option{-frerun-cse-after-loop}.  This
-option makes code larger, and may or may not make it run faster.
+Unroll loops whose number of iterations can be determined at compile time or
+upon entry to the loop.  @option{-funroll-loops} implies
+@option{-frerun-cse-after-loop}.  It also turns on complete loop peeling
+(i.e. complete removal of loops with small constant number of iterations).
+This option makes code larger, and may or may not make it run faster.
 
 @item -funroll-all-loops
 @opindex funroll-all-loops
 Unroll all loops, even if their number of iterations is uncertain when
 the loop is entered.  This usually makes programs run more slowly.
 @option{-funroll-all-loops} implies the same options as
-@option{-funroll-loops},
+@option{-funroll-loops}.
+
+@item -fpeel-loops
+@opindex fpeel-loops
+Peels loops for which there is enough information (from profile feedback)
+that they do not roll much.  It also turns on complete loop peeling
+(i.e. complete removal of loops with small constant number of iterations).
+
+@item -funswitch-loops
+@opindex funswitch-loops
+Move branches with loop invariant conditions out of the loop, with duplicates
+of the loop on both branches (modified according to result of the condition).
+
+@item -fold-unroll-loops
+@opindex fold-unroll-loops
+Unroll loops whose number of iterations can be determined at compile
+time or upon entry to the loop, using the old loop unroller whose loop
+recognition is based on notes from the front end.  @option{-fold-unroll-loops}
+implies both @option{-fstrength-reduce} and @option{-frerun-cse-after-loop}.
+This option makes code larger, and may or may not make it run faster.
+
+@item -fold-unroll-all-loops
+@opindex fold-unroll-all-loops
+Unroll all loops, even if their number of iterations is uncertain when
+the loop is entered.  This is done using the old loop unroller whose loop
+recognition is based on notes from the front end.  This usually makes
+programs run more slowly.  @option{-fold-unroll-all-loops} implies the same
+options as @option{-fold-unroll-loops}.
+
+@item -funswitch-loops
+@opindex funswitch-loops
+Move branches with loop invariant conditions out of the loop, with duplicates
+of the loop on both branches (modified according to result of the condition).
 
 @item -funswitch-loops
 @opindex funswitch-loops
@@ -4418,6 +4452,28 @@ The maximum number of instructions that a loop should have if that loop
 is unrolled, and if the loop is unrolled, it determines how many times
 the loop code is unrolled.
 
+@item max-average-unrolled-insns
+The maximum number of instructions biased by probabilities of their execution
+that a loop should have if that loop is unrolled, and if the loop is unrolled,
+it determines how many times the loop code is unrolled.
+
+@item max-unroll-times
+The maximum number of unrollings of a single loop.
+
+@item max-peeled-insns
+The maximum number of instructions that a loop should have if that loop
+is peeled, and if the loop is peeled, it determines how many times
+the loop code is peeled.
+
+@item max-peel-times
+The maximum number of peelings of a single loop.
+
+@item max-completely-peeled-insns
+The maximum number of insns of a completely peeled loop.
+
+@item max-completely-peel-times
+The maximum number of iterations of a loop to be suitable for complete peeling.
+
 @item max-unswitch-insns
 The maximum number of insns of an unswitched loop.
 
diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi
index ae8b92cdff7..10cc3815c98 100644
--- a/gcc/doc/passes.texi
+++ b/gcc/doc/passes.texi
@@ -334,10 +334,11 @@ some functions in @file{integrate.c} and the header @file{integrate.h}.
 Loop dependency analysis routines are contained in @file{dependence.c}.
 
 Second loop optimization pass takes care of basic block level optimalizations --
-unswitching loops. The source files are
+unrolling, peeling and unswitching loops. The source files are
 @file{cfgloopanal.c} and @file{cfgloopmanip.c} containing generic loop
 analysis and manipulation code, @file{loop-init.c} with initialization and
-finalization code, @file{loop-unswitch.c} for loop unswitching.
+finalization code, @file{loop-unswitch.c} for loop unswitching and
+@file{loop-unroll.c} for loop unrolling and peeling.
 
 @opindex dL
 The option @option{-dL} causes a debugging dump of the RTL code after
diff --git a/gcc/flags.h b/gcc/flags.h
index 559f70fd6b8..4a806896901 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -282,12 +282,12 @@ extern int flag_strength_reduce;
    UNROLL_MODULO) or at run-time (preconditioned to be UNROLL_MODULO) are
    unrolled.  */
 
-extern int flag_unroll_loops;
+extern int flag_old_unroll_loops;
 
 /* Nonzero enables loop unrolling in unroll.c.  All loops are unrolled.
    This is generally not a win.  */
 
-extern int flag_unroll_all_loops;
+extern int flag_old_unroll_all_loops;
 
 /* Nonzero forces all invariant computations in loops to be moved
    outside the loop.  */
diff --git a/gcc/loop.c b/gcc/loop.c
index 1b2acec67af..48ca0023863 100644
--- a/gcc/loop.c
+++ b/gcc/loop.c
@@ -3245,7 +3245,7 @@ loop_invariant_p (loop, x)
 
 	 We don't know the loop bounds here though, so just fail for all
 	 labels.  */
-      if (flag_unroll_loops)
+      if (flag_old_unroll_loops)
 	return 0;
       else
 	return 1;
diff --git a/gcc/params.def b/gcc/params.def
index 66d823164d2..a0744f9b3ad 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -149,7 +149,43 @@ DEFPARAM(PARAM_MAX_GCSE_PASSES,
 DEFPARAM(PARAM_MAX_UNROLLED_INSNS,
 	 "max-unrolled-insns",
 	 "The maximum number of instructions to consider to unroll in a loop",
-	 100)
+	 200)
+/* This parameter limits how many times the loop is unrolled depending
+   on number of insns really executed in each iteration.  */
+DEFPARAM(PARAM_MAX_AVERAGE_UNROLLED_INSNS,
+	 "max-average-unrolled-insns",
+	 "The maximum number of instructions to consider to unroll in a loop on average",
+	 80)
+/* The maximum number of unrollings of a single loop.  */
+DEFPARAM(PARAM_MAX_UNROLL_TIMES,
+	"max-unroll-times",
+	"The maximum number of unrollings of a single loop",
+	8)
+/* The maximum number of insns of a peeled loop.  */
+DEFPARAM(PARAM_MAX_PEELED_INSNS,
+	"max-peeled-insns",
+	"The maximum number of insns of a peeled loop",
+	120)
+/* The maximum number of peelings of a single loop.  */
+DEFPARAM(PARAM_MAX_PEEL_TIMES,
+	"max-peel-times",
+	"The maximum number of peelings of a single loop",
+	16)
+/* The maximum number of insns of a completely peeled loop.  */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS,
+	"max-completely-peeled-insns",
+	"The maximum number of insns of a completely peeled loop",
+	120)
+/* The maximum number of peelings of a single loop that is peeled completely.  */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES,
+	"max-completely-peel-times",
+	"The maximum number of peelings of a single loop that is peeled completely",
+	16)
+/* The maximum number of insns of a peeled loop that rolls only once.  */
+DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS,
+	"max-once-peeled-insns",
+	"The maximum number of insns of a peeled loop that rolls only once",
+	200)
 
 /* The maximum number of insns of an unswitched loop.  */
 DEFPARAM(PARAM_MAX_UNSWITCH_INSNS,
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 12fce54444d..bcdaf536209 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2003-02-26  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>
+
+	* lib/scanasm.exp: Add support for counting numbers of
+	occurrences.
+	* gcc.dg/unswitch-1.c, gcc.dg/peel-1.c, gcc.dg/unroll-1.c,
+	gcc.dg/unroll-2.c, gcc.dg/unroll-3.c: New tests.
+
 2003-02-25  Mark Mitchell  <mark@codesourcery.com>
 
 	PR c++/9683
diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
index ebca4caac75..774ec4a7ecd 100644
--- a/gcc/testsuite/lib/scanasm.exp
+++ b/gcc/testsuite/lib/scanasm.exp
@@ -56,6 +56,44 @@ proc scan-assembler { args } {
     }
 }
 
+# Call pass if pattern (arg 0) is present exactly arg 1 times, otherwise fail.
+proc scan-assembler-times { args } {
+    if { [llength $args] < 2 } {
+	error "scan-assembler-times: too few arguments"
+	return
+    }
+    if { [llength $args] > 3 } {
+	error "scan-assembler-times: too many arguments"
+	return
+    }
+    if { [llength $args] >= 3 } {
+	switch [dg-process-target [lindex $args 2]] {
+	    "S" { }
+	    "N" { return }
+	    "F" { error "scan-assembler-times: `xfail' not allowed here" }
+	    "P" { error "scan-assembler-times: `xfail' not allowed here" }
+	}
+    }
+
+    # This assumes that we are two frames down from dg-test, and that
+    # it still stores the filename of the testcase in a local variable "name".
+    # A cleaner solution would require a new dejagnu release.
+    upvar 2 name testcase
+
+    # This must match the rule in gcc-dg.exp.
+    set output_file "[file rootname [file tail $testcase]].s"
+
+    set fd [open $output_file r]
+    set text [read $fd]
+    close $fd
+    # Plain "regexp -all" returns the match count, unaffected by capture groups.
+    if { [regexp -all -- [lindex $args 0] $text] == [lindex $args 1] } {
+	pass "$testcase scan-assembler-times [lindex $args 0] [lindex $args 1]"
+    } else {
+	fail "$testcase scan-assembler-times [lindex $args 0] [lindex $args 1]"
+    }
+}
+
 # Call pass if pattern is not present, otherwise fail.
 proc scan-assembler-not { args } {
     if { [llength $args] < 1 } {
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 89324090f61..ae068d43472 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -515,13 +515,22 @@ int flag_strength_reduce = 0;
    UNROLL_MODULO) or at run-time (preconditioned to be UNROLL_MODULO) are
    unrolled.  */
 
-int flag_unroll_loops;
+int flag_old_unroll_loops;
 
 /* Nonzero enables loop unrolling in unroll.c.  All loops are unrolled.
    This is generally not a win.  */
 
+int flag_old_unroll_all_loops;
+
+/* Enables unrolling of simple loops in loop-unroll.c.  */
+int flag_unroll_loops;
+
+/* Enables unrolling of all loops in loop-unroll.c.  */
 int flag_unroll_all_loops;
 
+/* Nonzero enables loop peeling.  */
+int flag_peel_loops;
+
 /* Nonzero enables loop unswitching.  */
 int flag_unswitch_loops;
 
@@ -1016,6 +1025,12 @@ static const lang_independent_options f_options[] =
    N_("Perform loop unrolling when iteration count is known") },
   {"unroll-all-loops", &flag_unroll_all_loops, 1,
    N_("Perform loop unrolling for all loops") },
+  {"old-unroll-loops", &flag_old_unroll_loops, 1,
+   N_("Perform loop unrolling when iteration count is known") },
+  {"old-unroll-all-loops", &flag_old_unroll_all_loops, 1,
+   N_("Perform loop unrolling for all loops") },
+  {"peel-loops", &flag_peel_loops, 1,
+   N_("Perform loop peeling") },
   {"unswitch-loops", &flag_unswitch_loops, 1,
    N_("Perform loop unswitching") },
   {"prefetch-loop-arrays", &flag_prefetch_loop_arrays, 1,
@@ -2950,7 +2965,10 @@ rest_of_compilation (decl)
       /* CFG is no longer maintained up-to-date.  */
       free_bb_for_insn ();
 
-      do_unroll = flag_unroll_loops ? LOOP_UNROLL : LOOP_AUTO_UNROLL;
+      if (flag_unroll_loops)
+	do_unroll = 0;		/* Having two unrollers is useless.  */
+      else
+	do_unroll = flag_old_unroll_loops ? LOOP_UNROLL : LOOP_AUTO_UNROLL;
       do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0;
       if (flag_rerun_loop_opt)
 	{
@@ -3090,7 +3108,9 @@ rest_of_compilation (decl)
   /* Perform loop optimalizations.  It might be better to do them a bit
      sooner, but we want the profile feedback to work more efficiently.  */
   if (optimize > 0
-      && flag_unswitch_loops)
+      && (flag_unswitch_loops
+	  || flag_peel_loops
+	  || flag_unroll_loops))
     {
       struct loops *loops;
       timevar_push (TV_LOOP);
@@ -3106,6 +3126,12 @@ rest_of_compilation (decl)
 	  if (flag_unswitch_loops)
 	    unswitch_loops (loops);
 
+ 	  if (flag_peel_loops || flag_unroll_loops)
+ 	    unroll_and_peel_loops (loops,
+		(flag_peel_loops ? UAP_PEEL : 0) |
+		(flag_unroll_loops ? UAP_UNROLL : 0) |
+		(flag_unroll_all_loops ? UAP_UNROLL_ALL : 0));
+
 	  loop_optimizer_finalize (loops, rtl_dump_file);
 	}
 
@@ -5134,15 +5160,27 @@ process_options ()
      be done.  */
   if (flag_unroll_all_loops)
     flag_unroll_loops = 1;
-  /* Loop unrolling requires that strength_reduction be on also.  Silently
+
+  if (flag_unroll_loops)
+    {
+      flag_old_unroll_loops = 0;
+      flag_old_unroll_all_loops = 0;
+    }
+
+  if (flag_old_unroll_all_loops)
+    flag_old_unroll_loops = 1;
+
+  /* Old loop unrolling requires that strength_reduction be on also.  Silently
      turn on strength reduction here if it isn't already on.  Also, the loop
      unrolling code assumes that cse will be run after loop, so that must
      be turned on also.  */
-  if (flag_unroll_loops)
+  if (flag_old_unroll_loops)
     {
       flag_strength_reduce = 1;
       flag_rerun_cse_after_loop = 1;
     }
+  if (flag_unroll_loops || flag_peel_loops)
+    flag_rerun_cse_after_loop = 1;
 
   if (flag_non_call_exceptions)
     flag_asynchronous_unwind_tables = 1;
diff --git a/gcc/unroll.c b/gcc/unroll.c
index 849b1b58263..982b32dfbf2 100644
--- a/gcc/unroll.c
+++ b/gcc/unroll.c
@@ -1123,7 +1123,7 @@ unroll_loop (loop, insn_count, strength_reduce_p)
 
   /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
      the loop unless all loops are being unrolled.  */
-  if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
+  if (unroll_type == UNROLL_NAIVE && ! flag_old_unroll_all_loops)
     {
       if (loop_dump_stream)
 	fprintf (loop_dump_stream,