summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/sort.pm86
-rw-r--r--lib/sort.t4
-rw-r--r--perl.h5
-rw-r--r--pp_sort.c20
4 files changed, 53 insertions, 62 deletions
diff --git a/lib/sort.pm b/lib/sort.pm
index c1ea71cc80..c9ef3faaf9 100644
--- a/lib/sort.pm
+++ b/lib/sort.pm
@@ -8,9 +8,6 @@ $sort::quicksort_bit = 0x00000001;
$sort::mergesort_bit = 0x00000002;
$sort::sort_bits = 0x000000FF; # allow 256 different ones
$sort::stable_bit = 0x00000100;
-$sort::insensitive_bit = 0x00000200;
-$sort::safe_bits = 0x00000300;
-$sort::fast_bit = 0x00000400;
use strict;
@@ -24,24 +21,14 @@ sub import {
local $_;
no warnings 'uninitialized'; # $^H{SORT} bitops would warn
while ($_ = shift(@_)) {
- if (/^q(?:uick)?sort$/) {
+ if (/^_q(?:uick)?sort$/) {
$^H{SORT} &= ~$sort::sort_bits;
$^H{SORT} |= $sort::quicksort_bit;
- return;
- } elsif ($_ eq 'mergesort') {
+ } elsif ($_ eq '_mergesort') {
$^H{SORT} &= ~$sort::sort_bits;
$^H{SORT} |= $sort::mergesort_bit;
- return;
- } elsif ($_ eq 'safe') {
- $^H{SORT} &= ~$sort::fast_bit;
- $^H{SORT} |= $sort::safe_bits;
- $_ = 'mergesort';
- redo;
- } elsif ($_ eq 'fast') {
- $^H{SORT} &= ~$sort::safe_bits;
- $^H{SORT} |= $sort::fast_bit;
- $_ = 'quicksort';
- redo;
+ } elsif ($_ eq 'stable') {
+ $^H{SORT} |= $sort::stable_bit;
} else {
require Carp;
Carp::croak("sort: unknown subpragma '@_'");
@@ -54,8 +41,7 @@ sub current {
if ($^H{SORT}) {
push @sort, 'quicksort' if $^H{SORT} & $sort::quicksort_bit;
push @sort, 'mergesort' if $^H{SORT} & $sort::mergesort_bit;
- push @sort, 'safe' if $^H{SORT} & $sort::safe_bits;
- push @sort, 'fast' if $^H{SORT} & $sort::fast_bit;
+ push @sort, 'stable' if $^H{SORT} & $sort::stable_bit;
}
push @sort, 'mergesort' unless @sort;
join(' ', @sort);
@@ -70,18 +56,13 @@ sort - perl pragma to control sort() behaviour
=head1 SYNOPSIS
- use sort 'quicksort';
- use sort 'mergesort';
+ use sort 'stable'; # guarantee stability
+ use sort '_quicksort'; # use a quicksort algorithm
+ use sort '_mergesort'; # use a mergesort algorithm
- use sort 'qsort'; # alias for quicksort
+ use sort '_qsort'; # alias for quicksort
- # alias for mergesort: insensitive and stable
- use sort 'safe';
-
- # alias for raw quicksort: sensitive and nonstable
- use sort 'fast';
-
- my $current = sort::current();
+ my $current = sort::current(); # identify prevailing algorithm
=head1 DESCRIPTION
@@ -89,24 +70,35 @@ With the sort pragma you can control the behaviour of the builtin
sort() function.
In Perl versions 5.6 and earlier the quicksort algorithm was used to
-implement sort(), but in Perl 5.8 the algorithm was changed to mergesort,
-mainly to guarantee insensitiveness to sort input: the worst case of
-quicksort is O(N**2), while mergesort is always O(N log N).
-
-On the other hand, for same cases (especially for shorter inputs)
-quicksort is faster.
-
-In Perl 5.8 and later by default quicksort is wrapped into a
-stabilizing layer. A stable sort means that for records that compare
-equal, the original input ordering is preserved. Mergesort is stable;
-quicksort is not.
-
-The metapragmas 'fast' and 'safe' select quicksort without the
-stabilizing layer and mergesort, respectively. In other words,
-'safe' is the default.
-
-Finally, the sort performance is also dependent on the platform
-(smaller CPU caches favour quicksort).
+implement sort(), but in Perl 5.8 a mergesort algorithm was also made
+available, mainly to guarantee worst case O(N log N) behaviour:
+the worst case of quicksort is O(N**2). In Perl 5.8 and later,
+quicksort defends against quadratic behaviour by shuffling large
+arrays before sorting.
+
+A stable sort means that for records that compare equal, the original
+input ordering is preserved. Perl's mergesort is stable, quicksort is not.
+Stability will matter only if elements that compare equal can be
+distinguished in some other way. That means that simple numerical
+and lexical sorts do not profit from stability, since equal elements
+are indistinguishable. However, with a comparison such as
+
+ { substr($a, 0, 3) cmp substr($b, 0, 3) }
+
+stability might matter because elements that compare equal on the
+first 3 characters may be distinguished based on subsequent characters.
+In Perl 5.8 and later, quicksort can be stabilized, but doing so will
+add overhead, so it should only be done if it matters.
+
+The best algorithm depends on many things. On average, mergesort
+does fewer comparisons than quicksort, so it may be better when
+complicated comparison routines are used. Mergesort also takes
+advantage of pre-existing order, so it would be favored for using
+sort to merge several sorted arrays. On the other hand, quicksort
+is often faster for small arrays, and on platforms with small memory
+caches that are much faster than main memory. You can force the
+choice of algorithm with this pragma, but this feels heavy-handed,
+so the subpragmas beginning with a C<_> may not persist beyond Perl 5.8.
=cut
diff --git a/lib/sort.t b/lib/sort.t
index 6b607161d0..44aaf8ffae 100644
--- a/lib/sort.t
+++ b/lib/sort.t
@@ -12,6 +12,6 @@ BEGIN {
ok(sort::current() eq 'mergesort');
}
-use sort 'fast';
-ok(sort::current() eq 'quicksort fast');
+use sort qw( stable _qsort );
+ok(sort::current() eq 'quicksort stable');
diff --git a/perl.h b/perl.h
index 7c51e93f43..c9d359c8f5 100644
--- a/perl.h
+++ b/perl.h
@@ -3103,10 +3103,7 @@ enum { /* pass one of these to get_vtbl */
#define HINT_SORT_SORT_BITS 0x000000FF /* allow 256 different ones */
#define HINT_SORT_QUICKSORT 0x00000001
#define HINT_SORT_MERGESORT 0x00000002
-#define HINT_SORT_STABLE 0x00000100 /* sort styles */
-#define HINT_SORT_INSENSITIVE 0x00000200
-#define HINT_SORT_SAFE 0x00000300 /* stable and insensitive */
-#define HINT_SORT_FAST 0x00000400 /* damn the icebergs */
+#define HINT_SORT_STABLE 0x00000100 /* sort styles (currently one) */
/* Various states of the input record separator SV (rs) */
#define RsSNARF(sv) (! SvOK(sv))
diff --git a/pp_sort.c b/pp_sort.c
index 844c0e396f..797cb22957 100644
--- a/pp_sort.c
+++ b/pp_sort.c
@@ -34,6 +34,10 @@ static I32 amagic_cmp_locale(pTHX_ SV *a, SV *b);
(hintsvp = hv_fetch(GvHV(PL_hintgv), "SORT", 4, FALSE))) ? \
(I32)SvIV(*hintsvp) : 0)
+#ifndef SMALLSORT
+#define SMALLSORT (200)
+#endif
+
/*
* The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
*
@@ -281,9 +285,11 @@ S_mergesortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
gptr *aux, *list2, *p2, *last;
gptr *base = list1;
gptr *p1;
+ gptr small[SMALLSORT];
if (nmemb <= 1) return; /* sorted trivially */
- New(799,list2,nmemb,gptr); /* allocate auxilliary array */
+ if (nmemb <= SMALLSORT) list2 = small; /* use stack for aux array */
+ else { New(799,list2,nmemb,gptr); } /* allocate auxilliary array */
aux = list2;
dynprep(aTHX_ list1, list2, nmemb, cmp);
last = PINDEX(list2, nmemb);
@@ -395,7 +401,7 @@ S_mergesortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
last = PINDEX(list1, nmemb);
FROMTOUPTO(list1, list2, last);
}
- Safefree(aux);
+ if (aux != small) Safefree(aux); /* free iff allocated */
return;
}
@@ -1101,10 +1107,6 @@ S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
/* Believe it or not, the array is sorted at this point! */
}
-#ifndef SMALLSORT
-#define SMALLSORT (200)
-#endif
-
/* Stabilize what is, presumably, an otherwise unstable sort method.
* We do that by allocating (or having on hand) an array of pointers
* that is the same size as the original array of elements to be sorted.
@@ -1175,9 +1177,7 @@ S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
{
SV **hintsvp;
- if (SORTHINTS(hintsvp) & HINT_SORT_FAST)
- S_qsortsvu(aTHX_ list1, nmemb, cmp);
- else {
+ if (SORTHINTS(hintsvp) & HINT_SORT_STABLE) {
register gptr **pp, *q;
register size_t n, j, i;
gptr *small[SMALLSORT], **indir, tmp;
@@ -1238,6 +1238,8 @@ S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
if (indir != small) { Safefree(indir); }
/* restore prevailing comparison routine */
RealCmp = savecmp;
+ } else {
+ S_qsortsvu(aTHX_ list1, nmemb, cmp);
}
}