summaryrefslogtreecommitdiff
path: root/pp_sort.c
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2016-08-10 15:12:56 +0100
committerDavid Mitchell <davem@iabyn.com>2016-08-10 16:34:04 +0100
commit84721d614eb7d9835d9a09505b0001c7be40a865 (patch)
tree1c189557382a81051167278bce63d96843f5e14b /pp_sort.c
parent9c575c5c562583fedc6e491a509a388ce3b386bd (diff)
downloadperl-84721d614eb7d9835d9a09505b0001c7be40a865.tar.gz
Partially pessimise in-place sorting
There's currently an optimisation that converts at compile-time @a = sort { .... } @a into (approximately) sort { ... } \@a Then at run time, rather than passing an svp pointer to the appropriate sort routine which points to a list of SV*'s on the stack, pp_sort() passes a pointer to @a's AvARRAY. This allows the array to be sorted in-place, which is more efficient. However, it has some issues. First, the @a visible to the sort routine will be varying, whereas logically it should still hold the original list of values until after the '@a = ...' assignment. Secondly, the mergesort algorithm cureently used internally, when in mid-sort, temporarily stores pointers in the array which aren't pointers to SVs - this means that if @a elements are accessed mid-sort, it can crash. The solution to both these problems is for pp_sort() to push the elements of @a onto the stack at the beginning, sort the stack (like normal sorts do), then copy back to @a at the end. This is less efficient than before, but is still a lot more efficient than executing separate padav and aassign ops. Here are benchmark results in raw instruction counts etc (lower is better) for the sort line in this code block: my (@a, @b); @a = reverse 1..10; @b = sort { $a <=> $b } @a; A is for a non-in-place sort, i.e. @b = sort ... @a; B and C are for an inline sort, i.e. as above, but @a = sort ... @a; where B is blead before this commit and C is this commit. A B C ------ ------ ------ Ir 5238.0 2324.0 2772.0 Dr 1464.0 649.0 765.0 Dw 919.0 298.0 370.0 COND 782.0 320.0 405.0 IND 25.0 25.0 26.0 COND_m 14.9 13.0 17.0 IND_m 8.0 5.0 5.0 As can be seen, this partial pessimisation slows down in-place sorting by round 20%, but overall in-place is still nearly twice the speed as without the optimisation. These are the figures for a plain numeric sort (which is optimised to use a C comparison function); for other types of sort, the cost of the comparator dominates, and so the slowdown is much less marked.
Diffstat (limited to 'pp_sort.c')
-rw-r--r--pp_sort.c93
1 files changed, 52 insertions, 41 deletions
diff --git a/pp_sort.c b/pp_sort.c
index c91aab06f6..e171411c4b 100644
--- a/pp_sort.c
+++ b/pp_sort.c
@@ -1482,7 +1482,6 @@ PP(pp_sort)
bool hasargs = FALSE;
bool copytmps;
I32 is_xsub = 0;
- I32 sorting_av = 0;
const U8 priv = PL_op->op_private;
const U8 flags = PL_op->op_flags;
U32 sort_flags = 0;
@@ -1563,34 +1562,31 @@ PP(pp_sort)
PL_sortcop = NULL;
}
- /* optimiser converts "@a = sort @a" to "sort \@a";
- * in case of tied @a, pessimise: push (@a) onto stack, then assign
- * result back to @a at the end of this function */
+ /* optimiser converts "@a = sort @a" to "sort \@a". In this case,
+ * push (@a) onto stack, then assign result back to @a at the end of
+ * this function */
if (priv & OPpSORT_INPLACE) {
assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
(void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
av = MUTABLE_AV((*SP));
+ if (SvREADONLY(av))
+ Perl_croak_no_modify();
max = AvFILL(av) + 1;
+ MEXTEND(SP, max);
if (SvMAGICAL(av)) {
- MEXTEND(SP, max);
for (i=0; i < max; i++) {
SV **svp = av_fetch(av, i, FALSE);
*SP++ = (svp) ? *svp : NULL;
}
- SP--;
- p1 = p2 = SP - (max-1);
}
- else {
- if (SvREADONLY(av))
- Perl_croak_no_modify();
- else
- {
- SvREADONLY_on(av);
- save_pushptr((void *)av, SAVEt_READONLY_OFF);
- }
- p1 = p2 = AvARRAY(av);
- sorting_av = 1;
+ else {
+ SV **svp = AvARRAY(av);
+ assert(svp || max == 0);
+ for (i = 0; i < max; i++)
+ *SP++ = *svp++;
}
+ SP--;
+ p1 = p2 = SP - (max-1);
}
else {
p2 = MARK+1;
@@ -1600,7 +1596,7 @@ PP(pp_sort)
/* shuffle stack down, removing optional initial cv (p1!=p2), plus
* any nulls; also stringify or converting to integer or number as
* required any args */
- copytmps = !sorting_av && PL_sortcop;
+ copytmps = PL_sortcop;
for (i=max; i > 0 ; i--) {
if ((*p1 = *p2++)) { /* Weed out nulls. */
if (copytmps && SvPADTMP(*p1)) {
@@ -1633,9 +1629,6 @@ PP(pp_sort)
else
max--;
}
- if (sorting_av)
- AvFILLp(av) = max-1;
-
if (max > 1) {
SV **start;
if (PL_sortcop) {
@@ -1716,7 +1709,7 @@ PP(pp_sort)
}
else {
MEXTEND(SP, 20); /* Can't afford stack realloc on signal. */
- start = sorting_av ? AvARRAY(av) : ORIGMARK+1;
+ start = ORIGMARK+1;
sortsvp(aTHX_ start, max,
(priv & OPpSORT_NUMERIC)
? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
@@ -1742,27 +1735,45 @@ PP(pp_sort)
}
}
}
- if (sorting_av)
- SvREADONLY_off(av);
- else if (av && !sorting_av) {
- /* simulate pp_aassign of tied AV */
- SV** const base = MARK+1;
- for (i=0; i < max; i++) {
- base[i] = newSVsv(base[i]);
- }
- av_clear(av);
- av_extend(av, max);
- for (i=0; i < max; i++) {
- SV * const sv = base[i];
- SV ** const didstore = av_store(av, i, sv);
- if (SvSMAGICAL(sv))
- mg_set(sv);
- if (!didstore)
- sv_2mortal(sv);
- }
+
+ if (av) {
+ /* copy back result to the array */
+ SV** const base = MARK+1;
+ if (SvMAGICAL(av)) {
+ for (i = 0; i < max; i++)
+ base[i] = newSVsv(base[i]);
+ av_clear(av);
+ av_extend(av, max);
+ for (i=0; i < max; i++) {
+ SV * const sv = base[i];
+ SV ** const didstore = av_store(av, i, sv);
+ if (SvSMAGICAL(sv))
+ mg_set(sv);
+ if (!didstore)
+ sv_2mortal(sv);
+ }
+ }
+ else {
+ /* the elements of av are likely to be the same as the
+ * (non-refcounted) elements on the stack, just in a different
+ * order. However, its possible that someone's messed with av
+ * in the meantime. So bump and unbump the relevant refcounts
+ * first.
+ */
+ for (i = 0; i < max; i++)
+ SvREFCNT_inc_void(base[i]);
+ av_clear(av);
+ if (max > 0) {
+ av_extend(av, max);
+ Copy(base, AvARRAY(av), max, SV*);
+ }
+ AvFILLp(av) = max - 1;
+ AvREIFY_off(av);
+ AvREAL_on(av);
+ }
}
LEAVE;
- PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max);
+ PL_stack_sp = ORIGMARK + max;
return nextop;
}