diff options
author | David Mitchell <davem@iabyn.com> | 2016-08-10 15:12:56 +0100 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2016-08-10 16:34:04 +0100 |
commit | 84721d614eb7d9835d9a09505b0001c7be40a865 (patch) | |
tree | 1c189557382a81051167278bce63d96843f5e14b /pp_sort.c | |
parent | 9c575c5c562583fedc6e491a509a388ce3b386bd (diff) | |
download | perl-84721d614eb7d9835d9a09505b0001c7be40a865.tar.gz |
Partially pessimise in-place sorting
There's currently an optimisation that converts at compile-time
@a = sort { .... } @a
into (approximately)
sort { ... } \@a
Then at run time, rather than passing an svp pointer to the appropriate
sort routine which points to a list of SV*'s on the stack, pp_sort()
passes a pointer to @a's AvARRAY. This allows the array to be sorted
in-place, which is more efficient.
However, it has some issues. First, the @a visible to the sort routine
will be varying, whereas logically it should still hold the original list
of values until after the '@a = ...' assignment.
Secondly, the mergesort algorithm currently used internally, when in
mid-sort, temporarily stores pointers in the array which aren't pointers
to SVs - this means that if @a elements are accessed mid-sort, it can
crash.
The solution to both these problems is for pp_sort() to push the elements
of @a onto the stack at the beginning, sort the stack (like normal sorts
do), then copy back to @a at the end. This is less efficient than before,
but is still a lot more efficient than executing separate padav and
aassign ops.
Here are benchmark results in raw instruction counts etc (lower is better)
for the sort line in this code block:
my (@a, @b);
@a = reverse 1..10;
@b = sort { $a <=> $b } @a;
A is for a non-in-place sort, i.e. @b = sort ... @a;
B and C are for an inline sort, i.e. as above, but @a = sort ... @a;
where B is blead before this commit and C is this commit.
A B C
------ ------ ------
Ir 5238.0 2324.0 2772.0
Dr 1464.0 649.0 765.0
Dw 919.0 298.0 370.0
COND 782.0 320.0 405.0
IND 25.0 25.0 26.0
COND_m 14.9 13.0 17.0
IND_m 8.0 5.0 5.0
As can be seen, this partial pessimisation slows down in-place sorting by
around 20%, but overall in-place is still nearly twice as fast as without
the optimisation.
These are the figures for a plain numeric sort (which is optimised to use
a C comparison function); for other types of sort, the cost of the
comparator dominates, and so the slowdown is much less marked.
Diffstat (limited to 'pp_sort.c')
-rw-r--r-- | pp_sort.c | 93 |
1 files changed, 52 insertions, 41 deletions
@@ -1482,7 +1482,6 @@ PP(pp_sort) bool hasargs = FALSE; bool copytmps; I32 is_xsub = 0; - I32 sorting_av = 0; const U8 priv = PL_op->op_private; const U8 flags = PL_op->op_flags; U32 sort_flags = 0; @@ -1563,34 +1562,31 @@ PP(pp_sort) PL_sortcop = NULL; } - /* optimiser converts "@a = sort @a" to "sort \@a"; - * in case of tied @a, pessimise: push (@a) onto stack, then assign - * result back to @a at the end of this function */ + /* optimiser converts "@a = sort @a" to "sort \@a". In this case, + * push (@a) onto stack, then assign result back to @a at the end of + * this function */ if (priv & OPpSORT_INPLACE) { assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV); (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */ av = MUTABLE_AV((*SP)); + if (SvREADONLY(av)) + Perl_croak_no_modify(); max = AvFILL(av) + 1; + MEXTEND(SP, max); if (SvMAGICAL(av)) { - MEXTEND(SP, max); for (i=0; i < max; i++) { SV **svp = av_fetch(av, i, FALSE); *SP++ = (svp) ? *svp : NULL; } - SP--; - p1 = p2 = SP - (max-1); } - else { - if (SvREADONLY(av)) - Perl_croak_no_modify(); - else - { - SvREADONLY_on(av); - save_pushptr((void *)av, SAVEt_READONLY_OFF); - } - p1 = p2 = AvARRAY(av); - sorting_av = 1; + else { + SV **svp = AvARRAY(av); + assert(svp || max == 0); + for (i = 0; i < max; i++) + *SP++ = *svp++; } + SP--; + p1 = p2 = SP - (max-1); } else { p2 = MARK+1; @@ -1600,7 +1596,7 @@ PP(pp_sort) /* shuffle stack down, removing optional initial cv (p1!=p2), plus * any nulls; also stringify or converting to integer or number as * required any args */ - copytmps = !sorting_av && PL_sortcop; + copytmps = PL_sortcop; for (i=max; i > 0 ; i--) { if ((*p1 = *p2++)) { /* Weed out nulls. */ if (copytmps && SvPADTMP(*p1)) { @@ -1633,9 +1629,6 @@ PP(pp_sort) else max--; } - if (sorting_av) - AvFILLp(av) = max-1; - if (max > 1) { SV **start; if (PL_sortcop) { @@ -1716,7 +1709,7 @@ PP(pp_sort) } else { MEXTEND(SP, 20); /* Can't afford stack realloc on signal. */ - start = sorting_av ? 
AvARRAY(av) : ORIGMARK+1; + start = ORIGMARK+1; sortsvp(aTHX_ start, max, (priv & OPpSORT_NUMERIC) ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs) @@ -1742,27 +1735,45 @@ PP(pp_sort) } } } - if (sorting_av) - SvREADONLY_off(av); - else if (av && !sorting_av) { - /* simulate pp_aassign of tied AV */ - SV** const base = MARK+1; - for (i=0; i < max; i++) { - base[i] = newSVsv(base[i]); - } - av_clear(av); - av_extend(av, max); - for (i=0; i < max; i++) { - SV * const sv = base[i]; - SV ** const didstore = av_store(av, i, sv); - if (SvSMAGICAL(sv)) - mg_set(sv); - if (!didstore) - sv_2mortal(sv); - } + + if (av) { + /* copy back result to the array */ + SV** const base = MARK+1; + if (SvMAGICAL(av)) { + for (i = 0; i < max; i++) + base[i] = newSVsv(base[i]); + av_clear(av); + av_extend(av, max); + for (i=0; i < max; i++) { + SV * const sv = base[i]; + SV ** const didstore = av_store(av, i, sv); + if (SvSMAGICAL(sv)) + mg_set(sv); + if (!didstore) + sv_2mortal(sv); + } + } + else { + /* the elements of av are likely to be the same as the + * (non-refcounted) elements on the stack, just in a different + * order. However, its possible that someone's messed with av + * in the meantime. So bump and unbump the relevant refcounts + * first. + */ + for (i = 0; i < max; i++) + SvREFCNT_inc_void(base[i]); + av_clear(av); + if (max > 0) { + av_extend(av, max); + Copy(base, AvARRAY(av), max, SV*); + } + AvFILLp(av) = max - 1; + AvREIFY_off(av); + AvREAL_on(av); + } } LEAVE; - PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max); + PL_stack_sp = ORIGMARK + max; return nextop; } |