/* pp_sort.c * * Copyright (c) 1991-2001, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. * */ /* * ...they shuffled back towards the rear of the line. 'No, not at the * rear!' the slave-driver shouted. 'Three files up. And stay there... */ #include "EXTERN.h" #define PERL_IN_PP_SORT_C #include "perl.h" static I32 sortcv(pTHX_ SV *a, SV *b); static I32 sortcv_stacked(pTHX_ SV *a, SV *b); static I32 sortcv_xsub(pTHX_ SV *a, SV *b); static I32 sv_ncmp(pTHX_ SV *a, SV *b); static I32 sv_i_ncmp(pTHX_ SV *a, SV *b); static I32 amagic_ncmp(pTHX_ SV *a, SV *b); static I32 amagic_i_ncmp(pTHX_ SV *a, SV *b); static I32 amagic_cmp(pTHX_ SV *a, SV *b); static I32 amagic_cmp_locale(pTHX_ SV *a, SV *b); #define sv_cmp_static Perl_sv_cmp #define sv_cmp_locale_static Perl_sv_cmp_locale #define SORTHINTS(hintsvp) \ ((PL_hintgv && \ (hintsvp = hv_fetch(GvHV(PL_hintgv), "SORT", 4, FALSE))) ? \ (I32)SvIV(*hintsvp) : 0) /* * The mergesort implementation is by Peter M. Mcilroy . * * The original code was written in conjunction with BSD Computer Software * Research Group at University of California, Berkeley. * * See also: "Optimistic Merge Sort" (SODA '92) * * The integration to Perl is by John P. Linderman . * * The code can be distributed under the same terms as Perl itself. * */ #ifdef TESTHARNESS #include typedef void SV; #define pTHX_ #define STATIC #define New(ID,VAR,N,TYPE) VAR=(TYPE *)malloc((N)*sizeof(TYPE)) #define Safefree(VAR) free(VAR) typedef int (*SVCOMPARE_t) (pTHX_ SV*, SV*); #endif /* TESTHARNESS */ typedef char * aptr; /* pointer for arithmetic on sizes */ typedef SV * gptr; /* pointers in our lists */ /* Binary merge internal sort, with a few special mods ** for the special perl environment it now finds itself in. ** ** Things that were once options have been hotwired ** to values suitable for this use. In particular, we'll always ** initialize looking for natural runs, we'll always produce stable ** output, and we'll always do Peter McIlroy's binary merge. */ /* Pointer types for arithmetic and storage and convenience casts */ #define APTR(P) ((aptr)(P)) #define GPTP(P) ((gptr *)(P)) #define GPPP(P) ((gptr **)(P)) /* byte offset from pointer P to (larger) pointer Q */ #define BYTEOFF(P, Q) (APTR(Q) - APTR(P)) #define PSIZE sizeof(gptr) /* If PSIZE is power of 2, make PSHIFT that power, if that helps */ #ifdef PSHIFT #define PNELEM(P, Q) (BYTEOFF(P,Q) >> (PSHIFT)) #define PNBYTE(N) ((N) << (PSHIFT)) #define PINDEX(P, N) (GPTP(APTR(P) + PNBYTE(N))) #else /* Leave optimization to compiler */ #define PNELEM(P, Q) (GPTP(Q) - GPTP(P)) #define PNBYTE(N) ((N) * (PSIZE)) #define PINDEX(P, N) (GPTP(P) + (N)) #endif /* Pointer into other corresponding to pointer into this */ #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P)) #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src= 2 * PTHRESH. We only try to form long runs when ** PTHRESH adjacent pairs compare in the same way, suggesting overall order. ** ** Unless otherwise specified, pair pointers address the first of two elements. ** ** b and b+1 are a pair that compare with sense ``sense''. ** b is the ``bottom'' of adjacent pairs that might form a longer run. ** ** p2 parallels b in the list2 array, where runs are defined by ** a pointer chain. ** ** t represents the ``top'' of the adjacent pairs that might extend ** the run beginning at b. Usually, t addresses a pair ** that compares with opposite sense from (b,b+1). ** However, it may also address a singleton element at the end of list1, ** or it may be equal to ``last'', the first element beyond list1. ** ** r addresses the Nth pair following b. If this would be beyond t, ** we back it off to t. Only when r is less than t do we consider the ** run long enough to consider checking. ** ** q addresses a pair such that the pairs at b through q already form a run. ** Often, q will equal b, indicating we only are sure of the pair itself. ** However, a search on the previous cycle may have revealed a longer run, ** so q may be greater than b. ** ** p is used to work back from a candidate r, trying to reach q, ** which would mean b through r would be a run. If we discover such a run, ** we start q at r and try to push it further towards t. ** If b through r is NOT a run, we detect the wrong order at (p-1,p). ** In any event, after the check (if any), we have two main cases. ** ** 1) Short run. b <= q < p <= r <= t. ** b through q is a run (perhaps trivial) ** q through p are uninteresting pairs ** p through r is a run ** ** 2) Long run. b < r <= q < t. ** b through q is a run (of length >= 2 * PTHRESH) ** ** Note that degenerate cases are not only possible, but likely. ** For example, if the pair following b compares with opposite sense, ** then b == q < p == r == t. */ static void dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp) { int sense; register gptr *b, *p, *q, *t, *p2; register gptr c, *last, *r; gptr *savep; b = list1; last = PINDEX(b, nmemb); sense = (cmp(aTHX_ *b, *(b+1)) > 0); for (p2 = list2; b < last; ) { /* We just started, or just reversed sense. ** Set t at end of pairs with the prevailing sense. */ for (p = b+2, t = p; ++p < last; t = ++p) { if ((cmp(aTHX_ *t, *p) > 0) != sense) break; } q = b; /* Having laid out the playing field, look for long runs */ do { p = r = b + (2 * PTHRESH); if (r >= t) p = r = t; /* too short to care about */ else { while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) && ((p -= 2) > q)); if (p <= q) { /* b through r is a (long) run. ** Extend it as far as possible. */ p = q = r; while (((p += 2) < t) && ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p; r = p = q + 2; /* no simple pairs, no after-run */ } } if (q > b) { /* run of greater than 2 at b */ savep = p; p = q += 2; /* pick up singleton, if possible */ if ((p == t) && ((t + 1) == last) && ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) savep = r = p = q = last; p2 = NEXT(p2) = p2 + (p - b); if (sense) while (b < --p) { c = *b; *b++ = *p; *p = c; } p = savep; } while (q < p) { /* simple pairs */ p2 = NEXT(p2) = p2 + 2; if (sense) { c = *q++; *(q-1) = *q; *q++ = c; } else q += 2; } if (((b = p) == t) && ((t+1) == last)) { NEXT(p2) = p2 + 1; b++; } q = r; } while (b < t); sense = !sense; } return; } /* Overview of bmerge variables: ** ** list1 and list2 address the main and auxiliary arrays. ** They swap identities after each merge pass. ** Base points to the original list1, so we can tell if ** the pointers ended up where they belonged (or must be copied). ** ** When we are merging two lists, f1 and f2 are the next elements ** on the respective lists. l1 and l2 mark the end of the lists. ** tp2 is the current location in the merged list. ** ** p1 records where f1 started. ** After the merge, a new descriptor is built there. ** ** p2 is a ``parallel'' pointer in (what starts as) descriptor space. ** It is used to identify and delimit the runs. ** ** In the heat of determining where q, the greater of the f1/f2 elements, ** belongs in the other list, b, t and p, represent bottom, top and probe ** locations, respectively, in the other list. ** They make convenient temporary pointers in other places. */ STATIC void S_mergesortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp) { int i, run; int sense; register gptr *f1, *f2, *t, *b, *p, *tp2, *l1, *l2, *q; gptr *aux, *list2, *p2, *last; gptr *base = list1; gptr *p1; if (nmemb <= 1) return; /* sorted trivially */ New(799,list2,nmemb,gptr); /* allocate auxilliary array */ aux = list2; dynprep(aTHX_ list1, list2, nmemb, cmp); last = PINDEX(list2, nmemb); while (NEXT(list2) != last) { /* More than one run remains. Do some merging to reduce runs. */ l2 = p1 = list1; for (tp2 = p2 = list2; p2 != last;) { /* The new first run begins where the old second list ended. ** Use the p2 ``parallel'' pointer to identify the end of the run. */ f1 = l2; t = NEXT(p2); f2 = l1 = POTHER(t, list2, list1); if (t != last) t = NEXT(t); l2 = POTHER(t, list2, list1); p2 = t; while (f1 < l1 && f2 < l2) { /* If head 1 is larger than head 2, find ALL the elements ** in list 2 strictly less than head1, write them all, ** then head 1. Then compare the new heads, and repeat, ** until one or both lists are exhausted. ** ** In all comparisons (after establishing ** which head to merge) the item to merge ** (at pointer q) is the first operand of ** the comparison. When we want to know ** if ``q is strictly less than the other'', ** we can't just do ** cmp(q, other) < 0 ** because stability demands that we treat equality ** as high when q comes from l2, and as low when ** q was from l1. So we ask the question by doing ** cmp(q, other) <= sense ** and make sense == 0 when equality should look low, ** and -1 when equality should look high. */ if (cmp(aTHX_ *f1, *f2) <= 0) { q = f2; b = f1; t = l1; sense = -1; } else { q = f1; b = f2; t = l2; sense = 0; } /* ramp up ** ** Leave t at something strictly ** greater than q (or at the end of the list), ** and b at something strictly less than q. */ for (i = 1, run = 0 ;;) { if ((p = PINDEX(b, i)) >= t) { /* off the end */ if (((p = PINDEX(t, -1)) > b) && (cmp(aTHX_ *q, *p) <= sense)) t = p; else b = p; break; } else if (cmp(aTHX_ *q, *p) <= sense) { t = p; break; } else b = p; if (++run >= RTHRESH) i += i; } /* q is known to follow b and must be inserted before t. ** Increment b, so the range of possibilities is [b,t). ** Round binary split down, to favor early appearance. ** Adjust b and t until q belongs just before t. */ b++; while (b < t) { p = PINDEX(b, (PNELEM(b, t) - 1) / 2); if (cmp(aTHX_ *q, *p) <= sense) { t = p; } else b = p + 1; } /* Copy all the strictly low elements */ if (q == f1) { FROMTOUPTO(f2, tp2, t); *tp2++ = *f1++; } else { FROMTOUPTO(f1, tp2, t); *tp2++ = *f2++; } } /* Run out remaining list */ if (f1 == l1) { if (f2 < l2) FROMTOUPTO(f2, tp2, l2); } else FROMTOUPTO(f1, tp2, l1); p1 = NEXT(p1) = POTHER(tp2, list2, list1); } t = list1; list1 = list2; list2 = t; last = PINDEX(list2, nmemb); } if (base == list2) { last = PINDEX(list1, nmemb); FROMTOUPTO(list1, list2, last); } Safefree(aux); return; } /* * The quicksort implementation was derived from source code contributed * by Tom Horsley. * * NOTE: this code was derived from Tom Horsley's qsort replacement * and should not be confused with the original code. */ /* Copyright (C) Tom Horsley, 1997. All rights reserved. Permission granted to distribute under the same terms as perl which are (briefly): This program is free software; you can redistribute it and/or modify it under the terms of either: a) the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version, or b) the "Artistic License" which comes with this Kit. Details on the perl license can be found in the perl source code which may be located via the www.perl.com web page. This is the most wonderfulest possible qsort I can come up with (and still be mostly portable) My (limited) tests indicate it consistently does about 20% fewer calls to compare than does the qsort in the Visual C++ library, other vendors may vary. Some of the ideas in here can be found in "Algorithms" by Sedgewick, others I invented myself (or more likely re-invented since they seemed pretty obvious once I watched the algorithm operate for a while). Most of this code was written while watching the Marlins sweep the Giants in the 1997 National League Playoffs - no Braves fans allowed to use this code (just kidding :-). I realize that if I wanted to be true to the perl tradition, the only comment in this file would be something like: ...they shuffled back towards the rear of the line. 'No, not at the rear!' the slave-driver shouted. 'Three files up. And stay there... However, I really needed to violate that tradition just so I could keep track of what happens myself, not to mention some poor fool trying to understand this years from now :-). */ /* ********************************************************** Configuration */ #ifndef QSORT_ORDER_GUESS #define QSORT_ORDER_GUESS 2 /* Select doubling version of the netBSD trick */ #endif /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for future processing - a good max upper bound is log base 2 of memory size (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can safely be smaller than that since the program is taking up some space and most operating systems only let you grab some subset of contiguous memory (not to mention that you are normally sorting data larger than 1 byte element size :-). */ #ifndef QSORT_MAX_STACK #define QSORT_MAX_STACK 32 #endif /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort. Anything bigger and we use qsort. If you make this too small, the qsort will probably break (or become less efficient), because it doesn't expect the middle element of a partition to be the same as the right or left - you have been warned). */ #ifndef QSORT_BREAK_EVEN #define QSORT_BREAK_EVEN 6 #endif /* ************************************************************* Data Types */ /* hold left and right index values of a partition waiting to be sorted (the partition includes both left and right - right is NOT one past the end or anything like that). */ struct partition_stack_entry { int left; int right; #ifdef QSORT_ORDER_GUESS int qsort_break_even; #endif }; /* ******************************************************* Shorthand Macros */ /* Note that these macros will be used from inside the qsort function where we happen to know that the variable 'elt_size' contains the size of an array element and the variable 'temp' points to enough space to hold a temp element and the variable 'array' points to the array being sorted and 'compare' is the pointer to the compare routine. Also note that there are very many highly architecture specific ways these might be sped up, but this is simply the most generally portable code I could think of. */ /* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2 */ #define qsort_cmp(elt1, elt2) \ ((*compare)(aTHX_ array[elt1], array[elt2])) #ifdef QSORT_ORDER_GUESS #define QSORT_NOTICE_SWAP swapped++; #else #define QSORT_NOTICE_SWAP #endif /* swaps contents of array elements elt1, elt2. */ #define qsort_swap(elt1, elt2) \ STMT_START { \ QSORT_NOTICE_SWAP \ temp = array[elt1]; \ array[elt1] = array[elt2]; \ array[elt2] = temp; \ } STMT_END /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets elt3 and elt3 gets elt1. */ #define qsort_rotate(elt1, elt2, elt3) \ STMT_START { \ QSORT_NOTICE_SWAP \ temp = array[elt1]; \ array[elt1] = array[elt2]; \ array[elt2] = array[elt3]; \ array[elt3] = temp; \ } STMT_END /* ************************************************************ Debug stuff */ #ifdef QSORT_DEBUG static void break_here() { return; /* good place to set a breakpoint */ } #define qsort_assert(t) (void)( (t) || (break_here(), 0) ) static void doqsort_all_asserts( void * array, size_t num_elts, size_t elt_size, int (*compare)(const void * elt1, const void * elt2), int pc_left, int pc_right, int u_left, int u_right) { int i; qsort_assert(pc_left <= pc_right); qsort_assert(u_right < pc_left); qsort_assert(pc_right < u_left); for (i = u_right + 1; i < pc_left; ++i) { qsort_assert(qsort_cmp(i, pc_left) < 0); } for (i = pc_left; i < pc_right; ++i) { qsort_assert(qsort_cmp(i, pc_right) == 0); } for (i = pc_right + 1; i < u_left; ++i) { qsort_assert(qsort_cmp(pc_right, i) < 0); } } #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \ doqsort_all_asserts(array, num_elts, elt_size, compare, \ PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) #else #define qsort_assert(t) ((void)0) #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0) #endif /* ****************************************************************** qsort */ STATIC void /* the standard unstable (u) quicksort (qsort) */ S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare) { register SV * temp; struct partition_stack_entry partition_stack[QSORT_MAX_STACK]; int next_stack_entry = 0; int part_left; int part_right; #ifdef QSORT_ORDER_GUESS int qsort_break_even; int swapped; #endif /* Make sure we actually have work to do. */ if (num_elts <= 1) { return; } /* Setup the initial partition definition and fall into the sorting loop */ part_left = 0; part_right = (int)(num_elts - 1); #ifdef QSORT_ORDER_GUESS qsort_break_even = QSORT_BREAK_EVEN; #else #define qsort_break_even QSORT_BREAK_EVEN #endif for ( ; ; ) { if ((part_right - part_left) >= qsort_break_even) { /* OK, this is gonna get hairy, so lets try to document all the concepts and abbreviations and variables and what they keep track of: pc: pivot chunk - the set of array elements we accumulate in the middle of the partition, all equal in value to the original pivot element selected. The pc is defined by: pc_left - the leftmost array index of the pc pc_right - the rightmost array index of the pc we start with pc_left == pc_right and only one element in the pivot chunk (but it can grow during the scan). u: uncompared elements - the set of elements in the partition we have not yet compared to the pivot value. There are two uncompared sets during the scan - one to the left of the pc and one to the right. u_right - the rightmost index of the left side's uncompared set u_left - the leftmost index of the right side's uncompared set The leftmost index of the left sides's uncompared set doesn't need its own variable because it is always defined by the leftmost edge of the whole partition (part_left). The same goes for the rightmost edge of the right partition (part_right). We know there are no uncompared elements on the left once we get u_right < part_left and no uncompared elements on the right once u_left > part_right. When both these conditions are met, we have completed the scan of the partition. Any elements which are between the pivot chunk and the uncompared elements should be less than the pivot value on the left side and greater than the pivot value on the right side (in fact, the goal of the whole algorithm is to arrange for that to be true and make the groups of less-than and greater-then elements into new partitions to sort again). As you marvel at the complexity of the code and wonder why it has to be so confusing. Consider some of the things this level of confusion brings: Once I do a compare, I squeeze every ounce of juice out of it. I never do compare calls I don't have to do, and I certainly never do redundant calls. I also never swap any elements unless I can prove there is a good reason. Many sort algorithms will swap a known value with an uncompared value just to get things in the right place (or avoid complexity :-), but that uncompared value, once it gets compared, may then have to be swapped again. A lot of the complexity of this code is due to the fact that it never swaps anything except compared values, and it only swaps them when the compare shows they are out of position. */ int pc_left, pc_right; int u_right, u_left; int s; pc_left = ((part_left + part_right) / 2); pc_right = pc_left; u_right = pc_left - 1; u_left = pc_right + 1; /* Qsort works best when the pivot value is also the median value in the partition (unfortunately you can't find the median value without first sorting :-), so to give the algorithm a helping hand, we pick 3 elements and sort them and use the median value of that tiny set as the pivot value. Some versions of qsort like to use the left middle and right as the 3 elements to sort so they can insure the ends of the partition will contain values which will stop the scan in the compare loop, but when you have to call an arbitrarily complex routine to do a compare, its really better to just keep track of array index values to know when you hit the edge of the partition and avoid the extra compare. An even better reason to avoid using a compare call is the fact that you can drop off the edge of the array if someone foolishly provides you with an unstable compare function that doesn't always provide consistent results. So, since it is simpler for us to compare the three adjacent elements in the middle of the partition, those are the ones we pick here (conveniently pointed at by u_right, pc_left, and u_left). The values of the left, center, and right elements are refered to as l c and r in the following comments. */ #ifdef QSORT_ORDER_GUESS swapped = 0; #endif s = qsort_cmp(u_right, pc_left); if (s < 0) { /* l < c */ s = qsort_cmp(pc_left, u_left); /* if l < c, c < r - already in order - nothing to do */ if (s == 0) { /* l < c, c == r - already in order, pc grows */ ++pc_right; qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } else if (s > 0) { /* l < c, c > r - need to know more */ s = qsort_cmp(u_right, u_left); if (s < 0) { /* l < c, c > r, l < r - swap c & r to get ordered */ qsort_swap(pc_left, u_left); qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } else if (s == 0) { /* l < c, c > r, l == r - swap c&r, grow pc */ qsort_swap(pc_left, u_left); --pc_left; qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } else { /* l < c, c > r, l > r - make lcr into rlc to get ordered */ qsort_rotate(pc_left, u_right, u_left); qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } } } else if (s == 0) { /* l == c */ s = qsort_cmp(pc_left, u_left); if (s < 0) { /* l == c, c < r - already in order, grow pc */ --pc_left; qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } else if (s == 0) { /* l == c, c == r - already in order, grow pc both ways */ --pc_left; ++pc_right; qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } else { /* l == c, c > r - swap l & r, grow pc */ qsort_swap(u_right, u_left); ++pc_right; qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } } else { /* l > c */ s = qsort_cmp(pc_left, u_left); if (s < 0) { /* l > c, c < r - need to know more */ s = qsort_cmp(u_right, u_left); if (s < 0) { /* l > c, c < r, l < r - swap l & c to get ordered */ qsort_swap(u_right, pc_left); qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } else if (s == 0) { /* l > c, c < r, l == r - swap l & c, grow pc */ qsort_swap(u_right, pc_left); ++pc_right; qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } else { /* l > c, c < r, l > r - rotate lcr into crl to order */ qsort_rotate(u_right, pc_left, u_left); qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } } else if (s == 0) { /* l > c, c == r - swap ends, grow pc */ qsort_swap(u_right, u_left); --pc_left; qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } else { /* l > c, c > r - swap ends to get in order */ qsort_swap(u_right, u_left); qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1); } } /* We now know the 3 middle elements have been compared and arranged in the desired order, so we can shrink the uncompared sets on both sides */ --u_right; ++u_left; qsort_all_asserts(pc_left, pc_right, u_left, u_right); /* The above massive nested if was the simple part :-). We now have the middle 3 elements ordered and we need to scan through the uncompared sets on either side, swapping elements that are on the wrong side or simply shuffling equal elements around to get all equal elements into the pivot chunk. */ for ( ; ; ) { int still_work_on_left; int still_work_on_right; /* Scan the uncompared values on the left. If I find a value equal to the pivot value, move it over so it is adjacent to the pivot chunk and expand the pivot chunk. If I find a value less than the pivot value, then just leave it - its already on the correct side of the partition. If I find a greater value, then stop the scan. */ while ((still_work_on_left = (u_right >= part_left))) { s = qsort_cmp(u_right, pc_left); if (s < 0) { --u_right; } else if (s == 0) { --pc_left; if (pc_left != u_right) { qsort_swap(u_right, pc_left); } --u_right; } else { break; } qsort_assert(u_right < pc_left); qsort_assert(pc_left <= pc_right); qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0); qsort_assert(qsort_cmp(pc_left, pc_right) == 0); } /* Do a mirror image scan of uncompared values on the right */ while ((still_work_on_right = (u_left <= part_right))) { s = qsort_cmp(pc_right, u_left); if (s < 0) { ++u_left; } else if (s == 0) { ++pc_right; if (pc_right != u_left) { qsort_swap(pc_right, u_left); } ++u_left; } else { break; } qsort_assert(u_left > pc_right); qsort_assert(pc_left <= pc_right); qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0); qsort_assert(qsort_cmp(pc_left, pc_right) == 0); } if (still_work_on_left) { /* I know I have a value on the left side which needs to be on the right side, but I need to know more to decide exactly the best thing to do with it. */ if (still_work_on_right) { /* I know I have values on both side which are out of position. This is a big win because I kill two birds with one swap (so to speak). I can advance the uncompared pointers on both sides after swapping both of them into the right place. */ qsort_swap(u_right, u_left); --u_right; ++u_left; qsort_all_asserts(pc_left, pc_right, u_left, u_right); } else { /* I have an out of position value on the left, but the right is fully scanned, so I "slide" the pivot chunk and any less-than values left one to make room for the greater value over on the right. If the out of position value is immediately adjacent to the pivot chunk (there are no less-than values), I can do that with a swap, otherwise, I have to rotate one of the less than values into the former position of the out of position value and the right end of the pivot chunk into the left end (got all that?). */ --pc_left; if (pc_left == u_right) { qsort_swap(u_right, pc_right); qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1); } else { qsort_rotate(u_right, pc_left, pc_right); qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1); } --pc_right; --u_right; } } else if (still_work_on_right) { /* Mirror image of complex case above: I have an out of position value on the right, but the left is fully scanned, so I need to shuffle things around to make room for the right value on the left. */ ++pc_right; if (pc_right == u_left) { qsort_swap(u_left, pc_left); qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right); } else { qsort_rotate(pc_right, pc_left, u_left); qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right); } ++pc_left; ++u_left; } else { /* No more scanning required on either side of partition, break out of loop and figure out next set of partitions */ break; } } /* The elements in the pivot chunk are now in the right place. They will never move or be compared again. All I have to do is decide what to do with the stuff to the left and right of the pivot chunk. Notes on the QSORT_ORDER_GUESS ifdef code: 1. If I just built these partitions without swapping any (or very many) elements, there is a chance that the elements are already ordered properly (being properly ordered will certainly result in no swapping, but the converse can't be proved :-). 2. A (properly written) insertion sort will run faster on already ordered data than qsort will. 3. Perhaps there is some way to make a good guess about switching to an insertion sort earlier than partition size 6 (for instance - we could save the partition size on the stack and increase the size each time we find we didn't swap, thus switching to insertion sort earlier for partitions with a history of not swapping). 4. Naturally, if I just switch right away, it will make artificial benchmarks with pure ascending (or descending) data look really good, but is that a good reason in general? Hard to say... */ #ifdef QSORT_ORDER_GUESS if (swapped < 3) { #if QSORT_ORDER_GUESS == 1 qsort_break_even = (part_right - part_left) + 1; #endif #if QSORT_ORDER_GUESS == 2 qsort_break_even *= 2; #endif #if QSORT_ORDER_GUESS == 3 int prev_break = qsort_break_even; qsort_break_even *= qsort_break_even; if (qsort_break_even < prev_break) { qsort_break_even = (part_right - part_left) + 1; } #endif } else { qsort_break_even = QSORT_BREAK_EVEN; } #endif if (part_left < pc_left) { /* There are elements on the left which need more processing. Check the right as well before deciding what to do. */ if (pc_right < part_right) { /* We have two partitions to be sorted. Stack the biggest one and process the smallest one on the next iteration. This minimizes the stack height by insuring that any additional stack entries must come from the smallest partition which (because it is smallest) will have the fewest opportunities to generate additional stack entries. */ if ((part_right - pc_right) > (pc_left - part_left)) { /* stack the right partition, process the left */ partition_stack[next_stack_entry].left = pc_right + 1; partition_stack[next_stack_entry].right = part_right; #ifdef QSORT_ORDER_GUESS partition_stack[next_stack_entry].qsort_break_even = qsort_break_even; #endif part_right = pc_left - 1; } else { /* stack the left partition, process the right */ partition_stack[next_stack_entry].left = part_left; partition_stack[next_stack_entry].right = pc_left - 1; #ifdef QSORT_ORDER_GUESS partition_stack[next_stack_entry].qsort_break_even = qsort_break_even; #endif part_left = pc_right + 1; } qsort_assert(next_stack_entry < QSORT_MAX_STACK); ++next_stack_entry; } else { /* The elements on the left are the only remaining elements that need sorting, arrange for them to be processed as the next partition. */ part_right = pc_left - 1; } } else if (pc_right < part_right) { /* There is only one chunk on the right to be sorted, make it the new partition and loop back around. */ part_left = pc_right + 1; } else { /* This whole partition wound up in the pivot chunk, so we need to get a new partition off the stack. */ if (next_stack_entry == 0) { /* the stack is empty - we are done */ break; } --next_stack_entry; part_left = partition_stack[next_stack_entry].left; part_right = partition_stack[next_stack_entry].right; #ifdef QSORT_ORDER_GUESS qsort_break_even = partition_stack[next_stack_entry].qsort_break_even; #endif } } else { /* This partition is too small to fool with qsort complexity, just do an ordinary insertion sort to minimize overhead. */ int i; /* Assume 1st element is in right place already, and start checking at 2nd element to see where it should be inserted. */ for (i = part_left + 1; i <= part_right; ++i) { int j; /* Scan (backwards - just in case 'i' is already in right place) through the elements already sorted to see if the ith element belongs ahead of one of them. */ for (j = i - 1; j >= part_left; --j) { if (qsort_cmp(i, j) >= 0) { /* i belongs right after j */ break; } } ++j; if (j != i) { /* Looks like we really need to move some things */ int k; temp = array[i]; for (k = i - 1; k >= j; --k) array[k + 1] = array[k]; array[j] = temp; } } /* That partition is now sorted, grab the next one, or get out of the loop if there aren't any more. */ if (next_stack_entry == 0) { /* the stack is empty - we are done */ break; } --next_stack_entry; part_left = partition_stack[next_stack_entry].left; part_right = partition_stack[next_stack_entry].right; #ifdef QSORT_ORDER_GUESS qsort_break_even = partition_stack[next_stack_entry].qsort_break_even; #endif } } /* Believe it or not, the array is sorted at this point! */ } #ifndef SMALLSORT #define SMALLSORT (200) #endif /* Stabilize what is, presumably, an otherwise unstable sort method. * We do that by allocating (or having on hand) an array of pointers * that is the same size as the original array of elements to be sorted. * We initialize this parallel array with the addresses of the original * array elements. This indirection can make you crazy. * Some pictures can help. After initializing, we have * * indir list1 * +----+ +----+ * | | --------------> | | ------> first element to be sorted * +----+ +----+ * | | --------------> | | ------> second element to be sorted * +----+ +----+ * | | --------------> | | ------> third element to be sorted * +----+ +----+ * ... * +----+ +----+ * | | --------------> | | ------> n-1st element to be sorted * +----+ +----+ * | | --------------> | | ------> n-th element to be sorted * +----+ +----+ * * During the sort phase, we leave the elements of list1 where they are, * and sort the pointers in the indirect array in the same order determined * by the original comparison routine on the elements pointed to. * Because we don't move the elements of list1 around through * this phase, we can break ties on elements that compare equal * using their address in the list1 array, ensuring stabilty. * This leaves us with something looking like * * indir list1 * +----+ +----+ * | | --+ +---> | | ------> first element to be sorted * +----+ | | +----+ * | | --|-------|---> | | ------> second element to be sorted * +----+ | | +----+ * | | --|-------+ +-> | | ------> third element to be sorted * +----+ | | +----+ * ... * +----+ | | | | +----+ * | | ---|-+ | +--> | | ------> n-1st element to be sorted * +----+ | | +----+ * | | ---+ +----> | | ------> n-th element to be sorted * +----+ +----+ * * where the i-th element of the indirect array points to the element * that should be i-th in the sorted array. After the sort phase, * we have to put the elements of list1 into the places * dictated by the indirect array. */ static SVCOMPARE_t RealCmp; static I32 cmpindir(pTHX_ gptr a, gptr b) { I32 sense; gptr *ap = (gptr *)a; gptr *bp = (gptr *)b; if ((sense = RealCmp(aTHX_ *ap, *bp)) == 0) sense = (ap > bp) ? 1 : ((ap < bp) ? -1 : 0); return sense; } STATIC void S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp) { SV **hintsvp; if (SORTHINTS(hintsvp) & HINT_SORT_FAST) S_qsortsvu(aTHX_ list1, nmemb, cmp); else { register gptr **pp, *q; register size_t n, j, i; gptr *small[SMALLSORT], **indir, tmp; SVCOMPARE_t savecmp; if (nmemb <= 1) return; /* sorted trivially */ /* Small arrays can use the stack, big ones must be allocated */ if (nmemb <= SMALLSORT) indir = small; else { New(1799, indir, nmemb, gptr *); } /* Copy pointers to original array elements into indirect array */ for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++; savecmp = RealCmp; /* Save current comparison routine, if any */ RealCmp = cmp; /* Put comparison routine where cmpindir can find it */ /* sort, with indirection */ S_qsortsvu(aTHX_ (gptr *)indir, nmemb, cmpindir); pp = indir; q = list1; for (n = nmemb; n--; ) { /* Assert A: all elements of q with index > n are already * in place. This is vacuosly true at the start, and we * put element n where it belongs below (if it wasn't * already where it belonged). Assert B: we only move * elements that aren't where they belong, * so, by A, we never tamper with elements above n. */ j = pp[n] - q; /* This sets j so that q[j] is * at pp[n]. *pp[j] belongs in * q[j], by construction. */ if (n != j) { /* all's well if n == j */ tmp = q[j]; /* save what's in q[j] */ do { q[j] = *pp[j]; /* put *pp[j] where it belongs */ i = pp[j] - q; /* the index in q of the element * just moved */ pp[j] = q + j; /* this is ok now */ } while ((j = i) != n); /* There are only finitely many (nmemb) addresses * in the pp array. * So we must eventually revisit an index we saw before. * Suppose the first revisited index is k != n. * An index is visited because something else belongs there. * If we visit k twice, then two different elements must * belong in the same place, which cannot be. * So j must get back to n, the loop terminates, * and we put the saved element where it belongs. */ q[n] = tmp; /* put what belongs into * the n-th element */ } } /* free iff allocated */ if (indir != small) { Safefree(indir); } /* restore prevailing comparison routine */ RealCmp = savecmp; } } /* =for apidoc sortsv Sort an array. Here is an example: sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale); =cut */ void Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp) { void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp) = S_mergesortsv; SV **hintsvp; I32 hints; if ((hints = SORTHINTS(hintsvp))) { if (hints & HINT_SORT_QUICKSORT) sortsvp = S_qsortsv; else { if (hints & HINT_SORT_MERGESORT) sortsvp = S_mergesortsv; else sortsvp = S_mergesortsv; } } sortsvp(aTHX_ array, nmemb, cmp); } PP(pp_sort) { dSP; dMARK; dORIGMARK; register SV **up; SV **myorigmark = ORIGMARK; register I32 max; HV *stash; GV *gv; CV *cv = 0; I32 gimme = GIMME; OP* nextop = PL_op->op_next; I32 overloading = 0; bool hasargs = FALSE; I32 is_xsub = 0; if (gimme != G_ARRAY) { SP = MARK; RETPUSHUNDEF; } ENTER; SAVEVPTR(PL_sortcop); if (PL_op->op_flags & OPf_STACKED) { if (PL_op->op_flags & OPf_SPECIAL) { OP *kid = cLISTOP->op_first->op_sibling; /* pass pushmark */ kid = kUNOP->op_first; /* pass rv2gv */ kid = kUNOP->op_first; /* pass leave */ PL_sortcop = kid->op_next; stash = CopSTASH(PL_curcop); } else { cv = sv_2cv(*++MARK, &stash, &gv, 0); if (cv && SvPOK(cv)) { STRLEN n_a; char *proto = SvPV((SV*)cv, n_a); if (proto && strEQ(proto, "$$")) { hasargs = TRUE; } } if (!(cv && CvROOT(cv))) { if (cv && CvXSUB(cv)) { is_xsub = 1; } else if (gv) { SV *tmpstr = sv_newmortal(); gv_efullname3(tmpstr, gv, Nullch); DIE(aTHX_ "Undefined sort subroutine \"%s\" called", SvPVX(tmpstr)); } else { DIE(aTHX_ "Undefined subroutine in sort"); } } if (is_xsub) PL_sortcop = (OP*)cv; else { PL_sortcop = CvSTART(cv); SAVEVPTR(CvROOT(cv)->op_ppaddr); CvROOT(cv)->op_ppaddr = PL_ppaddr[OP_NULL]; SAVEVPTR(PL_curpad); PL_curpad = AvARRAY((AV*)AvARRAY(CvPADLIST(cv))[1]); } } } else { PL_sortcop = Nullop; stash = CopSTASH(PL_curcop); } up = myorigmark + 1; while (MARK < SP) { /* This may or may not shift down one here. */ /*SUPPRESS 560*/ if ((*up = *++MARK)) { /* Weed out nulls. */ SvTEMP_off(*up); if (!PL_sortcop && !SvPOK(*up)) { STRLEN n_a; if (SvAMAGIC(*up)) overloading = 1; else (void)sv_2pv(*up, &n_a); } up++; } } max = --up - myorigmark; if (PL_sortcop) { if (max > 1) { PERL_CONTEXT *cx; SV** newsp; bool oldcatch = CATCH_GET; SAVETMPS; SAVEOP(); CATCH_SET(TRUE); PUSHSTACKi(PERLSI_SORT); if (!hasargs && !is_xsub) { if (PL_sortstash != stash || !PL_firstgv || !PL_secondgv) { SAVESPTR(PL_firstgv); SAVESPTR(PL_secondgv); PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV); PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV); PL_sortstash = stash; } #ifdef USE_5005THREADS sv_lock((SV *)PL_firstgv); sv_lock((SV *)PL_secondgv); #endif SAVESPTR(GvSV(PL_firstgv)); SAVESPTR(GvSV(PL_secondgv)); } PUSHBLOCK(cx, CXt_NULL, PL_stack_base); if (!(PL_op->op_flags & OPf_SPECIAL)) { cx->cx_type = CXt_SUB; cx->blk_gimme = G_SCALAR; PUSHSUB(cx); if (!CvDEPTH(cv)) (void)SvREFCNT_inc(cv); /* in preparation for POPSUB */ } PL_sortcxix = cxstack_ix; if (hasargs && !is_xsub) { /* This is mostly copied from pp_entersub */ AV *av = (AV*)PL_curpad[0]; #ifndef USE_5005THREADS cx->blk_sub.savearray = GvAV(PL_defgv); GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av); #endif /* USE_5005THREADS */ cx->blk_sub.oldcurpad = PL_curpad; cx->blk_sub.argarray = av; } sortsv((myorigmark+1), max, is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv); POPBLOCK(cx,PL_curpm); PL_stack_sp = newsp; POPSTACK; CATCH_SET(oldcatch); } } else { if (max > 1) { MEXTEND(SP, 20); /* Can't afford stack realloc on signal. */ sortsv(ORIGMARK+1, max, (PL_op->op_private & OPpSORT_NUMERIC) ? ( (PL_op->op_private & OPpSORT_INTEGER) ? ( overloading ? amagic_i_ncmp : sv_i_ncmp) : ( overloading ? amagic_ncmp : sv_ncmp)) : ( IN_LOCALE_RUNTIME ? ( overloading ? amagic_cmp_locale : sv_cmp_locale_static) : ( overloading ? amagic_cmp : sv_cmp_static))); if (PL_op->op_private & OPpSORT_REVERSE) { SV **p = ORIGMARK+1; SV **q = ORIGMARK+max; while (p < q) { SV *tmp = *p; *p++ = *q; *q-- = tmp; } } } } LEAVE; PL_stack_sp = ORIGMARK + max; return nextop; } static I32 sortcv(pTHX_ SV *a, SV *b) { I32 oldsaveix = PL_savestack_ix; I32 oldscopeix = PL_scopestack_ix; I32 result; GvSV(PL_firstgv) = a; GvSV(PL_secondgv) = b; PL_stack_sp = PL_stack_base; PL_op = PL_sortcop; CALLRUNOPS(aTHX); if (PL_stack_sp != PL_stack_base + 1) Perl_croak(aTHX_ "Sort subroutine didn't return single value"); if (!SvNIOKp(*PL_stack_sp)) Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value"); result = SvIV(*PL_stack_sp); while (PL_scopestack_ix > oldscopeix) { LEAVE; } leave_scope(oldsaveix); return result; } static I32 sortcv_stacked(pTHX_ SV *a, SV *b) { I32 oldsaveix = PL_savestack_ix; I32 oldscopeix = PL_scopestack_ix; I32 result; AV *av; #ifdef USE_5005THREADS av = (AV*)PL_curpad[0]; #else av = GvAV(PL_defgv); #endif if (AvMAX(av) < 1) { SV** ary = AvALLOC(av); if (AvARRAY(av) != ary) { AvMAX(av) += AvARRAY(av) - AvALLOC(av); SvPVX(av) = (char*)ary; } if (AvMAX(av) < 1) { AvMAX(av) = 1; Renew(ary,2,SV*); SvPVX(av) = (char*)ary; } } AvFILLp(av) = 1; AvARRAY(av)[0] = a; AvARRAY(av)[1] = b; PL_stack_sp = PL_stack_base; PL_op = PL_sortcop; CALLRUNOPS(aTHX); if (PL_stack_sp != PL_stack_base + 1) Perl_croak(aTHX_ "Sort subroutine didn't return single value"); if (!SvNIOKp(*PL_stack_sp)) Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value"); result = SvIV(*PL_stack_sp); while (PL_scopestack_ix > oldscopeix) { LEAVE; } leave_scope(oldsaveix); return result; } static I32 sortcv_xsub(pTHX_ SV *a, SV *b) { dSP; I32 oldsaveix = PL_savestack_ix; I32 oldscopeix = PL_scopestack_ix; I32 result; CV *cv=(CV*)PL_sortcop; SP = PL_stack_base; PUSHMARK(SP); EXTEND(SP, 2); *++SP = a; *++SP = b; PUTBACK; (void)(*CvXSUB(cv))(aTHX_ cv); if (PL_stack_sp != PL_stack_base + 1) Perl_croak(aTHX_ "Sort subroutine didn't return single value"); if (!SvNIOKp(*PL_stack_sp)) Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value"); result = SvIV(*PL_stack_sp); while (PL_scopestack_ix > oldscopeix) { LEAVE; } leave_scope(oldsaveix); return result; } static I32 sv_ncmp(pTHX_ SV *a, SV *b) { NV nv1 = SvNV(a); NV nv2 = SvNV(b); return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0; } static I32 sv_i_ncmp(pTHX_ SV *a, SV *b) { IV iv1 = SvIV(a); IV iv2 = SvIV(b); return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0; } #define tryCALL_AMAGICbin(left,right,meth,svp) STMT_START { \ *svp = Nullsv; \ if (PL_amagic_generation) { \ if (SvAMAGIC(left)||SvAMAGIC(right))\ *svp = amagic_call(left, \ right, \ CAT2(meth,_amg), \ 0); \ } \ } STMT_END static I32 amagic_ncmp(pTHX_ register SV *a, register SV *b) { SV *tmpsv; tryCALL_AMAGICbin(a,b,ncmp,&tmpsv); if (tmpsv) { NV d; if (SvIOK(tmpsv)) { I32 i = SvIVX(tmpsv); if (i > 0) return 1; return i? -1 : 0; } d = SvNV(tmpsv); if (d > 0) return 1; return d? -1 : 0; } return sv_ncmp(aTHX_ a, b); } static I32 amagic_i_ncmp(pTHX_ register SV *a, register SV *b) { SV *tmpsv; tryCALL_AMAGICbin(a,b,ncmp,&tmpsv); if (tmpsv) { NV d; if (SvIOK(tmpsv)) { I32 i = SvIVX(tmpsv); if (i > 0) return 1; return i? -1 : 0; } d = SvNV(tmpsv); if (d > 0) return 1; return d? -1 : 0; } return sv_i_ncmp(aTHX_ a, b); } static I32 amagic_cmp(pTHX_ register SV *str1, register SV *str2) { SV *tmpsv; tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv); if (tmpsv) { NV d; if (SvIOK(tmpsv)) { I32 i = SvIVX(tmpsv); if (i > 0) return 1; return i? -1 : 0; } d = SvNV(tmpsv); if (d > 0) return 1; return d? -1 : 0; } return sv_cmp(str1, str2); } static I32 amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2) { SV *tmpsv; tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv); if (tmpsv) { NV d; if (SvIOK(tmpsv)) { I32 i = SvIVX(tmpsv); if (i > 0) return 1; return i? -1 : 0; } d = SvNV(tmpsv); if (d > 0) return 1; return d? -1 : 0; } return sv_cmp_locale(str1, str2); }