summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--numpy/core/src/multiarray/item_selection.c3
-rw-r--r--numpy/core/src/npysort/npysort_common.h8
-rw-r--r--numpy/core/src/npysort/timsort.c.src2170
3 files changed, 1922 insertions, 259 deletions
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 6265aa207..7f560363c 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -1289,6 +1289,9 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which)
case NPY_MERGESORT:
argsort = npy_amergesort;
break;
+ case NPY_TIMSORT:
+ argsort = npy_atimsort;
+ break;
}
}
else {
diff --git a/numpy/core/src/npysort/npysort_common.h b/numpy/core/src/npysort/npysort_common.h
index a22045b41..5fd03b96f 100644
--- a/numpy/core/src/npysort/npysort_common.h
+++ b/numpy/core/src/npysort/npysort_common.h
@@ -273,10 +273,10 @@ STRING_SWAP(char *s1, char *s2, size_t len)
NPY_INLINE static int
-STRING_LT(char *s1, char *s2, size_t len)
+STRING_LT(const char *s1, const char *s2, size_t len)
{
- const unsigned char *c1 = (unsigned char *)s1;
- const unsigned char *c2 = (unsigned char *)s2;
+ const unsigned char *c1 = (const unsigned char *)s1;
+ const unsigned char *c2 = (const unsigned char *)s2;
size_t i;
int ret = 0;
@@ -311,7 +311,7 @@ UNICODE_SWAP(npy_ucs4 *s1, npy_ucs4 *s2, size_t len)
NPY_INLINE static int
-UNICODE_LT(npy_ucs4 *s1, npy_ucs4 *s2, size_t len)
+UNICODE_LT(const npy_ucs4 *s1, const npy_ucs4 *s2, size_t len)
{
size_t i;
int ret = 0;
diff --git a/numpy/core/src/npysort/timsort.c.src b/numpy/core/src/npysort/timsort.c.src
index cf2377555..1fd86ff97 100644
--- a/numpy/core/src/npysort/timsort.c.src
+++ b/numpy/core/src/npysort/timsort.c.src
@@ -26,21 +26,22 @@
* The heap sort is included for completeness.
*/
+
+/* For details of Timsort, refer to
+ * https://github.com/python/cpython/blob/3.7/Objects/listsort.txt
+ */
+
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#include "npy_sort.h"
#include "npysort_common.h"
#include <stdlib.h>
-#define NOT_USED NPY_UNUSED(unused)
-#define PYA_QS_STACK 100
-#define SMALL_QUICKSORT 15
-#define SMALL_MERGESORT 20
-#define SMALL_STRING 16
-
+/* enough for 32 * 1.618 ** 128 elements */
#define TIMSORT_STACK_SIZE 128
+
npy_intp compute_min_run(npy_intp num)
{
npy_intp r = 0;
@@ -54,11 +55,41 @@ npy_intp compute_min_run(npy_intp num)
}
typedef struct {
- npy_intp s;
- npy_intp l;
+ npy_intp s; /* start pointer */
+ npy_intp l; /* length */
} run;
+/* buffer for argsort. Declared here to avoid multiple declarations. */
+typedef struct {
+ npy_intp *pw;
+ npy_intp size;
+} buffer_intp;
+
+
+/* buffer method */
+static NPY_INLINE int
+resize_buffer_intp(buffer_intp *buffer, npy_intp new_size)
+{
+ if (new_size <= buffer->size) {
+ return 0;
+ }
+
+ if (NPY_UNLIKELY(buffer->pw == NULL)) {
+ buffer->pw = malloc(new_size * sizeof(npy_intp));
+ } else {
+ buffer->pw = realloc(buffer->pw, new_size * sizeof(npy_intp));
+ }
+
+ buffer->size = new_size;
+
+ if (NPY_UNLIKELY(buffer->pw == NULL)) {
+ return -NPY_ENOMEM;
+ } else {
+ return 0;
+ }
+}
+
/*
*****************************************************************************
** NUMERIC SORTS **
@@ -122,14 +153,12 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun)
pl = arr + l;
- // (not strictly) ascending sequence
+ /* (not strictly) ascending sequence */
if (!@TYPE@_LT(*(pl + 1), *pl)) {
- for (pi = pl + 1; pi < pl + num - 1 && !@TYPE@_LT(*(pi + 1), *pi); ++pi) {
+ for (pi = pl + 1; pi < arr + num - 1 && !@TYPE@_LT(*(pi + 1), *pi); ++pi) {
}
-
- // (strictly) descending sequence
- } else {
- for (pi = pl + 1; pi < pl + num - 1 && @TYPE@_LT(*(pi + 1), *pi); ++pi) {
+ } else { /* (strictly) descending sequence */
+ for (pi = pl + 1; pi < arr + num - 1 && @TYPE@_LT(*(pi + 1), *pi); ++pi) {
}
for (pj = pl, pr = pi; pj < pr; ++pj, --pr) {
@@ -137,7 +166,8 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun)
}
}
- sz = pi - pl + 1;
+ ++pi;
+ sz = pi - pl;
if (sz < minrun) {
if (l + minrun < num) {
@@ -148,6 +178,7 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun)
pr = pl + sz;
+ /* insertion sort */
for (; pi < pr; ++pi) {
vc = *pi;
pj = pi;
@@ -165,6 +196,9 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun)
}
+/* when the left part of the array (p1) is smaller, copy p1 to buffer
+ * and merge from left to right
+ */
static void
merge_left_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2,
@type@ *p3)
@@ -188,6 +222,9 @@ merge_left_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2,
}
+/* when the right part of the array (p2) is smaller, copy p2 to buffer
+ * and merge from right to left
+ */
static void
merge_right_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2,
@type@ *p3)
@@ -243,6 +280,7 @@ gallop_right_@suff@(const @type@ *arr, const npy_intp size, const @type@ key)
break;
} else {
last_ofs = ofs;
+ /* ofs = 1, 3, 7, 15... */
ofs = (ofs << 1) + 1;
}
}
@@ -319,7 +357,10 @@ merge_at_@suff@(@type@ *arr, const run *stack, const npy_intp at,
l1 = stack[at].l;
s2 = stack[at + 1].s;
l2 = stack[at + 1].l;
- /* arr[s2] belongs to arr[s1+k] */
+ /* arr[s2] belongs to arr[s1+k].
+ * if try to comment this out for debugging purpose, remember
+ * in the merging process the first element is skipped
+ */
k = gallop_right_@suff@(arr + s1, l1, arr[s2]);
if (l1 == k) {
@@ -436,32 +477,30 @@ force_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr,
int
-timsort_@suff@(void *start, npy_intp num, void *NOT_USED)
+timsort_@suff@(void *start, npy_intp num, void *NPY_UNUSED(varr))
{
int ret;
npy_intp l, n, stack_ptr, minrun;
buffer_@suff@ buffer;
- @type@ *arr;
run stack[TIMSORT_STACK_SIZE];
buffer.pw = NULL;
buffer.size = 0;
stack_ptr = 0;
minrun = compute_min_run(num);
- arr = start;
for (l = 0; l < num;) {
- n = count_run_@suff@(arr, l, num, minrun);
+ n = count_run_@suff@(start, l, num, minrun);
stack[stack_ptr].s = l;
stack[stack_ptr].l = n;
++stack_ptr;
- ret = try_collapse_@suff@(arr, stack, &stack_ptr, &buffer);
+ ret = try_collapse_@suff@(start, stack, &stack_ptr, &buffer);
if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
l += n;
}
- ret = force_collapse_@suff@(arr, stack, &stack_ptr, &buffer);
+ ret = force_collapse_@suff@(start, stack, &stack_ptr, &buffer);
if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
@@ -476,78 +515,407 @@ cleanup:
}
-/* Below copied directly from mergesort */
+/* argsort */
-static void
-amergesort0_@suff@(npy_intp *pl, npy_intp *pr, @type@ *v, npy_intp *pw)
+static npy_intp
+acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num,
+ npy_intp minrun)
{
- @type@ vp;
- npy_intp vi, *pi, *pj, *pk, *pm;
+ npy_intp sz;
+ @type@ vc;
+ npy_intp vi;
+ npy_intp *pl, *pi, *pj, *pr;
- if (pr - pl > SMALL_MERGESORT) {
- /* merge sort */
- pm = pl + ((pr - pl) >> 1);
- amergesort0_@suff@(pl, pm, v, pw);
- amergesort0_@suff@(pm, pr, v, pw);
+ if (NPY_UNLIKELY(num - l == 1)) {
+ return 1;
+ }
- for (pi = pw, pj = pl; pj < pm;) {
- *pi++ = *pj++;
- }
+ pl = tosort + l;
- pi = pw + (pm - pl);
- pj = pw;
- pk = pl;
+ /* (not strictly) ascending sequence */
+ if (!@TYPE@_LT(arr[*(pl + 1)], arr[*pl])) {
+ for (pi = pl + 1; pi < tosort + num - 1
+ && !@TYPE@_LT(arr[*(pi + 1)], arr[*pi]); ++pi) {
+ }
+ } else { /* (strictly) descending sequence */
+ for (pi = pl + 1; pi < tosort + num - 1
+ && @TYPE@_LT(arr[*(pi + 1)], arr[*pi]); ++pi) {
+ }
- while (pj < pi && pm < pr) {
- if (@TYPE@_LT(v[*pm], v[*pj])) {
- *pk++ = *pm++;
- } else {
- *pk++ = *pj++;
- }
+ for (pj = pl, pr = pi; pj < pr; ++pj, --pr) {
+ INTP_SWAP(*pj, *pr);
}
+ }
+
+ ++pi;
+ sz = pi - pl;
- while (pj < pi) {
- *pk++ = *pj++;
+ if (sz < minrun) {
+ if (l + minrun < num) {
+ sz = minrun;
+ } else {
+ sz = num - l;
}
- } else {
+
+ pr = pl + sz;
+
/* insertion sort */
- for (pi = pl + 1; pi < pr; ++pi) {
+ for (; pi < pr; ++pi) {
vi = *pi;
- vp = v[vi];
+ vc = arr[*pi];
pj = pi;
- pk = pi - 1;
- while (pj > pl && @TYPE@_LT(vp, v[*pk])) {
- *pj-- = *pk--;
+ while (pl < pj && @TYPE@_LT(vc, arr[*(pj - 1)])) {
+ *pj = *(pj - 1);
+ --pj;
}
*pj = vi;
}
}
+
+ return sz;
}
-int
-atimsort_@suff@(void *v, npy_intp *tosort, npy_intp num, void *NOT_USED)
+static npy_intp
+agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort,
+ const npy_intp size, const @type@ key)
{
- npy_intp *pl, *pr, *pw;
- pl = tosort;
- pr = pl + num;
- pw = malloc((num / 2) * sizeof(npy_intp));
+ npy_intp last_ofs, ofs, m;
- if (pw == NULL) {
- return -NPY_ENOMEM;
+ if (@TYPE@_LT(key, arr[tosort[0]])) {
+ return 0;
+ }
+
+ last_ofs = 0;
+ ofs = 1;
+
+ for (;;) {
+ if (size <= ofs || ofs < 0) {
+ ofs = size; /* arr[ofs] is never accessed */
+ break;
+ }
+
+ if (@TYPE@_LT(key, arr[tosort[ofs]])) {
+ break;
+ } else {
+ last_ofs = ofs;
+ /* ofs = 1, 3, 7, 15... */
+ ofs = (ofs << 1) + 1;
+ }
+ }
+
+ /* now that arr[tosort[last_ofs]] <= key < arr[tosort[ofs]] */
+ while (last_ofs + 1 < ofs) {
+ m = last_ofs + ((ofs - last_ofs) >> 1);
+
+ if (@TYPE@_LT(key, arr[tosort[m]])) {
+ ofs = m;
+ } else {
+ last_ofs = m;
+ }
+ }
+
+ /* now that arr[tosort[ofs-1]] <= key < arr[tosort[ofs]] */
+ return ofs;
+}
+
+
+
+static npy_intp
+agallop_left_@suff@(const @type@ *arr, const npy_intp *tosort,
+ const npy_intp size, const @type@ key)
+{
+ npy_intp last_ofs, ofs, l, m, r;
+
+ if (@TYPE@_LT(arr[tosort[size - 1]], key)) {
+ return size;
+ }
+
+ last_ofs = 0;
+ ofs = 1;
+
+ for (;;) {
+ if (size <= ofs || ofs < 0) {
+ ofs = size;
+ break;
+ }
+
+ if (@TYPE@_LT(arr[tosort[size - ofs - 1]], key)) {
+ break;
+ } else {
+ last_ofs = ofs;
+ ofs = (ofs << 1) + 1;
+ }
+ }
+
+ /* now that arr[tosort[size-ofs-1]] < key <= arr[tosort[size-last_ofs-1]] */
+ l = size - ofs - 1;
+ r = size - last_ofs - 1;
+
+ while (l + 1 < r) {
+ m = l + ((r - l) >> 1);
+
+ if (@TYPE@_LT(arr[tosort[m]], key)) {
+ l = m;
+ } else {
+ r = m;
+ }
+ }
+
+ /* now that arr[tosort[r-1]] < key <= arr[tosort[r]] */
+ return r;
+}
+
+
+static void
+amerge_left_@suff@(@type@ *arr, npy_intp *p1, npy_intp l1, npy_intp *p2,
+ npy_intp l2,
+ npy_intp *p3)
+{
+ npy_intp *end = p2 + l2;
+ memcpy(p3, p1, sizeof(npy_intp) * l1);
+ /* first element must be in p2 otherwise skipped in the caller */
+ *p1++ = *p2++;
+
+ while (p1 < p2 && p2 < end) {
+ if (@TYPE@_LT(arr[*p2], arr[*p3])) {
+ *p1++ = *p2++;
+ } else {
+ *p1++ = *p3++;
+ }
+ }
+
+ if (p1 != p2) {
+ memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1));
+ }
+}
+
+
+static void
+amerge_right_@suff@(@type@ *arr, npy_intp* p1, npy_intp l1, npy_intp *p2,
+ npy_intp l2,
+ npy_intp *p3)
+{
+ npy_intp ofs;
+ npy_intp *start = p1 - 1;
+ memcpy(p3, p2, sizeof(npy_intp) * l2);
+ p1 += l1 - 1;
+ p2 += l2 - 1;
+ p3 += l2 - 1;
+ /* first element must be in p1 otherwise skipped in the caller */
+ *p2-- = *p1--;
+
+ while (p1 < p2 && start < p1) {
+ if (@TYPE@_LT(arr[*p3], arr[*p1])) {
+ *p2-- = *p1--;
+ } else {
+ *p2-- = *p3--;
+ }
+ }
+
+ if (p1 != p2) {
+ ofs = p2 - start;
+ memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs);
+ }
+}
+
+
+static int
+amerge_at_@suff@(@type@ *arr, npy_intp *tosort, const run *stack,
+ const npy_intp at,
+ buffer_intp *buffer)
+{
+ int ret;
+ npy_intp s1, l1, s2, l2, k;
+ npy_intp *p1, *p2;
+ s1 = stack[at].s;
+ l1 = stack[at].l;
+ s2 = stack[at + 1].s;
+ l2 = stack[at + 1].l;
+ /* tosort[s2] belongs to tosort[s1+k] */
+ k = agallop_right_@suff@(arr, tosort + s1, l1, arr[tosort[s2]]);
+
+ if (l1 == k) {
+ /* already sorted */
+ return 0;
+ }
+
+ p1 = tosort + s1 + k;
+ l1 -= k;
+ p2 = tosort + s2;
+ /* tosort[s2-1] belongs to tosort[s2+l2] */
+ l2 = agallop_left_@suff@(arr, tosort + s2, l2, arr[tosort[s2 - 1]]);
+
+ if (l2 < l1) {
+ ret = resize_buffer_intp(buffer, l2);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ amerge_right_@suff@(arr, p1, l1, p2, l2, buffer->pw);
+ } else {
+ ret = resize_buffer_intp(buffer, l1);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ amerge_left_@suff@(arr, p1, l1, p2, l2, buffer->pw);
+ }
+
+ return 0;
+}
+
+
+static int
+atry_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack,
+ npy_intp *stack_ptr,
+ buffer_intp *buffer)
+{
+ int ret;
+ npy_intp A, B, C, top;
+ top = *stack_ptr;
+
+ while (1 < top) {
+ B = stack[top - 2].l;
+ C = stack[top - 1].l;
+
+ if ((2 < top && stack[top - 3].l <= B + C) ||
+ (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
+ A = stack[top - 3].l;
+
+ if (A <= C) {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 3].l += B;
+ stack[top - 2] = stack[top - 1];
+ --top;
+ } else {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += C;
+ --top;
+ }
+ } else if (1 < top && B <= C) {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += C;
+ --top;
+ } else {
+ break;
+ }
+ }
+
+ *stack_ptr = top;
+ return 0;
+}
+
+
+static int
+aforce_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack,
+ npy_intp *stack_ptr,
+ buffer_intp *buffer)
+{
+ int ret;
+ npy_intp top = *stack_ptr;
+
+ while (2 < top) {
+ if (stack[top - 3].l <= stack[top - 1].l) {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 3].l += stack[top - 2].l;
+ stack[top - 2] = stack[top - 1];
+ --top;
+ } else {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += stack[top - 1].l;
+ --top;
+ }
+ }
+
+ if (1 < top) {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
}
- amergesort0_@suff@(pl, pr, v, pw);
- free(pw);
return 0;
}
+
+int
+atimsort_@suff@(void *v, npy_intp *tosort, npy_intp num,
+ void *NPY_UNUSED(varr))
+{
+ int ret;
+ npy_intp l, n, stack_ptr, minrun;
+ buffer_intp buffer;
+ run stack[TIMSORT_STACK_SIZE];
+ buffer.pw = NULL;
+ buffer.size = 0;
+ stack_ptr = 0;
+ minrun = compute_min_run(num);
+
+ for (l = 0; l < num;) {
+ n = acount_run_@suff@(v, tosort, l, num, minrun);
+ stack[stack_ptr].s = l;
+ stack[stack_ptr].l = n;
+ ++stack_ptr;
+ ret = atry_collapse_@suff@(v, tosort, stack, &stack_ptr, &buffer);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ l += n;
+ }
+
+ ret = aforce_collapse_@suff@(v, tosort, stack, &stack_ptr, &buffer);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ ret = 0;
+cleanup:
+
+ if (buffer.pw != NULL) {
+ free(buffer.pw);
+ }
+
+ return ret;
+}
+
/**end repeat**/
+
+/* For string sorts and generic sort, element comparisons are very expensive,
+ * and the time cost of insertion sort (involves N**2 comparison) clearly hurts.
+ * Implementing binary insertion sort and probably gallop mode during merging process
+ * can hopefully boost the performance. Here as a temporary workaround we use shorter
+ * run length to reduce the cost of insertion sort.
+ */
+
+npy_intp compute_min_run_short(npy_intp num)
+{
+ npy_intp r = 0;
+
+ while (16 < num) {
+ r |= num & 1;
+ num >>= 1;
+ }
+
+ return num + r;
+}
+
/*
*****************************************************************************
** STRING SORTS **
@@ -562,171 +930,809 @@ atimsort_@suff@(void *v, npy_intp *tosort, npy_intp num, void *NOT_USED)
* #type = npy_char, npy_ucs4#
*/
-static void
-mergesort0_@suff@(@type@ *pl, @type@ *pr, @type@ *pw, @type@ *vp, size_t len)
-{
- @type@ *pi, *pj, *pk, *pm;
-
- if ((size_t)(pr - pl) > SMALL_MERGESORT * len) {
- /* merge sort */
- pm = pl + (((pr - pl) / len) >> 1) * len;
- mergesort0_@suff@(pl, pm, pw, vp, len);
- mergesort0_@suff@(pm, pr, pw, vp, len);
- @TYPE@_COPY(pw, pl, pm - pl);
- pi = pw + (pm - pl);
- pj = pw;
- pk = pl;
-
- while (pj < pi && pm < pr) {
- if (@TYPE@_LT(pm, pj, len)) {
- @TYPE@_COPY(pk, pm, len);
- pm += len;
- pk += len;
- } else {
- @TYPE@_COPY(pk, pj, len);
- pj += len;
- pk += len;
- }
- }
- @TYPE@_COPY(pk, pj, pi - pj);
+typedef struct {
+ @type@ * pw;
+ npy_intp size;
+ size_t len;
+} buffer_@suff@;
+
+
+static NPY_INLINE int
+resize_buffer_@suff@(buffer_@suff@ *buffer, npy_intp new_size)
+{
+ if (new_size <= buffer->size) {
+ return 0;
+ }
+
+ if (NPY_UNLIKELY(buffer->pw == NULL)) {
+ buffer->pw = malloc(sizeof(@type@) * new_size * buffer->len);
} else {
+ buffer->pw = realloc(buffer->pw, sizeof(@type@) * new_size * buffer->len);
+ }
+
+ buffer->size = new_size;
+
+ if (NPY_UNLIKELY(buffer->pw == NULL)) {
+ return -NPY_ENOMEM;
+ } else {
+ return 0;
+ }
+}
+
+
+static npy_intp
+count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun,
+ @type@ *vp, size_t len)
+{
+ npy_intp sz;
+ @type@ *pl, *pi, *pj, *pr;
+
+ if (NPY_UNLIKELY(num - l == 1)) {
+ return 1;
+ }
+
+ pl = arr + l * len;
+
+ /* (not strictly) ascending sequence */
+ if (!@TYPE@_LT(pl + len, pl, len)) {
+ for (pi = pl + len; pi < arr + (num - 1) * len
+ && !@TYPE@_LT(pi + len, pi, len); pi += len) {
+ }
+ } else { /* (strictly) descending sequence */
+ for (pi = pl + len; pi < arr + (num - 1) * len
+ && @TYPE@_LT(pi + len, pi, len); pi += len) {
+ }
+
+ for (pj = pl, pr = pi; pj < pr; pj += len, pr -= len) {
+ @TYPE@_SWAP(pj, pr, len);
+ }
+ }
+
+ pi += len;
+ sz = (pi - pl) / len;
+
+ if (sz < minrun) {
+ if (l + minrun < num) {
+ sz = minrun;
+ } else {
+ sz = num - l;
+ }
+
+ pr = pl + sz * len;
+
/* insertion sort */
- for (pi = pl + len; pi < pr; pi += len) {
+ for (; pi < pr; pi += len) {
@TYPE@_COPY(vp, pi, len);
pj = pi;
- pk = pi - len;
- while (pj > pl && @TYPE@_LT(vp, pk, len)) {
- @TYPE@_COPY(pj, pk, len);
+ while (pl < pj && @TYPE@_LT(vp, pj - len, len)) {
+ @TYPE@_COPY(pj, pj - len, len);
pj -= len;
- pk -= len;
}
@TYPE@_COPY(pj, vp, len);
}
}
+
+ return sz;
}
-int
-timsort_@suff@(void *start, npy_intp num, void *varr)
+static npy_intp
+gallop_right_@suff@(const @type@ *arr, const npy_intp size,
+ const @type@ *key, size_t len)
{
- PyArrayObject *arr = varr;
- size_t elsize = PyArray_ITEMSIZE(arr);
- size_t len = elsize / sizeof(@type@);
- @type@ *pl, *pr, *pw, *vp;
- int err = 0;
+ npy_intp last_ofs, ofs, m;
- /* Items that have zero size don't make sense to sort */
- if (elsize == 0) {
+ if (@TYPE@_LT(key, arr, len)) {
return 0;
}
- pl = start;
- pr = pl + num * len;
- pw = malloc((num / 2) * elsize);
+ last_ofs = 0;
+ ofs = 1;
+
+ for (;;) {
+ if (size <= ofs || ofs < 0) {
+ ofs = size; /* arr[ofs] is never accessed */
+ break;
+ }
- if (pw == NULL) {
- err = -NPY_ENOMEM;
- goto fail_0;
+ if (@TYPE@_LT(key, arr + ofs * len, len)) {
+ break;
+ } else {
+ last_ofs = ofs;
+ /* ofs = 1, 3, 7, 15... */
+ ofs = (ofs << 1) + 1;
+ }
}
- vp = malloc(elsize);
+ /* now that arr[last_ofs*len] <= key < arr[ofs*len] */
+ while (last_ofs + 1 < ofs) {
+ m = last_ofs + ((ofs - last_ofs) >> 1);
- if (vp == NULL) {
- err = -NPY_ENOMEM;
- goto fail_1;
+ if (@TYPE@_LT(key, arr + m * len, len)) {
+ ofs = m;
+ } else {
+ last_ofs = m;
+ }
}
- mergesort0_@suff@(pl, pr, pw, vp, len);
- free(vp);
-fail_1:
- free(pw);
-fail_0:
- return err;
+ /* now that arr[(ofs-1)*len] <= key < arr[ofs*len] */
+ return ofs;
}
-static void
-amergesort0_@suff@(npy_intp *pl, npy_intp *pr, @type@ *v, npy_intp *pw,
+
+static npy_intp
+gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ *key,
size_t len)
{
- @type@ *vp;
- npy_intp vi, *pi, *pj, *pk, *pm;
+ npy_intp last_ofs, ofs, l, m, r;
+
+ if (@TYPE@_LT(arr + (size - 1) * len, key, len)) {
+ return size;
+ }
+
+ last_ofs = 0;
+ ofs = 1;
+
+ for (;;) {
+ if (size <= ofs || ofs < 0) {
+ ofs = size;
+ break;
+ }
+
+ if (@TYPE@_LT(arr + (size - ofs - 1) * len, key, len)) {
+ break;
+ } else {
+ last_ofs = ofs;
+ ofs = (ofs << 1) + 1;
+ }
+ }
+
+ /* now that arr[(size-ofs-1)*len] < key <= arr[(size-last_ofs-1)*len] */
+ l = size - ofs - 1;
+ r = size - last_ofs - 1;
+
+ while (l + 1 < r) {
+ m = l + ((r - l) >> 1);
+
+ if (@TYPE@_LT(arr + m * len, key, len)) {
+ l = m;
+ } else {
+ r = m;
+ }
+ }
+
+ /* now that arr[(r-1)*len] < key <= arr[r*len] */
+ return r;
+}
+
+
+static void
+merge_left_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2,
+ @type@ *p3, size_t len)
+{
+ @type@ *end = p2 + l2 * len;
+ memcpy(p3, p1, sizeof(@type@) * l1 * len);
+ /* first element must be in p2 otherwise skipped in the caller */
+ @TYPE@_COPY(p1, p2, len);
+ p1 += len;
+ p2 += len;
+
+ while (p1 < p2 && p2 < end) {
+ if (@TYPE@_LT(p2, p3, len)) {
+ @TYPE@_COPY(p1, p2, len);
+ p1 += len;
+ p2 += len;
+ } else {
+ @TYPE@_COPY(p1, p3, len);
+ p1 += len;
+ p3 += len;
+ }
+ }
+
+ if (p1 != p2) {
+ memcpy(p1, p3, sizeof(@type@) * (p2 - p1));
+ }
+}
+
- if (pr - pl > SMALL_MERGESORT) {
- /* merge sort */
- pm = pl + ((pr - pl) >> 1);
- amergesort0_@suff@(pl, pm, v, pw, len);
- amergesort0_@suff@(pm, pr, v, pw, len);
+static void
+merge_right_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2,
+ @type@ *p3, size_t len)
+{
+ npy_intp ofs;
+ @type@ *start = p1 - len;
+ memcpy(p3, p2, sizeof(@type@) * l2 * len);
+ p1 += (l1 - 1) * len;
+ p2 += (l2 - 1) * len;
+ p3 += (l2 - 1) * len;
+ /* first element must be in p1 otherwise skipped in the caller */
+ @TYPE@_COPY(p2, p1, len);
+ p2 -= len;
+ p1 -= len;
- for (pi = pw, pj = pl; pj < pm;) {
- *pi++ = *pj++;
+ while (p1 < p2 && start < p1) {
+ if (@TYPE@_LT(p3, p1, len)) {
+ @TYPE@_COPY(p2, p1, len);
+ p2 -= len;
+ p1 -= len;
+ } else {
+ @TYPE@_COPY(p2, p3, len);
+ p2 -= len;
+ p3 -= len;
}
+ }
+
+ if (p1 != p2) {
+ ofs = p2 - start;
+ memcpy(start + len, p3 - ofs + len, sizeof(@type@) * ofs);
+ }
+}
+
+
+static int
+merge_at_@suff@(@type@ *arr, const run *stack, const npy_intp at,
+ buffer_@suff@ *buffer, size_t len)
+{
+ int ret;
+ npy_intp s1, l1, s2, l2, k;
+ @type@ *p1, *p2;
+ s1 = stack[at].s;
+ l1 = stack[at].l;
+ s2 = stack[at + 1].s;
+ l2 = stack[at + 1].l;
+ /* arr[s2] belongs to arr[s1+k] */
+ @TYPE@_COPY(buffer->pw, arr + s2 * len, len);
+ k = gallop_right_@suff@(arr + s1 * len, l1, buffer->pw, len);
+
+ if (l1 == k) {
+ /* already sorted */
+ return 0;
+ }
+
+ p1 = arr + (s1 + k) * len;
+ l1 -= k;
+ p2 = arr + s2 * len;
+ /* arr[s2-1] belongs to arr[s2+l2] */
+ @TYPE@_COPY(buffer->pw, arr + (s2 - 1) * len, len);
+ l2 = gallop_left_@suff@(arr + s2 * len, l2, buffer->pw, len);
+
+ if (l2 < l1) {
+ ret = resize_buffer_@suff@(buffer, l2);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ merge_right_@suff@(p1, l1, p2, l2, buffer->pw, len);
+ } else {
+ ret = resize_buffer_@suff@(buffer, l1);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
- pi = pw + (pm - pl);
- pj = pw;
- pk = pl;
+ merge_left_@suff@(p1, l1, p2, l2, buffer->pw, len);
+ }
+
+ return 0;
+}
+
+
+static int
+try_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr,
+ buffer_@suff@ *buffer, size_t len)
+{
+ int ret;
+ npy_intp A, B, C, top;
+ top = *stack_ptr;
+
+ while (1 < top) {
+ B = stack[top - 2].l;
+ C = stack[top - 1].l;
+
+ if ((2 < top && stack[top - 3].l <= B + C) ||
+ (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
+ A = stack[top - 3].l;
+
+ if (A <= C) {
+ ret = merge_at_@suff@(arr, stack, top - 3, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
- while (pj < pi && pm < pr) {
- if (@TYPE@_LT(v + (*pm)*len, v + (*pj)*len, len)) {
- *pk++ = *pm++;
+ stack[top - 3].l += B;
+ stack[top - 2] = stack[top - 1];
+ --top;
} else {
- *pk++ = *pj++;
+ ret = merge_at_@suff@(arr, stack, top - 2, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += C;
+ --top;
}
+ } else if (1 < top && B <= C) {
+ ret = merge_at_@suff@(arr, stack, top - 2, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += C;
+ --top;
+ } else {
+ break;
}
+ }
+
+ *stack_ptr = top;
+ return 0;
+}
+
- while (pj < pi) {
- *pk++ = *pj++;
+static int
+force_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr,
+ buffer_@suff@ *buffer, size_t len)
+{
+ int ret;
+ npy_intp top = *stack_ptr;
+
+ while (2 < top) {
+ if (stack[top - 3].l <= stack[top - 1].l) {
+ ret = merge_at_@suff@(arr, stack, top - 3, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 3].l += stack[top - 2].l;
+ stack[top - 2] = stack[top - 1];
+ --top;
+ } else {
+ ret = merge_at_@suff@(arr, stack, top - 2, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += stack[top - 1].l;
+ --top;
}
- } else {
+ }
+
+ if (1 < top) {
+ ret = merge_at_@suff@(arr, stack, top - 2, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+ }
+
+ return 0;
+}
+
+
+int
+timsort_@suff@(void *start, npy_intp num, void *varr)
+{
+ PyArrayObject *arr = varr;
+ size_t elsize = PyArray_ITEMSIZE(arr);
+ size_t len = elsize / sizeof(@type@);
+ int ret;
+ npy_intp l, n, stack_ptr, minrun;
+ run stack[TIMSORT_STACK_SIZE];
+ buffer_@suff@ buffer;
+ buffer.pw = NULL;
+ buffer.size = 0;
+ buffer.len = len;
+ stack_ptr = 0;
+ minrun = compute_min_run_short(num);
+ /* used for insertion sort and gallop key */
+ ret = resize_buffer_@suff@(&buffer, 1);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ for (l = 0; l < num;) {
+ n = count_run_@suff@(start, l, num, minrun, buffer.pw, len);
+ /* both s and l are scaled by len */
+ stack[stack_ptr].s = l;
+ stack[stack_ptr].l = n;
+ ++stack_ptr;
+ ret = try_collapse_@suff@(start, stack, &stack_ptr, &buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ l += n;
+ }
+
+ ret = force_collapse_@suff@(start, stack, &stack_ptr, &buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ ret = 0;
+cleanup:
+
+ if (buffer.pw != NULL) {
+ free(buffer.pw);
+ }
+
+ return ret;
+}
+
+
+/* argsort */
+
+
+static npy_intp
+acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num,
+ npy_intp minrun, size_t len)
+{
+ npy_intp sz;
+ npy_intp vi;
+ npy_intp *pl, *pi, *pj, *pr;
+
+ if (NPY_UNLIKELY(num - l == 1)) {
+ return 1;
+ }
+
+ pl = tosort + l;
+
+ /* (not strictly) ascending sequence */
+ if (!@TYPE@_LT(arr + (*(pl + 1)) * len, arr + (*pl) * len, len)) {
+ for (pi = pl + 1; pi < tosort + num - 1
+ && !@TYPE@_LT(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); ++pi) {
+ }
+ } else { /* (strictly) descending sequence */
+ for (pi = pl + 1; pi < tosort + num - 1
+ && @TYPE@_LT(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); ++pi) {
+ }
+
+ for (pj = pl, pr = pi; pj < pr; ++pj, --pr) {
+ INTP_SWAP(*pj, *pr);
+ }
+ }
+
+ ++pi;
+ sz = pi - pl;
+
+ if (sz < minrun) {
+ if (l + minrun < num) {
+ sz = minrun;
+ } else {
+ sz = num - l;
+ }
+
+ pr = pl + sz;
+
/* insertion sort */
- for (pi = pl + 1; pi < pr; ++pi) {
+ for (; pi < pr; ++pi) {
vi = *pi;
- vp = v + vi * len;
pj = pi;
- pk = pi - 1;
- while (pj > pl && @TYPE@_LT(vp, v + (*pk)*len, len)) {
- *pj-- = *pk--;
+ while (pl < pj && @TYPE@_LT(arr + vi * len, arr + (*(pj - 1)) * len, len)) {
+ *pj = *(pj - 1);
+ --pj;
}
*pj = vi;
}
}
+
+ return sz;
+}
+
+
+static npy_intp
+agallop_left_@suff@(const @type@ *arr, const npy_intp *tosort,
+ const npy_intp size, const @type@ *key, size_t len)
+{
+ npy_intp last_ofs, ofs, l, m, r;
+
+ if (@TYPE@_LT(arr + tosort[size - 1] * len, key, len)) {
+ return size;
+ }
+
+ last_ofs = 0;
+ ofs = 1;
+
+ for (;;) {
+ if (size <= ofs || ofs < 0) {
+ ofs = size;
+ break;
+ }
+
+ if (@TYPE@_LT(arr + tosort[size - ofs - 1] * len, key, len)) {
+ break;
+ } else {
+ last_ofs = ofs;
+ ofs = (ofs << 1) + 1;
+ }
+ }
+
+ /* now that arr[tosort[size-ofs-1]*len] < key <= arr[tosort[size-last_ofs-1]*len] */
+ l = size - ofs - 1;
+ r = size - last_ofs - 1;
+
+ while (l + 1 < r) {
+ m = l + ((r - l) >> 1);
+
+ if (@TYPE@_LT(arr + tosort[m] * len, key, len)) {
+ l = m;
+ } else {
+ r = m;
+ }
+ }
+
+ /* now that arr[tosort[r-1]*len] < key <= arr[tosort[r]*len] */
+ return r;
+}
+
+
+static npy_intp
+agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort,
+ const npy_intp size, const @type@ *key, size_t len)
+{
+ npy_intp last_ofs, ofs, m;
+
+ if (@TYPE@_LT(key, arr + tosort[0] * len, len)) {
+ return 0;
+ }
+
+ last_ofs = 0;
+ ofs = 1;
+
+ for (;;) {
+ if (size <= ofs || ofs < 0) {
+ ofs = size; /* arr[ofs] is never accessed */
+ break;
+ }
+
+ if (@TYPE@_LT(key, arr + tosort[ofs] * len, len)) {
+ break;
+ } else {
+ last_ofs = ofs;
+ /* ofs = 1, 3, 7, 15... */
+ ofs = (ofs << 1) + 1;
+ }
+ }
+
+ /* now that arr[tosort[last_ofs]*len] <= key < arr[tosort[ofs]*len] */
+ while (last_ofs + 1 < ofs) {
+ m = last_ofs + ((ofs - last_ofs) >> 1);
+
+ if (@TYPE@_LT(key, arr + tosort[m] * len, len)) {
+ ofs = m;
+ } else {
+ last_ofs = m;
+ }
+ }
+
+ /* now that arr[tosort[ofs-1]*len] <= key < arr[tosort[ofs]*len] */
+ return ofs;
+}
+
+
+
+static void
+amerge_left_@suff@(@type@ *arr, npy_intp *p1, npy_intp l1, npy_intp *p2,
+ npy_intp l2, npy_intp *p3, size_t len)
+{
+ npy_intp *end = p2 + l2;
+ memcpy(p3, p1, sizeof(npy_intp) * l1);
+ /* first element must be in p2 otherwise skipped in the caller */
+ *p1++ = *p2++;
+
+ while (p1 < p2 && p2 < end) {
+ if (@TYPE@_LT(arr + (*p2) * len, arr + (*p3) * len, len)) {
+ *p1++ = *p2++;
+ } else {
+ *p1++ = *p3++;
+ }
+ }
+
+ if (p1 != p2) {
+ memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1));
+ }
+}
+
+
+static void
+amerge_right_@suff@(@type@ *arr, npy_intp* p1, npy_intp l1, npy_intp *p2,
+ npy_intp l2, npy_intp *p3, size_t len)
+{
+ npy_intp ofs;
+ npy_intp *start = p1 - 1;
+ memcpy(p3, p2, sizeof(npy_intp) * l2);
+ p1 += l1 - 1;
+ p2 += l2 - 1;
+ p3 += l2 - 1;
+ /* first element must be in p1 otherwise skipped in the caller */
+ *p2-- = *p1--;
+
+ while (p1 < p2 && start < p1) {
+ if (@TYPE@_LT(arr + (*p3) * len, arr + (*p1) * len, len)) {
+ *p2-- = *p1--;
+ } else {
+ *p2-- = *p3--;
+ }
+ }
+
+ if (p1 != p2) {
+ ofs = p2 - start;
+ memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs);
+ }
+}
+
+
+
+static int
+amerge_at_@suff@(@type@ *arr, npy_intp *tosort, const run *stack,
+ const npy_intp at, buffer_intp *buffer, size_t len)
+{
+ int ret;
+ npy_intp s1, l1, s2, l2, k;
+ npy_intp *p1, *p2;
+ s1 = stack[at].s;
+ l1 = stack[at].l;
+ s2 = stack[at + 1].s;
+ l2 = stack[at + 1].l;
+ /* tosort[s2] belongs to tosort[s1+k] */
+ k = agallop_right_@suff@(arr, tosort + s1, l1, arr + tosort[s2] * len, len);
+
+ if (l1 == k) {
+ /* already sorted */
+ return 0;
+ }
+
+ p1 = tosort + s1 + k;
+ l1 -= k;
+ p2 = tosort + s2;
+ /* tosort[s2-1] belongs to tosort[s2+l2] */
+ l2 = agallop_left_@suff@(arr, tosort + s2, l2, arr + tosort[s2 - 1] * len,
+ len);
+
+ if (l2 < l1) {
+ ret = resize_buffer_intp(buffer, l2);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ amerge_right_@suff@(arr, p1, l1, p2, l2, buffer->pw, len);
+ } else {
+ ret = resize_buffer_intp(buffer, l1);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ amerge_left_@suff@(arr, p1, l1, p2, l2, buffer->pw, len);
+ }
+
+ return 0;
+}
+
+
+static int
+atry_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack,
+ npy_intp *stack_ptr, buffer_intp *buffer, size_t len)
+{
+ int ret;
+ npy_intp A, B, C, top;
+ top = *stack_ptr;
+
+ while (1 < top) {
+ B = stack[top - 2].l;
+ C = stack[top - 1].l;
+
+ if ((2 < top && stack[top - 3].l <= B + C) ||
+ (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
+ A = stack[top - 3].l;
+
+ if (A <= C) {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 3].l += B;
+ stack[top - 2] = stack[top - 1];
+ --top;
+ } else {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += C;
+ --top;
+ }
+ } else if (1 < top && B <= C) {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += C;
+ --top;
+ } else {
+ break;
+ }
+ }
+
+ *stack_ptr = top;
+ return 0;
+}
+
+
+
+static int
+aforce_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack,
+ npy_intp *stack_ptr, buffer_intp *buffer, size_t len)
+{
+ int ret;
+ npy_intp top = *stack_ptr;
+
+ while (2 < top) {
+ if (stack[top - 3].l <= stack[top - 1].l) {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 3].l += stack[top - 2].l;
+ stack[top - 2] = stack[top - 1];
+ --top;
+ } else {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += stack[top - 1].l;
+ --top;
+ }
+ }
+
+ if (1 < top) {
+ ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+ }
+
+ return 0;
}
int
-atimsort_@suff@(void *v, npy_intp *tosort, npy_intp num, void *varr)
+atimsort_@suff@(void *start, npy_intp *tosort, npy_intp num, void *varr)
{
PyArrayObject *arr = varr;
size_t elsize = PyArray_ITEMSIZE(arr);
size_t len = elsize / sizeof(@type@);
- npy_intp *pl, *pr, *pw;
+ int ret;
+ npy_intp l, n, stack_ptr, minrun;
+ run stack[TIMSORT_STACK_SIZE];
+ buffer_intp buffer;
+ buffer.pw = NULL;
+ buffer.size = 0;
+ stack_ptr = 0;
+ minrun = compute_min_run_short(num);
- /* Items that have zero size don't make sense to sort */
- if (elsize == 0) {
- return 0;
+ for (l = 0; l < num;) {
+ n = acount_run_@suff@(start, tosort, l, num, minrun, len);
+ /* both s and l are scaled by len */
+ stack[stack_ptr].s = l;
+ stack[stack_ptr].l = n;
+ ++stack_ptr;
+ ret = atry_collapse_@suff@(start, tosort, stack, &stack_ptr, &buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ l += n;
}
- pl = tosort;
- pr = pl + num;
- pw = malloc((num / 2) * sizeof(npy_intp));
+ ret = aforce_collapse_@suff@(start, tosort, stack, &stack_ptr, &buffer, len);
- if (pw == NULL) {
- return -NPY_ENOMEM;
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ ret = 0;
+cleanup:
+
+ if (buffer.pw != NULL) {
+ free(buffer.pw);
}
- amergesort0_@suff@(pl, pr, v, pw, len);
- free(pw);
- return 0;
+ return ret;
}
+
/**end repeat**/
+
/*
*****************************************************************************
** GENERIC SORT **
@@ -734,157 +1740,811 @@ atimsort_@suff@(void *v, npy_intp *tosort, npy_intp num, void *varr)
*/
-static void
-npy_mergesort0(char *pl, char *pr, char *pw, char *vp, npy_intp elsize,
- PyArray_CompareFunc *cmp, PyArrayObject *arr)
-{
- char *pi, *pj, *pk, *pm;
-
- if (pr - pl > SMALL_MERGESORT * elsize) {
- /* merge sort */
- pm = pl + (((pr - pl) / elsize) >> 1) * elsize;
- npy_mergesort0(pl, pm, pw, vp, elsize, cmp, arr);
- npy_mergesort0(pm, pr, pw, vp, elsize, cmp, arr);
- GENERIC_COPY(pw, pl, pm - pl);
- pi = pw + (pm - pl);
- pj = pw;
- pk = pl;
-
- while (pj < pi && pm < pr) {
- if (cmp(pm, pj, arr) < 0) {
- GENERIC_COPY(pk, pm, elsize);
- pm += elsize;
- pk += elsize;
- } else {
- GENERIC_COPY(pk, pj, elsize);
- pj += elsize;
- pk += elsize;
- }
- }
+typedef struct {
+ char *pw;
+ npy_intp size;
+ size_t len;
+} buffer_char;
+
- GENERIC_COPY(pk, pj, pi - pj);
+static NPY_INLINE int
+resize_buffer_char(buffer_char *buffer, npy_intp new_size)
+{
+ if (new_size <= buffer->size) {
+ return 0;
+ }
+
+ if (NPY_UNLIKELY(buffer->pw == NULL)) {
+ buffer->pw = malloc(sizeof(char) * new_size * buffer->len);
+ } else {
+ buffer->pw = realloc(buffer->pw, sizeof(char) * new_size * buffer->len);
+ }
+
+ buffer->size = new_size;
+
+ if (NPY_UNLIKELY(buffer->pw == NULL)) {
+ return -NPY_ENOMEM;
} else {
+ return 0;
+ }
+}
+
+
+static npy_intp
+npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun,
+ char *vp, size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ npy_intp sz;
+ char *pl, *pi, *pj, *pr;
+
+ if (NPY_UNLIKELY(num - l == 1)) {
+ return 1;
+ }
+
+ pl = arr + l * len;
+
+ /* (not strictly) ascending sequence */
+ if (cmp(pl, pl + len, py_arr) <= 0) {
+ for (pi = pl + len; pi < arr + (num - 1) * len
+ && cmp(pi, pi + len, py_arr) <= 0; pi += len) {
+ }
+ } else { /* (strictly) descending sequence */
+ for (pi = pl + len; pi < arr + (num - 1) * len
+ && cmp(pi + len, pi, py_arr) < 0; pi += len) {
+ }
+
+ for (pj = pl, pr = pi; pj < pr; pj += len, pr -= len) {
+ GENERIC_SWAP(pj, pr, len);
+ }
+ }
+
+ pi += len;
+ sz = (pi - pl) / len;
+
+ if (sz < minrun) {
+ if (l + minrun < num) {
+ sz = minrun;
+ } else {
+ sz = num - l;
+ }
+
+ pr = pl + sz * len;
+
/* insertion sort */
- for (pi = pl + elsize; pi < pr; pi += elsize) {
- GENERIC_COPY(vp, pi, elsize);
+ for (; pi < pr; pi += len) {
+ GENERIC_COPY(vp, pi, len);
pj = pi;
- pk = pi - elsize;
- while (pj > pl && cmp(vp, pk, arr) < 0) {
- GENERIC_COPY(pj, pk, elsize);
- pj -= elsize;
- pk -= elsize;
+ while (pl < pj && cmp(vp, pj - len, py_arr) < 0) {
+ GENERIC_COPY(pj, pj - len, len);
+ pj -= len;
}
- GENERIC_COPY(pj, vp, elsize);
+ GENERIC_COPY(pj, vp, len);
}
}
+
+ return sz;
}
-int
-npy_timsort(void *start, npy_intp num, void *varr)
+static npy_intp
+npy_gallop_right(const char *arr, const npy_intp size, const char *key,
+ size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
{
- PyArrayObject *arr = varr;
- npy_intp elsize = PyArray_ITEMSIZE(arr);
- PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
- char *pl = start;
- char *pr = pl + num * elsize;
- char *pw;
- char *vp;
- int err = -NPY_ENOMEM;
+ npy_intp last_ofs, ofs, m;
- /* Items that have zero size don't make sense to sort */
- if (elsize == 0) {
+ if (cmp(key, arr, py_arr) < 0) {
return 0;
}
- pw = malloc((num >> 1) * elsize);
- vp = malloc(elsize);
+ last_ofs = 0;
+ ofs = 1;
+
+ for (;;) {
+ if (size <= ofs || ofs < 0) {
+ ofs = size; /* arr[ofs] is never accessed */
+ break;
+ }
- if (pw != NULL && vp != NULL) {
- npy_mergesort0(pl, pr, pw, vp, elsize, cmp, arr);
- err = 0;
+ if (cmp(key, arr + ofs * len, py_arr) < 0) {
+ break;
+ } else {
+ last_ofs = ofs;
+ /* ofs = 1, 3, 7, 15... */
+ ofs = (ofs << 1) + 1;
+ }
+ }
+
+ /* now that arr[last_ofs*len] <= key < arr[ofs*len] */
+ while (last_ofs + 1 < ofs) {
+ m = last_ofs + ((ofs - last_ofs) >> 1);
+
+ if (cmp(key, arr + m * len, py_arr) < 0) {
+ ofs = m;
+ } else {
+ last_ofs = m;
+ }
}
- free(vp);
- free(pw);
- return err;
+ /* now that arr[(ofs-1)*len] <= key < arr[ofs*len] */
+ return ofs;
+}
+
+
+
+static npy_intp
+npy_gallop_left(const char *arr, const npy_intp size, const char *key,
+ size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ npy_intp last_ofs, ofs, l, m, r;
+
+ if (cmp(arr + (size - 1) * len, key, py_arr) < 0) {
+ return size;
+ }
+
+ last_ofs = 0;
+ ofs = 1;
+
+ for (;;) {
+ if (size <= ofs || ofs < 0) {
+ ofs = size;
+ break;
+ }
+
+ if (cmp(arr + (size - ofs - 1) * len, key, py_arr) < 0) {
+ break;
+ } else {
+ last_ofs = ofs;
+ ofs = (ofs << 1) + 1;
+ }
+ }
+
+ /* now that arr[(size-ofs-1)*len] < key <= arr[(size-last_ofs-1)*len] */
+ l = size - ofs - 1;
+ r = size - last_ofs - 1;
+
+ while (l + 1 < r) {
+ m = l + ((r - l) >> 1);
+
+ if (cmp(arr + m * len, key, py_arr) < 0) {
+ l = m;
+ } else {
+ r = m;
+ }
+ }
+
+ /* now that arr[(r-1)*len] < key <= arr[r*len] */
+ return r;
}
static void
-npy_amergesort0(npy_intp *pl, npy_intp *pr, char *v, npy_intp *pw,
- npy_intp elsize, PyArray_CompareFunc *cmp, PyArrayObject *arr)
+npy_merge_left(char *p1, npy_intp l1, char *p2, npy_intp l2,
+ char *p3, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
{
- char *vp;
- npy_intp vi, *pi, *pj, *pk, *pm;
+ char *end = p2 + l2 * len;
+ memcpy(p3, p1, sizeof(char) * l1 * len);
+ /* first element must be in p2 otherwise skipped in the caller */
+ GENERIC_COPY(p1, p2, len);
+ p1 += len;
+ p2 += len;
+
+ while (p1 < p2 && p2 < end) {
+ if (cmp(p2, p3, py_arr) < 0) {
+ GENERIC_COPY(p1, p2, len);
+ p1 += len;
+ p2 += len;
+ } else {
+ GENERIC_COPY(p1, p3, len);
+ p1 += len;
+ p3 += len;
+ }
+ }
+
+ if (p1 != p2) {
+ memcpy(p1, p3, sizeof(char) * (p2 - p1));
+ }
+}
+
- if (pr - pl > SMALL_MERGESORT) {
- /* merge sort */
- pm = pl + ((pr - pl) >> 1);
- npy_amergesort0(pl, pm, v, pw, elsize, cmp, arr);
- npy_amergesort0(pm, pr, v, pw, elsize, cmp, arr);
+static void
+npy_merge_right(char *p1, npy_intp l1, char *p2, npy_intp l2,
+ char *p3, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ npy_intp ofs;
+ char *start = p1 - len;
+ memcpy(p3, p2, sizeof(char) * l2 * len);
+ p1 += (l1 - 1) * len;
+ p2 += (l2 - 1) * len;
+ p3 += (l2 - 1) * len;
+ /* first element must be in p1 otherwise skipped in the caller */
+ GENERIC_COPY(p2, p1, len);
+ p2 -= len;
+ p1 -= len;
- for (pi = pw, pj = pl; pj < pm;) {
- *pi++ = *pj++;
+ while (p1 < p2 && start < p1) {
+ if (cmp(p3, p1, py_arr) < 0) {
+ GENERIC_COPY(p2, p1, len);
+ p2 -= len;
+ p1 -= len;
+ } else {
+ GENERIC_COPY(p2, p3, len);
+ p2 -= len;
+ p3 -= len;
}
+ }
+
+ if (p1 != p2) {
+ ofs = p2 - start;
+ memcpy(start + len, p3 - ofs + len, sizeof(char) * ofs);
+ }
+}
- pi = pw + (pm - pl);
- pj = pw;
- pk = pl;
- while (pj < pi && pm < pr) {
- if (cmp(v + (*pm)*elsize, v + (*pj)*elsize, arr) < 0) {
- *pk++ = *pm++;
+
+static int
+npy_merge_at(char *arr, const run *stack, const npy_intp at,
+ buffer_char *buffer, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ int ret;
+ npy_intp s1, l1, s2, l2, k;
+ char *p1, *p2;
+ s1 = stack[at].s;
+ l1 = stack[at].l;
+ s2 = stack[at + 1].s;
+ l2 = stack[at + 1].l;
+ /* arr[s2] belongs to arr[s1+k] */
+ GENERIC_COPY(buffer->pw, arr + s2 * len, len);
+ k = npy_gallop_right(arr + s1 * len, l1, buffer->pw, len, cmp, py_arr);
+
+ if (l1 == k) {
+ /* already sorted */
+ return 0;
+ }
+
+ p1 = arr + (s1 + k) * len;
+ l1 -= k;
+ p2 = arr + s2 * len;
+ /* arr[s2-1] belongs to arr[s2+l2] */
+ GENERIC_COPY(buffer->pw, arr + (s2 - 1) * len, len);
+ l2 = npy_gallop_left(arr + s2 * len, l2, buffer->pw, len, cmp, py_arr);
+
+ if (l2 < l1) {
+ ret = resize_buffer_char(buffer, l2);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ npy_merge_right(p1, l1, p2, l2, buffer->pw, len, cmp, py_arr);
+ } else {
+ ret = resize_buffer_char(buffer, l1);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ npy_merge_left(p1, l1, p2, l2, buffer->pw, len, cmp, py_arr);
+ }
+
+ return 0;
+}
+
+
+static int
+npy_try_collapse(char *arr, run *stack, npy_intp *stack_ptr,
+ buffer_char *buffer, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ int ret;
+ npy_intp A, B, C, top;
+ top = *stack_ptr;
+
+ while (1 < top) {
+ B = stack[top - 2].l;
+ C = stack[top - 1].l;
+
+ if ((2 < top && stack[top - 3].l <= B + C) ||
+ (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
+ A = stack[top - 3].l;
+
+ if (A <= C) {
+ ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 3].l += B;
+ stack[top - 2] = stack[top - 1];
+ --top;
} else {
- *pk++ = *pj++;
+ ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += C;
+ --top;
}
+ } else if (1 < top && B <= C) {
+ ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += C;
+ --top;
+ } else {
+ break;
}
+ }
+
+ *stack_ptr = top;
+ return 0;
+}
+
- while (pj < pi) {
- *pk++ = *pj++;
+static int
+npy_force_collapse(char *arr, run *stack, npy_intp *stack_ptr,
+ buffer_char *buffer, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ int ret;
+ npy_intp top = *stack_ptr;
+
+ while (2 < top) {
+ if (stack[top - 3].l <= stack[top - 1].l) {
+ ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 3].l += stack[top - 2].l;
+ stack[top - 2] = stack[top - 1];
+ --top;
+ } else {
+ ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += stack[top - 1].l;
+ --top;
}
- } else {
+ }
+
+ if (1 < top) {
+ ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+ }
+
+ return 0;
+}
+
+
+int
+npy_timsort(void *start, npy_intp num, void *varr)
+{
+ size_t len = PyArray_ITEMSIZE(varr);
+ PyArray_CompareFunc *cmp = PyArray_DESCR(varr)->f->compare;
+ int ret;
+ npy_intp l, n, stack_ptr, minrun;
+ run stack[TIMSORT_STACK_SIZE];
+ buffer_char buffer;
+ buffer.pw = NULL;
+ buffer.size = 0;
+ buffer.len = len;
+ stack_ptr = 0;
+ minrun = compute_min_run_short(num);
+ /* used for insertion sort and gallop key */
+ ret = resize_buffer_char(&buffer, len);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ for (l = 0; l < num;) {
+ n = npy_count_run(start, l, num, minrun, buffer.pw, len, cmp, varr);
+ /* both s and l are scaled by len */
+ stack[stack_ptr].s = l;
+ stack[stack_ptr].l = n;
+ ++stack_ptr;
+ ret = npy_try_collapse(start, stack, &stack_ptr, &buffer, len, cmp, varr);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ l += n;
+ }
+
+ ret = npy_force_collapse(start, stack, &stack_ptr, &buffer, len, cmp, varr);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ ret = 0;
+cleanup:
+
+ if (buffer.pw != NULL) {
+ free(buffer.pw);
+ }
+
+ return ret;
+}
+
+
+/* argsort */
+
+static npy_intp
+npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num,
+ npy_intp minrun, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ npy_intp sz;
+ npy_intp vi;
+ npy_intp *pl, *pi, *pj, *pr;
+
+ if (NPY_UNLIKELY(num - l == 1)) {
+ return 1;
+ }
+
+ pl = tosort + l;
+
+ /* (not strictly) ascending sequence */
+ if (cmp(arr + (*pl) * len, arr + (*(pl + 1)) * len, py_arr) <= 0) {
+ for (pi = pl + 1; pi < tosort + num - 1
+ && cmp(arr + (*pi) * len, arr + (*(pi + 1)) * len, py_arr) <= 0; ++pi) {
+ }
+ } else { /* (strictly) descending sequence */
+ for (pi = pl + 1; pi < tosort + num - 1
+ && cmp(arr + (*(pi + 1)) * len, arr + (*pi) * len, py_arr) < 0; ++pi) {
+ }
+
+ for (pj = pl, pr = pi; pj < pr; ++pj, --pr) {
+ INTP_SWAP(*pj, *pr);
+ }
+ }
+
+ ++pi;
+ sz = pi - pl;
+
+ if (sz < minrun) {
+ if (l + minrun < num) {
+ sz = minrun;
+ } else {
+ sz = num - l;
+ }
+
+ pr = pl + sz;
+
/* insertion sort */
- for (pi = pl + 1; pi < pr; ++pi) {
+ for (; pi < pr; ++pi) {
vi = *pi;
- vp = v + vi * elsize;
pj = pi;
- pk = pi - 1;
- while (pj > pl && cmp(vp, v + (*pk)*elsize, arr) < 0) {
- *pj-- = *pk--;
+ while (pl < pj && cmp(arr + vi * len, arr + (*(pj - 1)) * len, py_arr) < 0) {
+ *pj = *(pj - 1);
+ --pj;
}
*pj = vi;
}
}
+
+ return sz;
}
-int
-npy_atimsort(void *v, npy_intp *tosort, npy_intp num, void *varr)
+static npy_intp
+npy_agallop_left(const char *arr, const npy_intp *tosort,
+ const npy_intp size, const char *key, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
{
- PyArrayObject *arr = varr;
- npy_intp elsize = PyArray_ITEMSIZE(arr);
- PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
- npy_intp *pl, *pr, *pw;
+ npy_intp last_ofs, ofs, l, m, r;
+
+ if (cmp(arr + tosort[size - 1] * len, key, py_arr) < 0) {
+ return size;
+ }
+
+ last_ofs = 0;
+ ofs = 1;
+
+ for (;;) {
+ if (size <= ofs || ofs < 0) {
+ ofs = size;
+ break;
+ }
- /* Items that have zero size don't make sense to sort */
- if (elsize == 0) {
+ if (cmp(arr + tosort[size - ofs - 1] * len, key, py_arr) < 0) {
+ break;
+ } else {
+ last_ofs = ofs;
+ ofs = (ofs << 1) + 1;
+ }
+ }
+
+ /* now that arr[tosort[size-ofs-1]*len] < key <= arr[tosort[size-last_ofs-1]*len] */
+ l = size - ofs - 1;
+ r = size - last_ofs - 1;
+
+ while (l + 1 < r) {
+ m = l + ((r - l) >> 1);
+
+ if (cmp(arr + tosort[m] * len, key, py_arr) < 0) {
+ l = m;
+ } else {
+ r = m;
+ }
+ }
+
+ /* now that arr[tosort[r-1]*len] < key <= arr[tosort[r]*len] */
+ return r;
+}
+
+
+static npy_intp
+npy_agallop_right(const char *arr, const npy_intp *tosort,
+ const npy_intp size, const char *key, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ npy_intp last_ofs, ofs, m;
+
+ if (cmp(key, arr + tosort[0] * len, py_arr) < 0) {
return 0;
}
- pl = tosort;
- pr = pl + num;
- pw = malloc((num >> 1) * sizeof(npy_intp));
+ last_ofs = 0;
+ ofs = 1;
- if (pw == NULL) {
- return -NPY_ENOMEM;
+ for (;;) {
+ if (size <= ofs || ofs < 0) {
+ ofs = size; /* arr[ofs] is never accessed */
+ break;
+ }
+
+ if (cmp(key, arr + tosort[ofs] * len, py_arr) < 0) {
+ break;
+ } else {
+ last_ofs = ofs;
+ /* ofs = 1, 3, 7, 15... */
+ ofs = (ofs << 1) + 1;
+ }
+ }
+
+ /* now that arr[tosort[last_ofs]*len] <= key < arr[tosort[ofs]*len] */
+ while (last_ofs + 1 < ofs) {
+ m = last_ofs + ((ofs - last_ofs) >> 1);
+
+ if (cmp(key, arr + tosort[m] * len, py_arr) < 0) {
+ ofs = m;
+ } else {
+ last_ofs = m;
+ }
+ }
+
+ /* now that arr[tosort[ofs-1]*len] <= key < arr[tosort[ofs]*len] */
+ return ofs;
+}
+
+
+static void
+npy_amerge_left(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2,
+ npy_intp l2, npy_intp *p3, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ npy_intp *end = p2 + l2;
+ memcpy(p3, p1, sizeof(npy_intp) * l1);
+ /* first element must be in p2 otherwise skipped in the caller */
+ *p1++ = *p2++;
+
+ while (p1 < p2 && p2 < end) {
+ if (cmp(arr + (*p2) * len, arr + (*p3) * len, py_arr) < 0) {
+ *p1++ = *p2++;
+ } else {
+ *p1++ = *p3++;
+ }
+ }
+
+ if (p1 != p2) {
+ memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1));
+ }
+}
+
+
+static void
+npy_amerge_right(char *arr, npy_intp* p1, npy_intp l1, npy_intp *p2,
+ npy_intp l2, npy_intp *p3, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ npy_intp ofs;
+ npy_intp *start = p1 - 1;
+ memcpy(p3, p2, sizeof(npy_intp) * l2);
+ p1 += l1 - 1;
+ p2 += l2 - 1;
+ p3 += l2 - 1;
+ /* first element must be in p1 otherwise skipped in the caller */
+ *p2-- = *p1--;
+
+ while (p1 < p2 && start < p1) {
+ if (cmp(arr + (*p3) * len, arr + (*p1) * len, py_arr) < 0) {
+ *p2-- = *p1--;
+ } else {
+ *p2-- = *p3--;
+ }
+ }
+
+ if (p1 != p2) {
+ ofs = p2 - start;
+ memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs);
+ }
+}
+
+
+
+static int
+npy_amerge_at(char *arr, npy_intp *tosort, const run *stack,
+ const npy_intp at, buffer_intp *buffer, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ int ret;
+ npy_intp s1, l1, s2, l2, k;
+ npy_intp *p1, *p2;
+ s1 = stack[at].s;
+ l1 = stack[at].l;
+ s2 = stack[at + 1].s;
+ l2 = stack[at + 1].l;
+ /* tosort[s2] belongs to tosort[s1+k] */
+ k = npy_agallop_right(arr, tosort + s1, l1, arr + tosort[s2] * len, len, cmp,
+ py_arr);
+
+ if (l1 == k) {
+ /* already sorted */
+ return 0;
+ }
+
+ p1 = tosort + s1 + k;
+ l1 -= k;
+ p2 = tosort + s2;
+ /* tosort[s2-1] belongs to tosort[s2+l2] */
+ l2 = npy_agallop_left(arr, tosort + s2, l2, arr + tosort[s2 - 1] * len,
+ len, cmp, py_arr);
+
+ if (l2 < l1) {
+ ret = resize_buffer_intp(buffer, l2);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ npy_amerge_right(arr, p1, l1, p2, l2, buffer->pw, len, cmp, py_arr);
+ } else {
+ ret = resize_buffer_intp(buffer, l1);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ npy_amerge_left(arr, p1, l1, p2, l2, buffer->pw, len, cmp, py_arr);
}
- npy_amergesort0(pl, pr, v, pw, elsize, cmp, arr);
- free(pw);
return 0;
}
+
+
+static int
+npy_atry_collapse(char *arr, npy_intp *tosort, run *stack,
+ npy_intp *stack_ptr, buffer_intp *buffer, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ int ret;
+ npy_intp A, B, C, top;
+ top = *stack_ptr;
+
+ while (1 < top) {
+ B = stack[top - 2].l;
+ C = stack[top - 1].l;
+
+ if ((2 < top && stack[top - 3].l <= B + C) ||
+ (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
+ A = stack[top - 3].l;
+
+ if (A <= C) {
+ ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 3].l += B;
+ stack[top - 2] = stack[top - 1];
+ --top;
+ } else {
+ ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += C;
+ --top;
+ }
+ } else if (1 < top && B <= C) {
+ ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += C;
+ --top;
+ } else {
+ break;
+ }
+ }
+
+ *stack_ptr = top;
+ return 0;
+}
+
+
+static int
+npy_aforce_collapse(char *arr, npy_intp *tosort, run *stack,
+ npy_intp *stack_ptr, buffer_intp *buffer, size_t len,
+ PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
+{
+ int ret;
+ npy_intp top = *stack_ptr;
+
+ while (2 < top) {
+ if (stack[top - 3].l <= stack[top - 1].l) {
+ ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 3].l += stack[top - 2].l;
+ stack[top - 2] = stack[top - 1];
+ --top;
+ } else {
+ ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+
+ stack[top - 2].l += stack[top - 1].l;
+ --top;
+ }
+ }
+
+ if (1 < top) {
+ ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr);
+
+ if (NPY_UNLIKELY(ret < 0)) { return ret; }
+ }
+
+ return 0;
+}
+
+
+int
+npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr)
+{
+ size_t len = PyArray_ITEMSIZE(varr);
+ PyArray_CompareFunc *cmp = PyArray_DESCR(varr)->f->compare;
+ int ret;
+ npy_intp l, n, stack_ptr, minrun;
+ run stack[TIMSORT_STACK_SIZE];
+ buffer_intp buffer;
+ buffer.pw = NULL;
+ buffer.size = 0;
+ stack_ptr = 0;
+ minrun = compute_min_run_short(num);
+
+ for (l = 0; l < num;) {
+ n = npy_acount_run(start, tosort, l, num, minrun, len, cmp, varr);
+ /* both s and l are scaled by len */
+ stack[stack_ptr].s = l;
+ stack[stack_ptr].l = n;
+ ++stack_ptr;
+ ret = npy_atry_collapse(start, tosort, stack, &stack_ptr, &buffer, len, cmp,
+ varr);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ l += n;
+ }
+
+ ret = npy_aforce_collapse(start, tosort, stack, &stack_ptr, &buffer, len,
+ cmp, varr);
+
+ if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
+
+ ret = 0;
+cleanup:
+
+ if (buffer.pw != NULL) {
+ free(buffer.pw);
+ }
+
+ return ret;
+}