summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Charles Harris <charlesr.harris@gmail.com> 2023-04-06 17:20:59 -0400
committer GitHub <noreply@github.com> 2023-04-06 17:20:59 -0400
commit 3c6acb51242a4f1067c6ac5159dece8889d19d10 (patch)
tree 05307718ae3ce7f8c23646412819c9536f4d2537
parent dcd76b33115bb2bf5d9f438e38d0b93d8fab2f84 (diff)
parent 5183da05f2a748589e07716d3421a82641dfa3b7 (diff)
download numpy-3c6acb51242a4f1067c6ac5159dece8889d19d10.tar.gz
Merge pull request #23543 from charris/backport-23495
BUG: fix loading and storing big arrays on s390x
-rw-r--r-- numpy/core/src/common/simd/vec/memory.h 14
-rw-r--r-- numpy/core/tests/test_ufunc.py 7
2 files changed, 14 insertions, 7 deletions
diff --git a/numpy/core/src/common/simd/vec/memory.h b/numpy/core/src/common/simd/vec/memory.h
index e8f588ef2..495f698bf 100644
--- a/numpy/core/src/common/simd/vec/memory.h
+++ b/numpy/core/src/common/simd/vec/memory.h
@@ -173,9 +173,9 @@ NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, n
assert(nlane > 0);
npyv_s32 vfill = npyv_setall_s32(fill);
#ifdef NPY_HAVE_VX
- const unsigned blane = (unsigned short)nlane;
+ const unsigned blane = (nlane > 4) ? 4 : nlane;
const npyv_u32 steps = npyv_set_u32(0, 1, 2, 3);
- const npyv_u32 vlane = npyv_setall_u32((unsigned)blane);
+ const npyv_u32 vlane = npyv_setall_u32(blane);
const npyv_b32 mask = vec_cmpgt(vlane, steps);
npyv_s32 a = vec_load_len(ptr, blane*4-1);
return vec_sel(vfill, a, mask);
@@ -201,8 +201,8 @@ NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, n
NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane)
{
#ifdef NPY_HAVE_VX
- unsigned blane = ((unsigned short)nlane)*4 - 1;
- return vec_load_len(ptr, blane);
+ unsigned blane = (nlane > 4) ? 4 : nlane;
+ return vec_load_len(ptr, blane*4-1);
#else
return npyv_load_till_s32(ptr, nlane, 0);
#endif
@@ -220,7 +220,7 @@ NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, n
NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane)
{
#ifdef NPY_HAVE_VX
- unsigned blane = (unsigned short)nlane;
+ unsigned blane = (nlane > 2) ? 2 : nlane;
return vec_load_len((const signed long long*)ptr, blane*8-1);
#else
return npyv_load_till_s64(ptr, nlane, 0);
@@ -273,7 +273,7 @@ NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a
{
assert(nlane > 0);
#ifdef NPY_HAVE_VX
- unsigned blane = (unsigned short)nlane;
+ unsigned blane = (nlane > 4) ? 4 : nlane;
vec_store_len(a, ptr, blane*4-1);
#else
switch(nlane) {
@@ -297,7 +297,7 @@ NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a
{
assert(nlane > 0);
#ifdef NPY_HAVE_VX
- unsigned blane = (unsigned short)nlane;
+ unsigned blane = (nlane > 2) ? 2 : nlane;
vec_store_len(a, (signed long long*)ptr, blane*8-1);
#else
if (nlane == 1) {
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index cc6c0839d..cae846697 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -2789,3 +2789,10 @@ class TestLowlevelAPIAccess:
with pytest.raises(TypeError):
# cannot call it a second time:
np.negative._get_strided_loop(call_info)
+
+ def test_long_arrays(self):
+ t = np.zeros((1029, 917), dtype=np.single)
+ t[0][0] = 1
+ t[28][414] = 1
+ tc = np.cos(t)
+ assert_equal(tc[0][0], tc[28][414])