diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2023-04-06 17:20:59 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-06 17:20:59 -0400 |
commit | 3c6acb51242a4f1067c6ac5159dece8889d19d10 (patch) | |
tree | 05307718ae3ce7f8c23646412819c9536f4d2537 | |
parent | dcd76b33115bb2bf5d9f438e38d0b93d8fab2f84 (diff) | |
parent | 5183da05f2a748589e07716d3421a82641dfa3b7 (diff) | |
download | numpy-3c6acb51242a4f1067c6ac5159dece8889d19d10.tar.gz |
Merge pull request #23543 from charris/backport-23495
BUG: fix loading and storing big arrays on s390x
-rw-r--r-- | numpy/core/src/common/simd/vec/memory.h | 14 | ||||
-rw-r--r-- | numpy/core/tests/test_ufunc.py | 7 |
2 files changed, 14 insertions, 7 deletions
diff --git a/numpy/core/src/common/simd/vec/memory.h b/numpy/core/src/common/simd/vec/memory.h index e8f588ef2..495f698bf 100644 --- a/numpy/core/src/common/simd/vec/memory.h +++ b/numpy/core/src/common/simd/vec/memory.h @@ -173,9 +173,9 @@ NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, n assert(nlane > 0); npyv_s32 vfill = npyv_setall_s32(fill); #ifdef NPY_HAVE_VX - const unsigned blane = (unsigned short)nlane; + const unsigned blane = (nlane > 4) ? 4 : nlane; const npyv_u32 steps = npyv_set_u32(0, 1, 2, 3); - const npyv_u32 vlane = npyv_setall_u32((unsigned)blane); + const npyv_u32 vlane = npyv_setall_u32(blane); const npyv_b32 mask = vec_cmpgt(vlane, steps); npyv_s32 a = vec_load_len(ptr, blane*4-1); return vec_sel(vfill, a, mask); @@ -201,8 +201,8 @@ NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, n NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane) { #ifdef NPY_HAVE_VX - unsigned blane = ((unsigned short)nlane)*4 - 1; - return vec_load_len(ptr, blane); + unsigned blane = (nlane > 4) ? 4 : nlane; + return vec_load_len(ptr, blane*4-1); #else return npyv_load_till_s32(ptr, nlane, 0); #endif @@ -220,7 +220,7 @@ NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, n NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane) { #ifdef NPY_HAVE_VX - unsigned blane = (unsigned short)nlane; + unsigned blane = (nlane > 2) ? 2 : nlane; return vec_load_len((const signed long long*)ptr, blane*8-1); #else return npyv_load_till_s64(ptr, nlane, 0); @@ -273,7 +273,7 @@ NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a { assert(nlane > 0); #ifdef NPY_HAVE_VX - unsigned blane = (unsigned short)nlane; + unsigned blane = (nlane > 4) ? 4 : nlane; vec_store_len(a, ptr, blane*4-1); #else switch(nlane) { @@ -297,7 +297,7 @@ NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a { assert(nlane > 0); #ifdef NPY_HAVE_VX - unsigned blane = (unsigned short)nlane; + unsigned blane = (nlane > 2) ? 2 : nlane; vec_store_len(a, (signed long long*)ptr, blane*8-1); #else if (nlane == 1) { diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index cc6c0839d..cae846697 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -2789,3 +2789,10 @@ class TestLowlevelAPIAccess: with pytest.raises(TypeError): # cannot call it a second time: np.negative._get_strided_loop(call_info) + + def test_long_arrays(self): + t = np.zeros((1029, 917), dtype=np.single) + t[0][0] = 1 + t[28][414] = 1 + tc = np.cos(t) + assert_equal(tc[0][0], tc[28][414]) |