summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Berg <sebastianb@nvidia.com>2023-05-10 15:11:38 +0200
committerSebastian Berg <sebastianb@nvidia.com>2023-05-15 12:43:30 +0200
commit21602a8b1673a7b468d032ef19c20c53ac15c0b9 (patch)
tree542eddc432d5a6004c9bbc34a119e4bf58163b34
parentb8a43cb809edd754044af4aadfb9295915333b98 (diff)
downloadnumpy-21602a8b1673a7b468d032ef19c20c53ac15c0b9.tar.gz
BUG: Fix weak scalar logic for large ints in ufuncs
This fixes it, breaks warnings (partially), but most or all of those paths should be errors anyway.
-rw-r--r--numpy/core/src/multiarray/arrayobject.h7
-rw-r--r--numpy/core/src/umath/ufunc_object.c38
-rw-r--r--numpy/core/tests/test_nep50_promotions.py32
3 files changed, 74 insertions, 3 deletions
diff --git a/numpy/core/src/multiarray/arrayobject.h b/numpy/core/src/multiarray/arrayobject.h
index f7d0734db..d9900439c 100644
--- a/numpy/core/src/multiarray/arrayobject.h
+++ b/numpy/core/src/multiarray/arrayobject.h
@@ -40,6 +40,13 @@ static const int NPY_ARRAY_WARN_ON_WRITE = (1 << 31);
static const int NPY_ARRAY_WAS_PYTHON_INT = (1 << 30);
static const int NPY_ARRAY_WAS_PYTHON_FLOAT = (1 << 29);
static const int NPY_ARRAY_WAS_PYTHON_COMPLEX = (1 << 28);
+/*
+ * Mark that this was a huge int and the array needed to be replaced (no
+ * re-use).  This flag is only used in the ufunc machinery where it is
+ * tricky to correctly cover all type resolution paths where
+ * `np.array(large_integer)` returns an object array.
+ */
+static const int NPY_ARRAY_WAS_INT_AND_REPLACED = (1 << 27);
static const int NPY_ARRAY_WAS_PYTHON_LITERAL = (1 << 30 | 1 << 29 | 1 << 28);
#endif /* NUMPY_CORE_SRC_MULTIARRAY_ARRAYOBJECT_H_ */
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 39e64decb..94cd73ef6 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -1011,6 +1011,34 @@ convert_ufunc_arguments(PyUFuncObject *ufunc,
* necessary to propagate the information to the legacy type resolution.
*/
if (npy_mark_tmp_array_if_pyscalar(obj, out_op[i], &out_op_DTypes[i])) {
+ if (PyArray_FLAGS(out_op[i]) & NPY_ARRAY_WAS_PYTHON_INT
+ && PyArray_TYPE(out_op[i]) != NPY_LONG) {
+ /*
+ * When `np.array(integer)` is not the default integer (mainly
+ * object dtype), this confuses many type resolvers. Simply
+ * forcing a default integer array is unfortunately easiest.
+ * This disables the optional NEP 50 warnings, but in
+ * practice when this happens we should _usually_ pick the
+ * default integer loop and that raises an error.
+ * (An exception is `float64(1.) + 10**100` which silently
+ * will give a float64 result rather than a Python float.)
+ *
+ * TODO: Just like the general dual NEP 50/legacy promotion
+ * support this is meant as a temporary hack for NumPy 1.25.
+ */
+ static PyArrayObject *zero_arr = NULL;
+ if (NPY_UNLIKELY(zero_arr == NULL)) {
+ zero_arr = (PyArrayObject *)PyArray_ZEROS(
+ 0, NULL, NPY_LONG, NPY_FALSE);
+ if (zero_arr == NULL) {
+ goto fail;
+ }
+ ((PyArrayObject_fields *)zero_arr)->flags |= (
+ NPY_ARRAY_WAS_PYTHON_INT|NPY_ARRAY_WAS_INT_AND_REPLACED);
+ }
+ Py_INCREF(zero_arr);
+ Py_SETREF(out_op[i], zero_arr);
+ }
*promoting_pyscalars = NPY_TRUE;
}
}
@@ -4929,9 +4957,13 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
if (!(orig_flags & NPY_ARRAY_WAS_PYTHON_LITERAL)) {
continue;
}
- /* If the descriptor matches, no need to worry about conversion */
- if (PyArray_EquivTypes(
- PyArray_DESCR(operands[i]), operation_descrs[i])) {
+ /*
+ * If descriptor matches, no need to convert, but integers may
+ * have been too large.
+ */
+ if (!(orig_flags & NPY_ARRAY_WAS_INT_AND_REPLACED)
+ && PyArray_EquivTypes(
+ PyArray_DESCR(operands[i]), operation_descrs[i])) {
continue;
}
/* Otherwise, replace the operand with a new array */
diff --git a/numpy/core/tests/test_nep50_promotions.py b/numpy/core/tests/test_nep50_promotions.py
index 9e84c78c1..0b297e0f7 100644
--- a/numpy/core/tests/test_nep50_promotions.py
+++ b/numpy/core/tests/test_nep50_promotions.py
@@ -181,6 +181,7 @@ def test_nep50_integer_regression():
assert (arr + 2**63).dtype == np.float64
assert (arr[()] + 2**63).dtype == np.float64
+
def test_nep50_with_axisconcatenator():
# I promised that this will be an error in the future in the 1.25
# release notes; test this (NEP 50 opt-in makes the deprecation an error).
@@ -188,3 +189,34 @@ def test_nep50_with_axisconcatenator():
with pytest.raises(OverflowError):
np.r_[np.arange(5, dtype=np.int8), 255]
+
+
+@pytest.mark.parametrize("ufunc", [np.add, np.power])
+@pytest.mark.parametrize("state", ["weak", "weak_and_warn"])
+def test_nep50_huge_integers(ufunc, state):
+ # Very large integers are complicated, because they go to uint64 or
+ # object dtype. When mixed with a uint64 the result should remain uint64.
+ np._set_promotion_state(state)
+
+ with pytest.raises(OverflowError):
+ ufunc(np.int64(0), 2**63) # 2**63 too large for int64
+
+ if state == "weak_and_warn":
+ with pytest.warns(UserWarning,
+ match="result dtype changed.*float64.*uint64"):
+ with pytest.raises(OverflowError):
+ ufunc(np.uint64(0), 2**64)
+ else:
+ with pytest.raises(OverflowError):
+ ufunc(np.uint64(0), 2**64) # 2**64 cannot be represented by uint64
+
+ # However, 2**63 can be represented by the uint64 (and that is used):
+ if state == "weak_and_warn":
+ with pytest.warns(UserWarning,
+ match="result dtype changed.*float64.*uint64"):
+ res = ufunc(np.uint64(1), 2**63)
+ else:
+ res = ufunc(np.uint64(1), 2**63)
+
+ assert res.dtype == np.uint64
+ assert res == ufunc(1, 2**63, dtype=object)