summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/neps/nep-0041-improved-dtype-support.rst52
-rw-r--r--doc/neps/nep-0042-new-dtypes.rst8
-rw-r--r--doc/release/upcoming_changes/17068.compatibility.rst4
-rw-r--r--doc/release/upcoming_changes/17116.expired.rst2
-rw-r--r--doc/source/reference/routines.ctypeslib.rst1
-rw-r--r--numpy/__init__.py19
-rw-r--r--numpy/__init__.pyi111
-rw-r--r--numpy/core/arrayprint.py3
-rw-r--r--numpy/core/setup.py3
-rw-r--r--numpy/core/src/common/array_assign.c4
-rw-r--r--numpy/core/src/common/npy_cblas.h35
-rw-r--r--numpy/core/src/multiarray/_multiarray_tests.c.src2
-rw-r--r--numpy/core/src/multiarray/arrayobject.c2
-rw-r--r--numpy/core/src/multiarray/arraytypes.c.src2
-rw-r--r--numpy/core/src/multiarray/buffer.c2
-rw-r--r--numpy/core/src/multiarray/common.c18
-rw-r--r--numpy/core/src/multiarray/common.h37
-rw-r--r--numpy/core/src/multiarray/convert.c4
-rw-r--r--numpy/core/src/multiarray/convert_datatype.c7
-rw-r--r--numpy/core/src/multiarray/ctors.c14
-rw-r--r--numpy/core/src/multiarray/datetime.c36
-rw-r--r--numpy/core/src/multiarray/descriptor.c18
-rw-r--r--numpy/core/src/multiarray/dragon4.c4
-rw-r--r--numpy/core/src/multiarray/einsum.c.src1895
-rw-r--r--numpy/core/src/multiarray/einsum_debug.h28
-rw-r--r--numpy/core/src/multiarray/einsum_sumprod.c.src1897
-rw-r--r--numpy/core/src/multiarray/einsum_sumprod.h12
-rw-r--r--numpy/core/src/multiarray/flagsobject.c2
-rw-r--r--numpy/core/src/multiarray/getset.c11
-rw-r--r--numpy/core/src/multiarray/mapping.c8
-rw-r--r--numpy/core/src/multiarray/methods.c35
-rw-r--r--numpy/core/src/multiarray/multiarraymodule.c32
-rw-r--r--numpy/core/src/multiarray/nditer_constr.c16
-rw-r--r--numpy/core/src/multiarray/nditer_pywrap.c2
-rw-r--r--numpy/core/src/multiarray/scalartypes.c.src76
-rw-r--r--numpy/core/src/multiarray/shape.c12
-rw-r--r--numpy/core/src/multiarray/strfuncs.c186
-rw-r--r--numpy/core/src/multiarray/usertypes.c2
-rw-r--r--numpy/core/src/umath/_rational_tests.c.src10
-rw-r--r--numpy/core/src/umath/_umath_tests.c.src2
-rw-r--r--numpy/core/src/umath/extobj.c2
-rw-r--r--numpy/core/src/umath/override.c2
-rw-r--r--numpy/core/src/umath/ufunc_object.c31
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.c37
-rw-r--r--numpy/core/src/umath/umathmodule.c34
-rw-r--r--numpy/core/tests/test_regression.py28
-rw-r--r--numpy/ctypeslib.py8
-rw-r--r--numpy/f2py/cfuncs.py4
-rwxr-xr-xnumpy/f2py/rules.py2
-rw-r--r--numpy/f2py/src/test/foomodule.c2
-rw-r--r--numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c21
-rw-r--r--numpy/lib/arraysetops.py6
-rw-r--r--numpy/lib/function_base.py4
-rw-r--r--numpy/lib/tests/test_financial_expired.py13
-rw-r--r--numpy/linalg/umath_linalg.c.src2
-rw-r--r--numpy/ma/timer_comparison.py4
-rw-r--r--numpy/tests/typing/fail/fromnumeric.py28
-rw-r--r--numpy/tests/typing/pass/fromnumeric.py75
-rw-r--r--numpy/tests/typing/reveal/fromnumeric.py73
-rwxr-xr-xruntests.py33
60 files changed, 2661 insertions, 2362 deletions
diff --git a/doc/neps/nep-0041-improved-dtype-support.rst b/doc/neps/nep-0041-improved-dtype-support.rst
index 56ff5eac6..6dc4ea50c 100644
--- a/doc/neps/nep-0041-improved-dtype-support.rst
+++ b/doc/neps/nep-0041-improved-dtype-support.rst
@@ -514,22 +514,22 @@ are not yet fully clear, we anticipate, and accept the following changes:
* **C-API**:
- * In old versions of NumPy ``PyArray_DescrCheck`` is a macro which uses
- ``type(dtype) is np.dtype``. When compiling against an old NumPy version,
- the macro may have to be replaced with the corresponding
- ``PyObject_IsInstance`` call. (If this is a problem, we could backport
- fixing the macro)
-
- * The UFunc machinery changes will break *limited* parts of the current
- implementation. Replacing e.g. the default ``TypeResolver`` is expected
- to remain supported for a time, although optimized masked inner loop iteration
- (which is not even used *within* NumPy) will no longer be supported.
-
- * All functions currently defined on the dtypes, such as
- ``PyArray_Descr->f->nonzero``, will be defined and accessed differently.
- This means that in the long run lowlevel access code will
- have to be changed to use the new API. Such changes are expected to be
- necessary in very few project.
+ * In old versions of NumPy ``PyArray_DescrCheck`` is a macro which uses
+ ``type(dtype) is np.dtype``. When compiling against an old NumPy version,
+ the macro may have to be replaced with the corresponding
+ ``PyObject_IsInstance`` call. (If this is a problem, we could backport
+ fixing the macro)
+
+ * The UFunc machinery changes will break *limited* parts of the current
+ implementation. Replacing e.g. the default ``TypeResolver`` is expected
+ to remain supported for a time, although optimized masked inner loop iteration
+ (which is not even used *within* NumPy) will no longer be supported.
+
+ * All functions currently defined on the dtypes, such as
+ ``PyArray_Descr->f->nonzero``, will be defined and accessed differently.
+ This means that in the long run lowlevel access code will
+ have to be changed to use the new API. Such changes are expected to be
+ necessary in very few project.
* **dtype implementors (C-API)**:
@@ -541,16 +541,16 @@ are not yet fully clear, we anticipate, and accept the following changes:
At least in some code paths, a similar mechanism is already used.
* The ``scalarkind`` slot and registration of scalar casting will be
- removed/ignored without replacement.
- It currently allows partial value-based casting.
- The ``PyArray_ScalarKind`` function will continue to work for builtin types,
- but will not be used internally and be deprecated.
-
- * Currently user dtypes are defined as instances of ``np.dtype``.
- The creation works by the user providing a prototype instance.
- NumPy will need to modify at least the type during registration.
- This has no effect for either ``rational`` or ``quaternion`` and mutation
- of the structure seems unlikely after registration.
+ removed/ignored without replacement.
+ It currently allows partial value-based casting.
+ The ``PyArray_ScalarKind`` function will continue to work for builtin types,
+ but will not be used internally and be deprecated.
+
+ * Currently user dtypes are defined as instances of ``np.dtype``.
+ The creation works by the user providing a prototype instance.
+ NumPy will need to modify at least the type during registration.
+ This has no effect for either ``rational`` or ``quaternion`` and mutation
+ of the structure seems unlikely after registration.
Since there is a fairly large API surface concerning datatypes, further changes
or the limitation certain function to currently existing datatypes is
diff --git a/doc/neps/nep-0042-new-dtypes.rst b/doc/neps/nep-0042-new-dtypes.rst
index 1f476114f..b37555892 100644
--- a/doc/neps/nep-0042-new-dtypes.rst
+++ b/doc/neps/nep-0042-new-dtypes.rst
@@ -267,7 +267,7 @@ information is currently provided and will be defined on the class:
deprecated. This may be relaxed if a use-case arises.
Additionally, existing methods (and C-side fields) will be provided.
-However, the fields ``kind`` and and ``char`` will be set to ``\0``
+However, the fields ``kind`` and ``char`` will be set to ``\0``
(NULL character) on the C-side.
While discouraged, except for NumPy builtin types, ``kind`` both will return
the ``__qualname__`` of the object to ensure uniqueness for all DTypes.
@@ -307,7 +307,7 @@ is the ``np.datetime64`` scalar.
A potential DType such as ``Categorical`` will not be required to have a clear type
associated with it. Instead, the ``type`` may be ``object`` and the
-categoircal's values are arbitrary objects.
+categorical's values are arbitrary objects.
Unlike with well-defined scalars, this ``type`` cannot
not be used for the dtype discovery necessary for coercion
(compare section `DType Discovery during Array Coercion`_).
@@ -659,7 +659,7 @@ should be "minutes".
Common DType Operations
^^^^^^^^^^^^^^^^^^^^^^^
-Numpy currently provides functions like ``np.result_type`` and
+NumPy currently provides functions like ``np.result_type`` and
``np.promote_types`` for determining common types.
These differ in that ``np.result_type`` can take arrays and scalars as input
and implements value based promotion [1]_.
@@ -972,7 +972,7 @@ In general we could implement certain casts, such as ``int8`` to ``int24``
even if the user only provides an ``int16 -> int24`` cast.
This proposal currently does not provide this functionality. However,
it could be extended in the future to either find such casts dynamically,
-or at least allow ``adjust_descriptors`` to return arbitray ``dtypes``.
+or at least allow ``adjust_descriptors`` to return arbitrary ``dtypes``.
If ``CastingImpl[Int8, Int24].adjust_descriptors((int8, int24))`` returns
``(int16, int24)``, the actual casting process could be extended to include
the ``int8 -> int16`` cast. Unlike the above example, which is limited
diff --git a/doc/release/upcoming_changes/17068.compatibility.rst b/doc/release/upcoming_changes/17068.compatibility.rst
new file mode 100644
index 000000000..7aa4e58ae
--- /dev/null
+++ b/doc/release/upcoming_changes/17068.compatibility.rst
@@ -0,0 +1,4 @@
+f2py generated code may return unicode instead of byte strings
+--------------------------------------------------------------
+Some byte strings previously returned by f2py generated code may now be unicode
+strings. This results from the ongoing Python2 -> Python3 cleanup.
diff --git a/doc/release/upcoming_changes/17116.expired.rst b/doc/release/upcoming_changes/17116.expired.rst
new file mode 100644
index 000000000..d8a3a43d5
--- /dev/null
+++ b/doc/release/upcoming_changes/17116.expired.rst
@@ -0,0 +1,2 @@
+* The 14-year deprecation of ``np.ctypeslib.ctypes_load_library`` is expired.
+ Use :func:`~numpy.ctypeslib.load_library` instead, which is identical.
diff --git a/doc/source/reference/routines.ctypeslib.rst b/doc/source/reference/routines.ctypeslib.rst
index 562638e9c..3a059f5d9 100644
--- a/doc/source/reference/routines.ctypeslib.rst
+++ b/doc/source/reference/routines.ctypeslib.rst
@@ -9,6 +9,5 @@ C-Types Foreign Function Interface (:mod:`numpy.ctypeslib`)
.. autofunction:: as_array
.. autofunction:: as_ctypes
.. autofunction:: as_ctypes_type
-.. autofunction:: ctypes_load_library
.. autofunction:: load_library
.. autofunction:: ndpointer
diff --git a/numpy/__init__.py b/numpy/__init__.py
index c594928ce..41c3dc42d 100644
--- a/numpy/__init__.py
+++ b/numpy/__init__.py
@@ -215,12 +215,11 @@ else:
del Arrayterator
# These names were removed in NumPy 1.20. For at least one release,
- # attempts to access these names in the numpy namespace will have an
- # error message that refers to NEP 32 and points to the numpy_financial
- # library.
+ # attempts to access these names in the numpy namespace will trigger
+ # a warning, and calling the function will raise an exception.
_financial_names = ['fv', 'ipmt', 'irr', 'mirr', 'nper', 'npv', 'pmt',
'ppmt', 'pv', 'rate']
- __expired_attrs__ = {
+ __expired_functions__ = {
name: (f'In accordance with NEP 32, the function {name} was removed '
'from NumPy version 1.20. A replacement for this function '
'is available in the numpy_financial library: '
@@ -241,13 +240,19 @@ else:
# module level getattr is only supported in 3.7 onwards
# https://www.python.org/dev/peps/pep-0562/
def __getattr__(attr):
- # Raise AttributeError for expired attributes
+ # Warn for expired attributes, and return a dummy function
+ # that always raises an exception.
try:
- msg = __expired_attrs__[attr]
+ msg = __expired_functions__[attr]
except KeyError:
pass
else:
- raise AttributeError(msg)
+ warnings.warn(msg, RuntimeWarning)
+
+ def _expired(*args, **kwds):
+ raise RuntimeError(msg)
+
+ return _expired
# Emit warnings for deprecated attributes
try:
diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index fad5e1774..c6cc94440 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -1237,3 +1237,114 @@ def amin(
initial: _NumberLike = ...,
where: _ArrayLikeBool = ...,
) -> Union[number, ndarray]: ...
+
+# TODO: `np.prod()``: For object arrays `initial` does not necessarily
+# have to be a numerical scalar.
+# The only requirement is that it is compatible
+# with the `.__mul__()` method(s) of the passed array's elements.
+
+# Note that the same situation holds for all wrappers around
+# `np.ufunc.reduce`, e.g. `np.sum()` (`.__add__()`).
+
+@overload
+def prod(
+ a: _Number,
+ axis: Optional[_ShapeLike] = ...,
+ dtype: DtypeLike = ...,
+ out: None = ...,
+ keepdims: bool = ...,
+ initial: _NumberLike = ...,
+ where: _ArrayLikeBool = ...,
+) -> _Number: ...
+@overload
+def prod(
+ a: ArrayLike,
+ axis: None = ...,
+ dtype: DtypeLike = ...,
+ out: None = ...,
+ keepdims: Literal[False] = ...,
+ initial: _NumberLike = ...,
+ where: _ArrayLikeBool = ...,
+) -> number: ...
+@overload
+def prod(
+ a: ArrayLike,
+ axis: Optional[_ShapeLike] = ...,
+ dtype: DtypeLike = ...,
+ out: Optional[ndarray] = ...,
+ keepdims: bool = ...,
+ initial: _NumberLike = ...,
+ where: _ArrayLikeBool = ...,
+) -> Union[number, ndarray]: ...
+def cumprod(
+ a: ArrayLike,
+ axis: Optional[int] = ...,
+ dtype: DtypeLike = ...,
+ out: Optional[ndarray] = ...,
+) -> ndarray: ...
+def ndim(a: ArrayLike) -> int: ...
+def size(a: ArrayLike, axis: Optional[int] = ...) -> int: ...
+@overload
+def around(
+ a: _Number, decimals: int = ..., out: Optional[ndarray] = ...
+) -> _Number: ...
+@overload
+def around(
+ a: _NumberLike, decimals: int = ..., out: Optional[ndarray] = ...
+) -> number: ...
+@overload
+def around(
+ a: ArrayLike, decimals: int = ..., out: Optional[ndarray] = ...
+) -> ndarray: ...
+@overload
+def mean(
+ a: ArrayLike,
+ axis: None = ...,
+ dtype: DtypeLike = ...,
+ out: None = ...,
+ keepdims: Literal[False] = ...,
+) -> number: ...
+@overload
+def mean(
+ a: ArrayLike,
+ axis: Optional[_ShapeLike] = ...,
+ dtype: DtypeLike = ...,
+ out: Optional[ndarray] = ...,
+ keepdims: bool = ...,
+) -> Union[number, ndarray]: ...
+@overload
+def std(
+ a: ArrayLike,
+ axis: None = ...,
+ dtype: DtypeLike = ...,
+ out: None = ...,
+ ddof: int = ...,
+ keepdims: Literal[False] = ...,
+) -> number: ...
+@overload
+def std(
+ a: ArrayLike,
+ axis: Optional[_ShapeLike] = ...,
+ dtype: DtypeLike = ...,
+ out: Optional[ndarray] = ...,
+ ddof: int = ...,
+ keepdims: bool = ...,
+) -> Union[number, ndarray]: ...
+@overload
+def var(
+ a: ArrayLike,
+ axis: None = ...,
+ dtype: DtypeLike = ...,
+ out: None = ...,
+ ddof: int = ...,
+ keepdims: Literal[False] = ...,
+) -> number: ...
+@overload
+def var(
+ a: ArrayLike,
+ axis: Optional[_ShapeLike] = ...,
+ dtype: DtypeLike = ...,
+ out: Optional[ndarray] = ...,
+ ddof: int = ...,
+ keepdims: bool = ...,
+) -> Union[number, ndarray]: ...
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index 5d9642ea8..ad1530419 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -1628,6 +1628,3 @@ def set_string_function(f, repr=True):
return multiarray.set_string_function(_default_array_str, 0)
else:
return multiarray.set_string_function(f, repr)
-
-set_string_function(_default_array_str, False)
-set_string_function(_default_array_repr, True)
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index aede12080..a4a84397d 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -790,6 +790,8 @@ def configuration(parent_package='',top_path=None):
join('src', 'multiarray', 'descriptor.h'),
join('src', 'multiarray', 'dtypemeta.h'),
join('src', 'multiarray', 'dragon4.h'),
+ join('src', 'multiarray', 'einsum_debug.h'),
+ join('src', 'multiarray', 'einsum_sumprod.h'),
join('src', 'multiarray', 'getset.h'),
join('src', 'multiarray', 'hashdescr.h'),
join('src', 'multiarray', 'iterators.h'),
@@ -853,6 +855,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'multiarray', 'dragon4.c'),
join('src', 'multiarray', 'dtype_transfer.c'),
join('src', 'multiarray', 'einsum.c.src'),
+ join('src', 'multiarray', 'einsum_sumprod.c.src'),
join('src', 'multiarray', 'flagsobject.c'),
join('src', 'multiarray', 'getset.c'),
join('src', 'multiarray', 'hashdescr.c'),
diff --git a/numpy/core/src/common/array_assign.c b/numpy/core/src/common/array_assign.c
index d626d1260..e365b49e4 100644
--- a/numpy/core/src/common/array_assign.c
+++ b/numpy/core/src/common/array_assign.c
@@ -67,12 +67,12 @@ broadcast_strides(int ndim, npy_intp const *shape,
broadcast_error: {
PyObject *errmsg;
- errmsg = PyUString_FromFormat("could not broadcast %s from shape ",
+ errmsg = PyUnicode_FromFormat("could not broadcast %s from shape ",
strides_name);
PyUString_ConcatAndDel(&errmsg,
build_shape_string(strides_ndim, strides_shape));
PyUString_ConcatAndDel(&errmsg,
- PyUString_FromString(" into shape "));
+ PyUnicode_FromString(" into shape "));
PyUString_ConcatAndDel(&errmsg,
build_shape_string(ndim, shape));
PyErr_SetObject(PyExc_ValueError, errmsg);
diff --git a/numpy/core/src/common/npy_cblas.h b/numpy/core/src/common/npy_cblas.h
index 97308238a..072993ec2 100644
--- a/numpy/core/src/common/npy_cblas.h
+++ b/numpy/core/src/common/npy_cblas.h
@@ -47,8 +47,10 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
#ifdef HAVE_BLAS_ILP64
#define CBLAS_INT npy_int64
+#define CBLAS_INT_MAX NPY_MAX_INT64
#else
#define CBLAS_INT int
+#define CBLAS_INT_MAX INT_MAX
#endif
#define BLASNAME(name) CBLAS_FUNC(name)
@@ -59,6 +61,39 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
#undef BLASINT
#undef BLASNAME
+
+/*
+ * Convert NumPy stride to BLAS stride. Returns 0 if conversion cannot be done
+ * (BLAS won't handle negative or zero strides the way we want).
+ */
+static NPY_INLINE CBLAS_INT
+blas_stride(npy_intp stride, unsigned itemsize)
+{
+ /*
+ * Should probably check pointer alignment also, but this may cause
+ * problems if we require complex to be 16 byte aligned.
+ */
+ if (stride > 0 && (stride % itemsize) == 0) {
+ stride /= itemsize;
+ if (stride <= CBLAS_INT_MAX) {
+ return stride;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Define a chunksize for CBLAS.
+ *
+ * The chunksize is the greatest power of two less than CBLAS_INT_MAX.
+ */
+#if NPY_MAX_INTP > CBLAS_INT_MAX
+# define NPY_CBLAS_CHUNK (CBLAS_INT_MAX / 2 + 1)
+#else
+# define NPY_CBLAS_CHUNK NPY_MAX_INTP
+#endif
+
+
#ifdef __cplusplus
}
#endif
diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src
index da631c830..9c1fa0bad 100644
--- a/numpy/core/src/multiarray/_multiarray_tests.c.src
+++ b/numpy/core/src/multiarray/_multiarray_tests.c.src
@@ -1902,7 +1902,7 @@ PrintFloat_Printf_g(PyObject *obj, int precision)
PyOS_snprintf(str, sizeof(str), "%.*g", precision, val);
}
- return PyUString_FromString(str);
+ return PyUnicode_FromString(str);
}
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index 95c650674..5da1b5f29 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -416,7 +416,7 @@ WARN_IN_DEALLOC(PyObject* warning, const char * msg) {
if (PyErr_WarnEx(warning, msg, 1) < 0) {
PyObject * s;
- s = PyUString_FromString("array_dealloc");
+ s = PyUnicode_FromString("array_dealloc");
if (s) {
PyErr_WriteUnraisable(s);
Py_DECREF(s);
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 9508fb5ad..3fee587b9 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -865,7 +865,7 @@ VOID_setitem(PyObject *op, void *input, void *vap)
npy_intp names_size = PyTuple_GET_SIZE(descr->names);
if (names_size != PyTuple_Size(op)) {
- errmsg = PyUString_FromFormat(
+ errmsg = PyUnicode_FromFormat(
"could not assign tuple of length %zd to structure "
"with %" NPY_INTP_FMT " fields.",
PyTuple_Size(op), names_size);
diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c
index 25bb2d195..af40cdc2c 100644
--- a/numpy/core/src/multiarray/buffer.c
+++ b/numpy/core/src/multiarray/buffer.c
@@ -931,7 +931,7 @@ _descriptor_from_pep3118_format(char const *s)
}
*p = '\0';
- str = PyUString_FromStringAndSize(buf, strlen(buf));
+ str = PyUnicode_FromStringAndSize(buf, strlen(buf));
if (str == NULL) {
free(buf);
return NULL;
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 3d3ac7709..5f8250fb7 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -133,7 +133,7 @@ NPY_NO_EXPORT PyArray_Descr *
_array_typedescr_fromstr(char const *c_str)
{
PyArray_Descr *descr = NULL;
- PyObject *stringobj = PyString_FromString(c_str);
+ PyObject *stringobj = PyBytes_FromString(c_str);
if (stringobj == NULL) {
return NULL;
@@ -264,10 +264,10 @@ convert_shape_to_string(npy_intp n, npy_intp const *vals, char *ending)
for (i = 0; i < n && vals[i] < 0; i++);
if (i == n) {
- return PyUString_FromFormat("()%s", ending);
+ return PyUnicode_FromFormat("()%s", ending);
}
else {
- ret = PyUString_FromFormat("(%" NPY_INTP_FMT, vals[i++]);
+ ret = PyUnicode_FromFormat("(%" NPY_INTP_FMT, vals[i++]);
if (ret == NULL) {
return NULL;
}
@@ -275,10 +275,10 @@ convert_shape_to_string(npy_intp n, npy_intp const *vals, char *ending)
for (; i < n; ++i) {
if (vals[i] < 0) {
- tmp = PyUString_FromString(",newaxis");
+ tmp = PyUnicode_FromString(",newaxis");
}
else {
- tmp = PyUString_FromFormat(",%" NPY_INTP_FMT, vals[i]);
+ tmp = PyUnicode_FromFormat(",%" NPY_INTP_FMT, vals[i]);
}
if (tmp == NULL) {
Py_DECREF(ret);
@@ -292,10 +292,10 @@ convert_shape_to_string(npy_intp n, npy_intp const *vals, char *ending)
}
if (i == 1) {
- tmp = PyUString_FromFormat(",)%s", ending);
+ tmp = PyUnicode_FromFormat(",)%s", ending);
}
else {
- tmp = PyUString_FromFormat(")%s", ending);
+ tmp = PyUnicode_FromFormat(")%s", ending);
}
PyUString_ConcatAndDel(&ret, tmp);
return ret;
@@ -310,7 +310,7 @@ dot_alignment_error(PyArrayObject *a, int i, PyArrayObject *b, int j)
*shape1 = NULL, *shape2 = NULL,
*shape1_i = NULL, *shape2_j = NULL;
- format = PyUString_FromString("shapes %s and %s not aligned:"
+ format = PyUnicode_FromString("shapes %s and %s not aligned:"
" %d (dim %d) != %d (dim %d)");
shape1 = convert_shape_to_string(PyArray_NDIM(a), PyArray_DIMS(a), "");
@@ -333,7 +333,7 @@ dot_alignment_error(PyArrayObject *a, int i, PyArrayObject *b, int j)
goto end;
}
- errmsg = PyUString_Format(format, fmt_args);
+ errmsg = PyUnicode_Format(format, fmt_args);
if (errmsg != NULL) {
PyErr_SetObject(PyExc_ValueError, errmsg);
}
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 793cefaf8..4410825fa 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -292,43 +292,6 @@ npy_memchr(char * haystack, char needle,
return p;
}
-/*
- * Convert NumPy stride to BLAS stride. Returns 0 if conversion cannot be done
- * (BLAS won't handle negative or zero strides the way we want).
- */
-static NPY_INLINE int
-blas_stride(npy_intp stride, unsigned itemsize)
-{
- /*
- * Should probably check pointer alignment also, but this may cause
- * problems if we require complex to be 16 byte aligned.
- */
- if (stride > 0 && npy_is_aligned((void *)stride, itemsize)) {
- stride /= itemsize;
-#ifndef HAVE_BLAS_ILP64
- if (stride <= INT_MAX) {
-#else
- if (stride <= NPY_MAX_INT64) {
-#endif
- return stride;
- }
- }
- return 0;
-}
-
-/*
- * Define a chunksize for CBLAS. CBLAS counts in integers.
- */
-#if NPY_MAX_INTP > INT_MAX
-# ifndef HAVE_BLAS_ILP64
-# define NPY_CBLAS_CHUNK (INT_MAX / 2 + 1)
-# else
-# define NPY_CBLAS_CHUNK (NPY_MAX_INT64 / 2 + 1)
-# endif
-#else
-# define NPY_CBLAS_CHUNK NPY_MAX_INTP
-#endif
-
#include "ucsnarrow.h"
/*
diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c
index 41a10afdc..b68b9322d 100644
--- a/numpy/core/src/multiarray/convert.c
+++ b/numpy/core/src/multiarray/convert.c
@@ -248,13 +248,13 @@ PyArray_ToFile(PyArrayObject *self, FILE *fp, char *sep, char *format)
return -1;
}
PyTuple_SET_ITEM(tupobj,0,obj);
- obj = PyUString_FromString((const char *)format);
+ obj = PyUnicode_FromString((const char *)format);
if (obj == NULL) {
Py_DECREF(tupobj);
Py_DECREF(it);
return -1;
}
- strobj = PyUString_Format(obj, tupobj);
+ strobj = PyUnicode_Format(obj, tupobj);
Py_DECREF(obj);
Py_DECREF(tupobj);
if (strobj == NULL) {
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index bd038c53a..1f5845eb7 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -95,8 +95,11 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num)
key = PyLong_FromLong(type_num);
cobj = PyDict_GetItem(obj, key);
Py_DECREF(key);
- if (cobj && NpyCapsule_Check(cobj)) {
- castfunc = NpyCapsule_AsVoidPtr(cobj);
+ if (cobj && PyCapsule_CheckExact(cobj)) {
+ castfunc = PyCapsule_GetPointer(cobj, NULL);
+ if (castfunc == NULL) {
+ return NULL;
+ }
}
}
}
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 3ff397817..6add032bf 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -868,11 +868,14 @@ PyArray_NewFromDescr_int(
func = PyObject_GetAttr((PyObject *)fa, npy_ma_str_array_finalize);
if (func && func != Py_None) {
- if (NpyCapsule_Check(func)) {
+ if (PyCapsule_CheckExact(func)) {
/* A C-function is stored here */
PyArray_FinalizeFunc *cfunc;
- cfunc = NpyCapsule_AsVoidPtr(func);
+ cfunc = PyCapsule_GetPointer(func, NULL);
Py_DECREF(func);
+ if (cfunc == NULL) {
+ goto fail;
+ }
if (cfunc((PyArrayObject *)fa, obj) < 0) {
goto fail;
}
@@ -1747,7 +1750,7 @@ PyArray_FromStructInterface(PyObject *input)
return Py_NotImplemented;
}
}
- if (!NpyCapsule_Check(attr)) {
+ if (!PyCapsule_CheckExact(attr)) {
if (PyType_Check(input) && PyObject_HasAttrString(attr, "__get__")) {
/*
* If the input is a class `attr` should be a property-like object.
@@ -1759,7 +1762,10 @@ PyArray_FromStructInterface(PyObject *input)
}
goto fail;
}
- inter = NpyCapsule_AsVoidPtr(attr);
+ inter = PyCapsule_GetPointer(attr, NULL);
+ if (inter == NULL) {
+ goto fail;
+ }
if (inter->two != 2) {
goto fail;
}
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index 3649bbe4c..348473309 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -1435,14 +1435,14 @@ raise_if_datetime64_metadata_cast_error(char *object_type,
}
else {
PyObject *errmsg;
- errmsg = PyUString_FromFormat("Cannot cast %s "
+ errmsg = PyUnicode_FromFormat("Cannot cast %s "
"from metadata ", object_type);
errmsg = append_metastr_to_string(src_meta, 0, errmsg);
PyUString_ConcatAndDel(&errmsg,
- PyUString_FromString(" to "));
+ PyUnicode_FromString(" to "));
errmsg = append_metastr_to_string(dst_meta, 0, errmsg);
PyUString_ConcatAndDel(&errmsg,
- PyUString_FromFormat(" according to the rule %s",
+ PyUnicode_FromFormat(" according to the rule %s",
npy_casting_to_string(casting)));
PyErr_SetObject(PyExc_TypeError, errmsg);
Py_DECREF(errmsg);
@@ -1467,14 +1467,14 @@ raise_if_timedelta64_metadata_cast_error(char *object_type,
}
else {
PyObject *errmsg;
- errmsg = PyUString_FromFormat("Cannot cast %s "
+ errmsg = PyUnicode_FromFormat("Cannot cast %s "
"from metadata ", object_type);
errmsg = append_metastr_to_string(src_meta, 0, errmsg);
PyUString_ConcatAndDel(&errmsg,
- PyUString_FromString(" to "));
+ PyUnicode_FromString(" to "));
errmsg = append_metastr_to_string(dst_meta, 0, errmsg);
PyUString_ConcatAndDel(&errmsg,
- PyUString_FromFormat(" according to the rule %s",
+ PyUnicode_FromFormat(" according to the rule %s",
npy_casting_to_string(casting)));
PyErr_SetObject(PyExc_TypeError, errmsg);
Py_DECREF(errmsg);
@@ -1601,15 +1601,15 @@ compute_datetime_metadata_greatest_common_divisor(
incompatible_units: {
PyObject *errmsg;
- errmsg = PyUString_FromString("Cannot get "
+ errmsg = PyUnicode_FromString("Cannot get "
"a common metadata divisor for "
"NumPy datetime metadata ");
errmsg = append_metastr_to_string(meta1, 0, errmsg);
PyUString_ConcatAndDel(&errmsg,
- PyUString_FromString(" and "));
+ PyUnicode_FromString(" and "));
errmsg = append_metastr_to_string(meta2, 0, errmsg);
PyUString_ConcatAndDel(&errmsg,
- PyUString_FromString(" because they have "
+ PyUnicode_FromString(" because they have "
"incompatible nonlinear base time units"));
PyErr_SetObject(PyExc_TypeError, errmsg);
Py_DECREF(errmsg);
@@ -1617,12 +1617,12 @@ incompatible_units: {
}
units_overflow: {
PyObject *errmsg;
- errmsg = PyUString_FromString("Integer overflow "
+ errmsg = PyUnicode_FromString("Integer overflow "
"getting a common metadata divisor for "
"NumPy datetime metadata ");
errmsg = append_metastr_to_string(meta1, 0, errmsg);
PyUString_ConcatAndDel(&errmsg,
- PyUString_FromString(" and "));
+ PyUnicode_FromString(" and "));
errmsg = append_metastr_to_string(meta2, 0, errmsg);
PyErr_SetObject(PyExc_OverflowError, errmsg);
Py_DECREF(errmsg);
@@ -1747,7 +1747,7 @@ convert_datetime_metadata_to_tuple(PyArray_DatetimeMetaData *meta)
}
PyTuple_SET_ITEM(dt_tuple, 0,
- PyUString_FromString(_datetime_strings[meta->base]));
+ PyUnicode_FromString(_datetime_strings[meta->base]));
PyTuple_SET_ITEM(dt_tuple, 1,
PyLong_FromLong(meta->num));
@@ -1771,7 +1771,7 @@ convert_datetime_metadata_tuple_to_datetime_metadata(PyObject *tuple,
if (!PyTuple_Check(tuple)) {
PyObject *errmsg;
- errmsg = PyUString_FromString("Require tuple for tuple to NumPy "
+ errmsg = PyUnicode_FromString("Require tuple for tuple to NumPy "
"datetime metadata conversion, not ");
PyUString_ConcatAndDel(&errmsg, PyObject_Repr(tuple));
PyErr_SetObject(PyExc_TypeError, errmsg);
@@ -1973,7 +1973,7 @@ append_metastr_to_string(PyArray_DatetimeMetaData *meta,
if (meta->base == NPY_FR_GENERIC) {
/* Without brackets, give a string "generic" */
if (skip_brackets) {
- PyUString_ConcatAndDel(&ret, PyUString_FromString("generic"));
+ PyUString_ConcatAndDel(&ret, PyUnicode_FromString("generic"));
return ret;
}
/* But with brackets, append nothing */
@@ -1994,18 +1994,18 @@ append_metastr_to_string(PyArray_DatetimeMetaData *meta,
if (num == 1) {
if (skip_brackets) {
- res = PyUString_FromFormat("%s", basestr);
+ res = PyUnicode_FromFormat("%s", basestr);
}
else {
- res = PyUString_FromFormat("[%s]", basestr);
+ res = PyUnicode_FromFormat("[%s]", basestr);
}
}
else {
if (skip_brackets) {
- res = PyUString_FromFormat("%d%s", num, basestr);
+ res = PyUnicode_FromFormat("%d%s", num, basestr);
}
else {
- res = PyUString_FromFormat("[%d%s]", num, basestr);
+ res = PyUnicode_FromFormat("[%d%s]", num, basestr);
}
}
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index f47f0ce06..95597b812 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -472,7 +472,7 @@ _convert_from_array_descr(PyObject *obj, int align)
if (PyUnicode_GetLength(name) == 0) {
Py_DECREF(name);
if (title == NULL) {
- name = PyUString_FromFormat("f%d", i);
+ name = PyUnicode_FromFormat("f%d", i);
if (name == NULL) {
goto fail;
}
@@ -673,7 +673,7 @@ _convert_from_list(PyObject *obj, int align)
}
PyTuple_SET_ITEM(tup, 0, (PyObject *)conv);
PyTuple_SET_ITEM(tup, 1, size_obj);
- PyObject *key = PyUString_FromFormat("f%d", i);
+ PyObject *key = PyUnicode_FromFormat("f%d", i);
if (!key) {
Py_DECREF(tup);
goto fail;
@@ -1887,10 +1887,10 @@ arraydescr_protocol_typestr_get(PyArray_Descr *self)
size >>= 2;
}
if (self->type_num == NPY_OBJECT) {
- ret = PyUString_FromFormat("%c%c", endian, basic_);
+ ret = PyUnicode_FromFormat("%c%c", endian, basic_);
}
else {
- ret = PyUString_FromFormat("%c%c%d", endian, basic_, size);
+ ret = PyUnicode_FromFormat("%c%c%d", endian, basic_, size);
}
if (PyDataType_ISDATETIME(self)) {
PyArray_DatetimeMetaData *meta;
@@ -1974,7 +1974,7 @@ arraydescr_protocol_descr_get(PyArray_Descr *self)
if (dobj == NULL) {
return NULL;
}
- PyTuple_SET_ITEM(dobj, 0, PyUString_FromString(""));
+ PyTuple_SET_ITEM(dobj, 0, PyUnicode_FromString(""));
PyTuple_SET_ITEM(dobj, 1, arraydescr_protocol_typestr_get(self));
res = PyList_New(1);
if (res == NULL) {
@@ -2450,7 +2450,7 @@ arraydescr_reduce(PyArray_Descr *self, PyObject *NPY_UNUSED(args))
if (self->type_num == NPY_UNICODE) {
elsize >>= 2;
}
- obj = PyUString_FromFormat("%c%d",self->kind, elsize);
+ obj = PyUnicode_FromFormat("%c%d",self->kind, elsize);
}
PyTuple_SET_ITEM(ret, 1, Py_BuildValue("(NOO)", obj, Py_False, Py_True));
@@ -2492,7 +2492,7 @@ arraydescr_reduce(PyArray_Descr *self, PyObject *NPY_UNUSED(args))
PyTuple_SET_ITEM(state, 0, PyLong_FromLong(3));
}
- PyTuple_SET_ITEM(state, 1, PyUString_FromFormat("%c", endian));
+ PyTuple_SET_ITEM(state, 1, PyUnicode_FromFormat("%c", endian));
PyTuple_SET_ITEM(state, 2, arraydescr_subdescr_get(self));
if (PyDataType_HASFIELDS(self)) {
Py_INCREF(self->names);
@@ -2894,7 +2894,7 @@ arraydescr_setstate(PyArray_Descr *self, PyObject *args)
PyArray_DatetimeMetaData temp_dt_data;
if ((! PyTuple_Check(metadata)) || (PyTuple_Size(metadata) != 2)) {
- errmsg = PyUString_FromString("Invalid datetime dtype (metadata, c_metadata): ");
+ errmsg = PyUnicode_FromString("Invalid datetime dtype (metadata, c_metadata): ");
PyUString_ConcatAndDel(&errmsg, PyObject_Repr(metadata));
PyErr_SetObject(PyExc_ValueError, errmsg);
Py_DECREF(errmsg);
@@ -3393,7 +3393,7 @@ arraydescr_field_subset_view(PyArray_Descr *self, PyObject *ind)
/* disallow duplicate field indices */
if (PyDict_Contains(fields, name)) {
PyObject *msg = NULL;
- PyObject *fmt = PyUString_FromString(
+ PyObject *fmt = PyUnicode_FromString(
"duplicate field of name {!r}");
if (fmt != NULL) {
msg = PyObject_CallMethod(fmt, "format", "O", name);
diff --git a/numpy/core/src/multiarray/dragon4.c b/numpy/core/src/multiarray/dragon4.c
index 553d0effb..a7b252a77 100644
--- a/numpy/core/src/multiarray/dragon4.c
+++ b/numpy/core/src/multiarray/dragon4.c
@@ -3093,7 +3093,7 @@ Dragon4_Positional_##Type##_opt(npy_type *val, Dragon4_Options *opt)\
free_dragon4_bigint_scratch(scratch);\
return NULL;\
}\
- ret = PyUString_FromString(scratch->repr);\
+ ret = PyUnicode_FromString(scratch->repr);\
free_dragon4_bigint_scratch(scratch);\
return ret;\
}\
@@ -3130,7 +3130,7 @@ Dragon4_Scientific_##Type##_opt(npy_type *val, Dragon4_Options *opt)\
free_dragon4_bigint_scratch(scratch);\
return NULL;\
}\
- ret = PyUString_FromString(scratch->repr);\
+ ret = PyUnicode_FromString(scratch->repr);\
free_dragon4_bigint_scratch(scratch);\
return ret;\
}\
diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src
index 2538e05c6..6ad375f67 100644
--- a/numpy/core/src/multiarray/einsum.c.src
+++ b/numpy/core/src/multiarray/einsum.c.src
@@ -16,7 +16,6 @@
#define _MULTIARRAYMODULE
#include <numpy/npy_common.h>
#include <numpy/arrayobject.h>
-#include <numpy/halffloat.h>
#include <npy_pycompat.h>
#include <ctype.h>
@@ -25,1898 +24,8 @@
#include "common.h"
#include "ctors.h"
-#ifdef NPY_HAVE_SSE_INTRINSICS
-#define EINSUM_USE_SSE1 1
-#else
-#define EINSUM_USE_SSE1 0
-#endif
-
-#ifdef NPY_HAVE_SSE2_INTRINSICS
-#define EINSUM_USE_SSE2 1
-#else
-#define EINSUM_USE_SSE2 0
-#endif
-
-#if EINSUM_USE_SSE1
-#include <xmmintrin.h>
-#endif
-
-#if EINSUM_USE_SSE2
-#include <emmintrin.h>
-#endif
-
-#define EINSUM_IS_SSE_ALIGNED(x) ((((npy_intp)x)&0xf) == 0)
-
-/********** PRINTF DEBUG TRACING **************/
-#define NPY_EINSUM_DBG_TRACING 0
-
-#if NPY_EINSUM_DBG_TRACING
-#define NPY_EINSUM_DBG_PRINT(s) printf("%s", s);
-#define NPY_EINSUM_DBG_PRINT1(s, p1) printf(s, p1);
-#define NPY_EINSUM_DBG_PRINT2(s, p1, p2) printf(s, p1, p2);
-#define NPY_EINSUM_DBG_PRINT3(s, p1, p2, p3) printf(s);
-#else
-#define NPY_EINSUM_DBG_PRINT(s)
-#define NPY_EINSUM_DBG_PRINT1(s, p1)
-#define NPY_EINSUM_DBG_PRINT2(s, p1, p2)
-#define NPY_EINSUM_DBG_PRINT3(s, p1, p2, p3)
-#endif
-/**********************************************/
-
-/**begin repeat
- * #name = byte, short, int, long, longlong,
- * ubyte, ushort, uint, ulong, ulonglong,
- * half, float, double, longdouble,
- * cfloat, cdouble, clongdouble#
- * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong,
- * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong,
- * npy_half, npy_float, npy_double, npy_longdouble,
- * npy_cfloat, npy_cdouble, npy_clongdouble#
- * #temptype = npy_byte, npy_short, npy_int, npy_long, npy_longlong,
- * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong,
- * npy_float, npy_float, npy_double, npy_longdouble,
- * npy_float, npy_double, npy_longdouble#
- * #to = ,,,,,
- * ,,,,,
- * npy_float_to_half,,,,
- * ,,#
- * #from = ,,,,,
- * ,,,,,
- * npy_half_to_float,,,,
- * ,,#
- * #complex = 0*5,
- * 0*5,
- * 0*4,
- * 1*3#
- * #float32 = 0*5,
- * 0*5,
- * 0,1,0,0,
- * 0*3#
- * #float64 = 0*5,
- * 0*5,
- * 0,0,1,0,
- * 0*3#
- */
-
-/**begin repeat1
- * #nop = 1, 2, 3, 1000#
- * #noplabel = one, two, three, any#
- */
-static void
-@name@_sum_of_products_@noplabel@(int nop, char **dataptr,
- npy_intp const *strides, npy_intp count)
-{
-#if (@nop@ == 1) || (@nop@ <= 3 && !@complex@)
- char *data0 = dataptr[0];
- npy_intp stride0 = strides[0];
-#endif
-#if (@nop@ == 2 || @nop@ == 3) && !@complex@
- char *data1 = dataptr[1];
- npy_intp stride1 = strides[1];
-#endif
-#if (@nop@ == 3) && !@complex@
- char *data2 = dataptr[2];
- npy_intp stride2 = strides[2];
-#endif
-#if (@nop@ == 1) || (@nop@ <= 3 && !@complex@)
- char *data_out = dataptr[@nop@];
- npy_intp stride_out = strides[@nop@];
-#endif
-
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_@noplabel@ (%d)\n", (int)count);
-
- while (count--) {
-#if !@complex@
-# if @nop@ == 1
- *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) +
- @from@(*(@type@ *)data_out));
- data0 += stride0;
- data_out += stride_out;
-# elif @nop@ == 2
- *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) *
- @from@(*(@type@ *)data1) +
- @from@(*(@type@ *)data_out));
- data0 += stride0;
- data1 += stride1;
- data_out += stride_out;
-# elif @nop@ == 3
- *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) *
- @from@(*(@type@ *)data1) *
- @from@(*(@type@ *)data2) +
- @from@(*(@type@ *)data_out));
- data0 += stride0;
- data1 += stride1;
- data2 += stride2;
- data_out += stride_out;
-# else
- @temptype@ temp = @from@(*(@type@ *)dataptr[0]);
- int i;
- for (i = 1; i < nop; ++i) {
- temp *= @from@(*(@type@ *)dataptr[i]);
- }
- *(@type@ *)dataptr[nop] = @to@(temp +
- @from@(*(@type@ *)dataptr[i]));
- for (i = 0; i <= nop; ++i) {
- dataptr[i] += strides[i];
- }
-# endif
-#else /* complex */
-# if @nop@ == 1
- ((@temptype@ *)data_out)[0] = ((@temptype@ *)data0)[0] +
- ((@temptype@ *)data_out)[0];
- ((@temptype@ *)data_out)[1] = ((@temptype@ *)data0)[1] +
- ((@temptype@ *)data_out)[1];
- data0 += stride0;
- data_out += stride_out;
-# else
-# if @nop@ <= 3
-#define _SUMPROD_NOP @nop@
-# else
-#define _SUMPROD_NOP nop
-# endif
- @temptype@ re, im, tmp;
- int i;
- re = ((@temptype@ *)dataptr[0])[0];
- im = ((@temptype@ *)dataptr[0])[1];
- for (i = 1; i < _SUMPROD_NOP; ++i) {
- tmp = re * ((@temptype@ *)dataptr[i])[0] -
- im * ((@temptype@ *)dataptr[i])[1];
- im = re * ((@temptype@ *)dataptr[i])[1] +
- im * ((@temptype@ *)dataptr[i])[0];
- re = tmp;
- }
- ((@temptype@ *)dataptr[_SUMPROD_NOP])[0] = re +
- ((@temptype@ *)dataptr[_SUMPROD_NOP])[0];
- ((@temptype@ *)dataptr[_SUMPROD_NOP])[1] = im +
- ((@temptype@ *)dataptr[_SUMPROD_NOP])[1];
-
- for (i = 0; i <= _SUMPROD_NOP; ++i) {
- dataptr[i] += strides[i];
- }
-#undef _SUMPROD_NOP
-# endif
-#endif
- }
-}
-
-#if @nop@ == 1
-
-static void
-@name@_sum_of_products_contig_one(int nop, char **dataptr,
- npy_intp const *NPY_UNUSED(strides), npy_intp count)
-{
- @type@ *data0 = (@type@ *)dataptr[0];
- @type@ *data_out = (@type@ *)dataptr[1];
-
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_one (%d)\n",
- (int)count);
-
-/* This is placed before the main loop to make small counts faster */
-finish_after_unrolled_loop:
- switch (count) {
-/**begin repeat2
- * #i = 6, 5, 4, 3, 2, 1, 0#
- */
- case @i@+1:
-#if !@complex@
- data_out[@i@] = @to@(@from@(data0[@i@]) +
- @from@(data_out[@i@]));
-#else
- ((@temptype@ *)data_out + 2*@i@)[0] =
- ((@temptype@ *)data0 + 2*@i@)[0] +
- ((@temptype@ *)data_out + 2*@i@)[0];
- ((@temptype@ *)data_out + 2*@i@)[1] =
- ((@temptype@ *)data0 + 2*@i@)[1] +
- ((@temptype@ *)data_out + 2*@i@)[1];
-#endif
-/**end repeat2**/
- case 0:
- return;
- }
-
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
-#if !@complex@
- data_out[@i@] = @to@(@from@(data0[@i@]) +
- @from@(data_out[@i@]));
-#else /* complex */
- ((@temptype@ *)data_out + 2*@i@)[0] =
- ((@temptype@ *)data0 + 2*@i@)[0] +
- ((@temptype@ *)data_out + 2*@i@)[0];
- ((@temptype@ *)data_out + 2*@i@)[1] =
- ((@temptype@ *)data0 + 2*@i@)[1] +
- ((@temptype@ *)data_out + 2*@i@)[1];
-#endif
-/**end repeat2**/
- data0 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
-}
-
-#elif @nop@ == 2 && !@complex@
-
-static void
-@name@_sum_of_products_contig_two(int nop, char **dataptr,
- npy_intp const *NPY_UNUSED(strides), npy_intp count)
-{
- @type@ *data0 = (@type@ *)dataptr[0];
- @type@ *data1 = (@type@ *)dataptr[1];
- @type@ *data_out = (@type@ *)dataptr[2];
-
-#if EINSUM_USE_SSE1 && @float32@
- __m128 a, b;
-#elif EINSUM_USE_SSE2 && @float64@
- __m128d a, b;
-#endif
-
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_two (%d)\n",
- (int)count);
-
-/* This is placed before the main loop to make small counts faster */
-finish_after_unrolled_loop:
- switch (count) {
-/**begin repeat2
- * #i = 6, 5, 4, 3, 2, 1, 0#
- */
- case @i@+1:
- data_out[@i@] = @to@(@from@(data0[@i@]) *
- @from@(data1[@i@]) +
- @from@(data_out[@i@]));
-/**end repeat2**/
- case 0:
- return;
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1) &&
- EINSUM_IS_SSE_ALIGNED(data_out)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 4#
- */
- a = _mm_mul_ps(_mm_load_ps(data0+@i@), _mm_load_ps(data1+@i@));
- b = _mm_add_ps(a, _mm_load_ps(data_out+@i@));
- _mm_store_ps(data_out+@i@, b);
-/**end repeat2**/
- data0 += 8;
- data1 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#elif EINSUM_USE_SSE2 && @float64@
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1) &&
- EINSUM_IS_SSE_ALIGNED(data_out)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- a = _mm_mul_pd(_mm_load_pd(data0+@i@), _mm_load_pd(data1+@i@));
- b = _mm_add_pd(a, _mm_load_pd(data_out+@i@));
- _mm_store_pd(data_out+@i@, b);
-/**end repeat2**/
- data0 += 8;
- data1 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#endif
-
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-#if EINSUM_USE_SSE1 && @float32@
-/**begin repeat2
- * #i = 0, 4#
- */
- a = _mm_mul_ps(_mm_loadu_ps(data0+@i@), _mm_loadu_ps(data1+@i@));
- b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@));
- _mm_storeu_ps(data_out+@i@, b);
-/**end repeat2**/
-#elif EINSUM_USE_SSE2 && @float64@
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- a = _mm_mul_pd(_mm_loadu_pd(data0+@i@), _mm_loadu_pd(data1+@i@));
- b = _mm_add_pd(a, _mm_loadu_pd(data_out+@i@));
- _mm_storeu_pd(data_out+@i@, b);
-/**end repeat2**/
-#else
-/**begin repeat2
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- data_out[@i@] = @to@(@from@(data0[@i@]) *
- @from@(data1[@i@]) +
- @from@(data_out[@i@]));
-/**end repeat2**/
-#endif
- data0 += 8;
- data1 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
-}
-
-/* Some extra specializations for the two operand case */
-static void
-@name@_sum_of_products_stride0_contig_outcontig_two(int nop, char **dataptr,
- npy_intp const *NPY_UNUSED(strides), npy_intp count)
-{
- @temptype@ value0 = @from@(*(@type@ *)dataptr[0]);
- @type@ *data1 = (@type@ *)dataptr[1];
- @type@ *data_out = (@type@ *)dataptr[2];
-
-#if EINSUM_USE_SSE1 && @float32@
- __m128 a, b, value0_sse;
-#elif EINSUM_USE_SSE2 && @float64@
- __m128d a, b, value0_sse;
-#endif
-
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_stride0_contig_outcontig_two (%d)\n",
- (int)count);
-
-/* This is placed before the main loop to make small counts faster */
-finish_after_unrolled_loop:
- switch (count) {
-/**begin repeat2
- * #i = 6, 5, 4, 3, 2, 1, 0#
- */
- case @i@+1:
- data_out[@i@] = @to@(value0 *
- @from@(data1[@i@]) +
- @from@(data_out[@i@]));
-/**end repeat2**/
- case 0:
- return;
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- value0_sse = _mm_set_ps1(value0);
-
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data1) && EINSUM_IS_SSE_ALIGNED(data_out)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 4#
- */
- a = _mm_mul_ps(value0_sse, _mm_load_ps(data1+@i@));
- b = _mm_add_ps(a, _mm_load_ps(data_out+@i@));
- _mm_store_ps(data_out+@i@, b);
-/**end repeat2**/
- data1 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
- if (count > 0) {
- goto finish_after_unrolled_loop;
- }
- else {
- return;
- }
- }
-#elif EINSUM_USE_SSE2 && @float64@
- value0_sse = _mm_set1_pd(value0);
-
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data1) && EINSUM_IS_SSE_ALIGNED(data_out)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- a = _mm_mul_pd(value0_sse, _mm_load_pd(data1+@i@));
- b = _mm_add_pd(a, _mm_load_pd(data_out+@i@));
- _mm_store_pd(data_out+@i@, b);
-/**end repeat2**/
- data1 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
- if (count > 0) {
- goto finish_after_unrolled_loop;
- }
- else {
- return;
- }
- }
-#endif
-
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-#if EINSUM_USE_SSE1 && @float32@
-/**begin repeat2
- * #i = 0, 4#
- */
- a = _mm_mul_ps(value0_sse, _mm_loadu_ps(data1+@i@));
- b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@));
- _mm_storeu_ps(data_out+@i@, b);
-/**end repeat2**/
-#elif EINSUM_USE_SSE2 && @float64@
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- a = _mm_mul_pd(value0_sse, _mm_loadu_pd(data1+@i@));
- b = _mm_add_pd(a, _mm_loadu_pd(data_out+@i@));
- _mm_storeu_pd(data_out+@i@, b);
-/**end repeat2**/
-#else
-/**begin repeat2
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- data_out[@i@] = @to@(value0 *
- @from@(data1[@i@]) +
- @from@(data_out[@i@]));
-/**end repeat2**/
-#endif
- data1 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
- if (count > 0) {
- goto finish_after_unrolled_loop;
- }
-}
-
-static void
-@name@_sum_of_products_contig_stride0_outcontig_two(int nop, char **dataptr,
- npy_intp const *NPY_UNUSED(strides), npy_intp count)
-{
- @type@ *data0 = (@type@ *)dataptr[0];
- @temptype@ value1 = @from@(*(@type@ *)dataptr[1]);
- @type@ *data_out = (@type@ *)dataptr[2];
-
-#if EINSUM_USE_SSE1 && @float32@
- __m128 a, b, value1_sse;
-#elif EINSUM_USE_SSE2 && @float64@
- __m128d a, b, value1_sse;
-#endif
-
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_stride0_outcontig_two (%d)\n",
- (int)count);
-
-/* This is placed before the main loop to make small counts faster */
-finish_after_unrolled_loop:
- switch (count) {
-/**begin repeat2
- * #i = 6, 5, 4, 3, 2, 1, 0#
- */
- case @i@+1:
- data_out[@i@] = @to@(@from@(data0[@i@])*
- value1 +
- @from@(data_out[@i@]));
-/**end repeat2**/
- case 0:
- return;
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- value1_sse = _mm_set_ps1(value1);
-
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data_out)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 4#
- */
- a = _mm_mul_ps(_mm_load_ps(data0+@i@), value1_sse);
- b = _mm_add_ps(a, _mm_load_ps(data_out+@i@));
- _mm_store_ps(data_out+@i@, b);
-/**end repeat2**/
- data0 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#elif EINSUM_USE_SSE2 && @float64@
- value1_sse = _mm_set1_pd(value1);
-
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data_out)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- a = _mm_mul_pd(_mm_load_pd(data0+@i@), value1_sse);
- b = _mm_add_pd(a, _mm_load_pd(data_out+@i@));
- _mm_store_pd(data_out+@i@, b);
-/**end repeat2**/
- data0 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#endif
-
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-#if EINSUM_USE_SSE1 && @float32@
-/**begin repeat2
- * #i = 0, 4#
- */
- a = _mm_mul_ps(_mm_loadu_ps(data0+@i@), value1_sse);
- b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@));
- _mm_storeu_ps(data_out+@i@, b);
-/**end repeat2**/
-#elif EINSUM_USE_SSE2 && @float64@
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- a = _mm_mul_pd(_mm_loadu_pd(data0+@i@), value1_sse);
- b = _mm_add_pd(a, _mm_loadu_pd(data_out+@i@));
- _mm_storeu_pd(data_out+@i@, b);
-/**end repeat2**/
-#else
-/**begin repeat2
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- data_out[@i@] = @to@(@from@(data0[@i@])*
- value1 +
- @from@(data_out[@i@]));
-/**end repeat2**/
-#endif
- data0 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
-}
-
-static void
-@name@_sum_of_products_contig_contig_outstride0_two(int nop, char **dataptr,
- npy_intp const *NPY_UNUSED(strides), npy_intp count)
-{
- @type@ *data0 = (@type@ *)dataptr[0];
- @type@ *data1 = (@type@ *)dataptr[1];
- @temptype@ accum = 0;
-
-#if EINSUM_USE_SSE1 && @float32@
- __m128 a, accum_sse = _mm_setzero_ps();
-#elif EINSUM_USE_SSE2 && @float64@
- __m128d a, accum_sse = _mm_setzero_pd();
-#endif
-
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_contig_outstride0_two (%d)\n",
- (int)count);
-
-/* This is placed before the main loop to make small counts faster */
-finish_after_unrolled_loop:
- switch (count) {
-/**begin repeat2
- * #i = 6, 5, 4, 3, 2, 1, 0#
- */
- case @i@+1:
- accum += @from@(data0[@i@]) * @from@(data1[@i@]);
-/**end repeat2**/
- case 0:
- *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + accum);
- return;
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
- _mm_prefetch(data0 + 512, _MM_HINT_T0);
- _mm_prefetch(data1 + 512, _MM_HINT_T0);
-
-/**begin repeat2
- * #i = 0, 4#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- a = _mm_mul_ps(_mm_load_ps(data0+@i@), _mm_load_ps(data1+@i@));
- accum_sse = _mm_add_ps(accum_sse, a);
-/**end repeat2**/
- data0 += 8;
- data1 += 8;
- }
-
- /* Add the four SSE values and put in accum */
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
- accum_sse = _mm_add_ps(a, accum_sse);
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
- accum_sse = _mm_add_ps(a, accum_sse);
- _mm_store_ss(&accum, accum_sse);
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#elif EINSUM_USE_SSE2 && @float64@
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
- _mm_prefetch(data0 + 512, _MM_HINT_T0);
- _mm_prefetch(data1 + 512, _MM_HINT_T0);
-
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- a = _mm_mul_pd(_mm_load_pd(data0+@i@), _mm_load_pd(data1+@i@));
- accum_sse = _mm_add_pd(accum_sse, a);
-/**end repeat2**/
- data0 += 8;
- data1 += 8;
- }
-
- /* Add the two SSE2 values and put in accum */
- a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
- accum_sse = _mm_add_pd(a, accum_sse);
- _mm_store_sd(&accum, accum_sse);
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#endif
-
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-#if EINSUM_USE_SSE1 && @float32@
- _mm_prefetch(data0 + 512, _MM_HINT_T0);
- _mm_prefetch(data1 + 512, _MM_HINT_T0);
-
-/**begin repeat2
- * #i = 0, 4#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- a = _mm_mul_ps(_mm_loadu_ps(data0+@i@), _mm_loadu_ps(data1+@i@));
- accum_sse = _mm_add_ps(accum_sse, a);
-/**end repeat2**/
-#elif EINSUM_USE_SSE2 && @float64@
- _mm_prefetch(data0 + 512, _MM_HINT_T0);
- _mm_prefetch(data1 + 512, _MM_HINT_T0);
-
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- a = _mm_mul_pd(_mm_loadu_pd(data0+@i@), _mm_loadu_pd(data1+@i@));
- accum_sse = _mm_add_pd(accum_sse, a);
-/**end repeat2**/
-#else
-/**begin repeat2
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- accum += @from@(data0[@i@]) * @from@(data1[@i@]);
-/**end repeat2**/
-#endif
- data0 += 8;
- data1 += 8;
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- /* Add the four SSE values and put in accum */
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
- accum_sse = _mm_add_ps(a, accum_sse);
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
- accum_sse = _mm_add_ps(a, accum_sse);
- _mm_store_ss(&accum, accum_sse);
-#elif EINSUM_USE_SSE2 && @float64@
- /* Add the two SSE2 values and put in accum */
- a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
- accum_sse = _mm_add_pd(a, accum_sse);
- _mm_store_sd(&accum, accum_sse);
-#endif
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
-}
-
-static void
-@name@_sum_of_products_stride0_contig_outstride0_two(int nop, char **dataptr,
- npy_intp const *NPY_UNUSED(strides), npy_intp count)
-{
- @temptype@ value0 = @from@(*(@type@ *)dataptr[0]);
- @type@ *data1 = (@type@ *)dataptr[1];
- @temptype@ accum = 0;
-
-#if EINSUM_USE_SSE1 && @float32@
- __m128 a, accum_sse = _mm_setzero_ps();
-#elif EINSUM_USE_SSE2 && @float64@
- __m128d a, accum_sse = _mm_setzero_pd();
-#endif
-
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_stride0_contig_outstride0_two (%d)\n",
- (int)count);
-
-/* This is placed before the main loop to make small counts faster */
-finish_after_unrolled_loop:
- switch (count) {
-/**begin repeat2
- * #i = 6, 5, 4, 3, 2, 1, 0#
- */
- case @i@+1:
- accum += @from@(data1[@i@]);
-/**end repeat2**/
- case 0:
- *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + value0 * accum);
- return;
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data1)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 4#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_ps(accum_sse, _mm_load_ps(data1+@i@));
-/**end repeat2**/
- data1 += 8;
- }
- /* Add the four SSE values and put in accum */
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
- accum_sse = _mm_add_ps(a, accum_sse);
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
- accum_sse = _mm_add_ps(a, accum_sse);
- _mm_store_ss(&accum, accum_sse);
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#elif EINSUM_USE_SSE2 && @float64@
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data1)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_pd(accum_sse, _mm_load_pd(data1+@i@));
-/**end repeat2**/
- data1 += 8;
- }
- /* Add the two SSE2 values and put in accum */
- a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
- accum_sse = _mm_add_pd(a, accum_sse);
- _mm_store_sd(&accum, accum_sse);
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#endif
-
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-#if EINSUM_USE_SSE1 && @float32@
-/**begin repeat2
- * #i = 0, 4#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data1+@i@));
-/**end repeat2**/
-#elif EINSUM_USE_SSE2 && @float64@
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_pd(accum_sse, _mm_loadu_pd(data1+@i@));
-/**end repeat2**/
-#else
-/**begin repeat2
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- accum += @from@(data1[@i@]);
-/**end repeat2**/
-#endif
- data1 += 8;
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- /* Add the four SSE values and put in accum */
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
- accum_sse = _mm_add_ps(a, accum_sse);
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
- accum_sse = _mm_add_ps(a, accum_sse);
- _mm_store_ss(&accum, accum_sse);
-#elif EINSUM_USE_SSE2 && @float64@
- /* Add the two SSE2 values and put in accum */
- a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
- accum_sse = _mm_add_pd(a, accum_sse);
- _mm_store_sd(&accum, accum_sse);
-#endif
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
-}
-
-static void
-@name@_sum_of_products_contig_stride0_outstride0_two(int nop, char **dataptr,
- npy_intp const *NPY_UNUSED(strides), npy_intp count)
-{
- @type@ *data0 = (@type@ *)dataptr[0];
- @temptype@ value1 = @from@(*(@type@ *)dataptr[1]);
- @temptype@ accum = 0;
-
-#if EINSUM_USE_SSE1 && @float32@
- __m128 a, accum_sse = _mm_setzero_ps();
-#elif EINSUM_USE_SSE2 && @float64@
- __m128d a, accum_sse = _mm_setzero_pd();
-#endif
-
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_stride0_outstride0_two (%d)\n",
- (int)count);
-
-/* This is placed before the main loop to make small counts faster */
-finish_after_unrolled_loop:
- switch (count) {
-/**begin repeat2
- * #i = 6, 5, 4, 3, 2, 1, 0#
- */
- case @i@+1:
- accum += @from@(data0[@i@]);
-/**end repeat2**/
- case 0:
- *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + accum * value1);
- return;
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data0)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 4#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_ps(accum_sse, _mm_load_ps(data0+@i@));
-/**end repeat2**/
- data0 += 8;
- }
- /* Add the four SSE values and put in accum */
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
- accum_sse = _mm_add_ps(a, accum_sse);
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
- accum_sse = _mm_add_ps(a, accum_sse);
- _mm_store_ss(&accum, accum_sse);
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#elif EINSUM_USE_SSE2 && @float64@
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data0)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_pd(accum_sse, _mm_load_pd(data0+@i@));
-/**end repeat2**/
- data0 += 8;
- }
- /* Add the two SSE2 values and put in accum */
- a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
- accum_sse = _mm_add_pd(a, accum_sse);
- _mm_store_sd(&accum, accum_sse);
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#endif
-
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-#if EINSUM_USE_SSE1 && @float32@
-/**begin repeat2
- * #i = 0, 4#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data0+@i@));
-/**end repeat2**/
-#elif EINSUM_USE_SSE2 && @float64@
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_pd(accum_sse, _mm_loadu_pd(data0+@i@));
-/**end repeat2**/
-#else
-/**begin repeat2
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- accum += @from@(data0[@i@]);
-/**end repeat2**/
-#endif
- data0 += 8;
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- /* Add the four SSE values and put in accum */
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
- accum_sse = _mm_add_ps(a, accum_sse);
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
- accum_sse = _mm_add_ps(a, accum_sse);
- _mm_store_ss(&accum, accum_sse);
-#elif EINSUM_USE_SSE2 && @float64@
- /* Add the two SSE2 values and put in accum */
- a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
- accum_sse = _mm_add_pd(a, accum_sse);
- _mm_store_sd(&accum, accum_sse);
-#endif
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
-}
-
-#elif @nop@ == 3 && !@complex@
-
-static void
-@name@_sum_of_products_contig_three(int nop, char **dataptr,
- npy_intp const *NPY_UNUSED(strides), npy_intp count)
-{
- @type@ *data0 = (@type@ *)dataptr[0];
- @type@ *data1 = (@type@ *)dataptr[1];
- @type@ *data2 = (@type@ *)dataptr[2];
- @type@ *data_out = (@type@ *)dataptr[3];
-
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-/**begin repeat2
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- data_out[@i@] = @to@(@from@(data0[@i@]) *
- @from@(data1[@i@]) *
- @from@(data2[@i@]) +
- @from@(data_out[@i@]));
-/**end repeat2**/
- data0 += 8;
- data1 += 8;
- data2 += 8;
- data_out += 8;
- }
-
- /* Finish off the loop */
-
-/**begin repeat2
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- if (count-- == 0) {
- return;
- }
- data_out[@i@] = @to@(@from@(data0[@i@]) *
- @from@(data1[@i@]) *
- @from@(data2[@i@]) +
- @from@(data_out[@i@]));
-/**end repeat2**/
-}
-
-#else /* @nop@ > 3 || @complex */
-
-static void
-@name@_sum_of_products_contig_@noplabel@(int nop, char **dataptr,
- npy_intp const *NPY_UNUSED(strides), npy_intp count)
-{
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_@noplabel@ (%d)\n",
- (int)count);
-
- while (count--) {
-#if !@complex@
- @temptype@ temp = @from@(*(@type@ *)dataptr[0]);
- int i;
- for (i = 1; i < nop; ++i) {
- temp *= @from@(*(@type@ *)dataptr[i]);
- }
- *(@type@ *)dataptr[nop] = @to@(temp +
- @from@(*(@type@ *)dataptr[i]));
- for (i = 0; i <= nop; ++i) {
- dataptr[i] += sizeof(@type@);
- }
-#else /* complex */
-# if @nop@ <= 3
-# define _SUMPROD_NOP @nop@
-# else
-# define _SUMPROD_NOP nop
-# endif
- @temptype@ re, im, tmp;
- int i;
- re = ((@temptype@ *)dataptr[0])[0];
- im = ((@temptype@ *)dataptr[0])[1];
- for (i = 1; i < _SUMPROD_NOP; ++i) {
- tmp = re * ((@temptype@ *)dataptr[i])[0] -
- im * ((@temptype@ *)dataptr[i])[1];
- im = re * ((@temptype@ *)dataptr[i])[1] +
- im * ((@temptype@ *)dataptr[i])[0];
- re = tmp;
- }
- ((@temptype@ *)dataptr[_SUMPROD_NOP])[0] = re +
- ((@temptype@ *)dataptr[_SUMPROD_NOP])[0];
- ((@temptype@ *)dataptr[_SUMPROD_NOP])[1] = im +
- ((@temptype@ *)dataptr[_SUMPROD_NOP])[1];
-
- for (i = 0; i <= _SUMPROD_NOP; ++i) {
- dataptr[i] += sizeof(@type@);
- }
-# undef _SUMPROD_NOP
-#endif
- }
-}
-
-#endif /* functions for various @nop@ */
-
-#if @nop@ == 1
-
-static void
-@name@_sum_of_products_contig_outstride0_one(int nop, char **dataptr,
- npy_intp const *strides, npy_intp count)
-{
-#if @complex@
- @temptype@ accum_re = 0, accum_im = 0;
- @temptype@ *data0 = (@temptype@ *)dataptr[0];
-#else
- @temptype@ accum = 0;
- @type@ *data0 = (@type@ *)dataptr[0];
-#endif
-
-#if EINSUM_USE_SSE1 && @float32@
- __m128 a, accum_sse = _mm_setzero_ps();
-#elif EINSUM_USE_SSE2 && @float64@
- __m128d a, accum_sse = _mm_setzero_pd();
-#endif
-
-
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_outstride0_one (%d)\n",
- (int)count);
-
-/* This is placed before the main loop to make small counts faster */
-finish_after_unrolled_loop:
- switch (count) {
-/**begin repeat2
- * #i = 6, 5, 4, 3, 2, 1, 0#
- */
- case @i@+1:
-#if !@complex@
- accum += @from@(data0[@i@]);
-#else /* complex */
- accum_re += data0[2*@i@+0];
- accum_im += data0[2*@i@+1];
-#endif
-/**end repeat2**/
- case 0:
-#if @complex@
- ((@temptype@ *)dataptr[1])[0] += accum_re;
- ((@temptype@ *)dataptr[1])[1] += accum_im;
-#else
- *((@type@ *)dataptr[1]) = @to@(accum +
- @from@(*((@type@ *)dataptr[1])));
-#endif
- return;
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data0)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
- _mm_prefetch(data0 + 512, _MM_HINT_T0);
-
-/**begin repeat2
- * #i = 0, 4#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_ps(accum_sse, _mm_load_ps(data0+@i@));
-/**end repeat2**/
- data0 += 8;
- }
-
- /* Add the four SSE values and put in accum */
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
- accum_sse = _mm_add_ps(a, accum_sse);
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
- accum_sse = _mm_add_ps(a, accum_sse);
- _mm_store_ss(&accum, accum_sse);
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#elif EINSUM_USE_SSE2 && @float64@
- /* Use aligned instructions if possible */
- if (EINSUM_IS_SSE_ALIGNED(data0)) {
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
- _mm_prefetch(data0 + 512, _MM_HINT_T0);
-
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_pd(accum_sse, _mm_load_pd(data0+@i@));
-/**end repeat2**/
- data0 += 8;
- }
-
- /* Add the two SSE2 values and put in accum */
- a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
- accum_sse = _mm_add_pd(a, accum_sse);
- _mm_store_sd(&accum, accum_sse);
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
- }
-#endif
-
- /* Unroll the loop by 8 */
- while (count >= 8) {
- count -= 8;
-
-#if EINSUM_USE_SSE1 && @float32@
- _mm_prefetch(data0 + 512, _MM_HINT_T0);
-
-/**begin repeat2
- * #i = 0, 4#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data0+@i@));
-/**end repeat2**/
-#elif EINSUM_USE_SSE2 && @float64@
- _mm_prefetch(data0 + 512, _MM_HINT_T0);
-
-/**begin repeat2
- * #i = 0, 2, 4, 6#
- */
- /*
- * NOTE: This accumulation changes the order, so will likely
- * produce slightly different results.
- */
- accum_sse = _mm_add_pd(accum_sse, _mm_loadu_pd(data0+@i@));
-/**end repeat2**/
-#else
-/**begin repeat2
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
-# if !@complex@
- accum += @from@(data0[@i@]);
-# else /* complex */
- accum_re += data0[2*@i@+0];
- accum_im += data0[2*@i@+1];
-# endif
-/**end repeat2**/
-#endif
-
-#if !@complex@
- data0 += 8;
-#else
- data0 += 8*2;
-#endif
- }
-
-#if EINSUM_USE_SSE1 && @float32@
- /* Add the four SSE values and put in accum */
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
- accum_sse = _mm_add_ps(a, accum_sse);
- a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
- accum_sse = _mm_add_ps(a, accum_sse);
- _mm_store_ss(&accum, accum_sse);
-#elif EINSUM_USE_SSE2 && @float64@
- /* Add the two SSE2 values and put in accum */
- a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
- accum_sse = _mm_add_pd(a, accum_sse);
- _mm_store_sd(&accum, accum_sse);
-#endif
-
- /* Finish off the loop */
- goto finish_after_unrolled_loop;
-}
-
-#endif /* @nop@ == 1 */
-
-static void
-@name@_sum_of_products_outstride0_@noplabel@(int nop, char **dataptr,
- npy_intp const *strides, npy_intp count)
-{
-#if @complex@
- @temptype@ accum_re = 0, accum_im = 0;
-#else
- @temptype@ accum = 0;
-#endif
-
-#if (@nop@ == 1) || (@nop@ <= 3 && !@complex@)
- char *data0 = dataptr[0];
- npy_intp stride0 = strides[0];
-#endif
-#if (@nop@ == 2 || @nop@ == 3) && !@complex@
- char *data1 = dataptr[1];
- npy_intp stride1 = strides[1];
-#endif
-#if (@nop@ == 3) && !@complex@
- char *data2 = dataptr[2];
- npy_intp stride2 = strides[2];
-#endif
-
- NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_outstride0_@noplabel@ (%d)\n",
- (int)count);
-
- while (count--) {
-#if !@complex@
-# if @nop@ == 1
- accum += @from@(*(@type@ *)data0);
- data0 += stride0;
-# elif @nop@ == 2
- accum += @from@(*(@type@ *)data0) *
- @from@(*(@type@ *)data1);
- data0 += stride0;
- data1 += stride1;
-# elif @nop@ == 3
- accum += @from@(*(@type@ *)data0) *
- @from@(*(@type@ *)data1) *
- @from@(*(@type@ *)data2);
- data0 += stride0;
- data1 += stride1;
- data2 += stride2;
-# else
- @temptype@ temp = @from@(*(@type@ *)dataptr[0]);
- int i;
- for (i = 1; i < nop; ++i) {
- temp *= @from@(*(@type@ *)dataptr[i]);
- }
- accum += temp;
- for (i = 0; i < nop; ++i) {
- dataptr[i] += strides[i];
- }
-# endif
-#else /* complex */
-# if @nop@ == 1
- accum_re += ((@temptype@ *)data0)[0];
- accum_im += ((@temptype@ *)data0)[1];
- data0 += stride0;
-# else
-# if @nop@ <= 3
-#define _SUMPROD_NOP @nop@
-# else
-#define _SUMPROD_NOP nop
-# endif
- @temptype@ re, im, tmp;
- int i;
- re = ((@temptype@ *)dataptr[0])[0];
- im = ((@temptype@ *)dataptr[0])[1];
- for (i = 1; i < _SUMPROD_NOP; ++i) {
- tmp = re * ((@temptype@ *)dataptr[i])[0] -
- im * ((@temptype@ *)dataptr[i])[1];
- im = re * ((@temptype@ *)dataptr[i])[1] +
- im * ((@temptype@ *)dataptr[i])[0];
- re = tmp;
- }
- accum_re += re;
- accum_im += im;
- for (i = 0; i < _SUMPROD_NOP; ++i) {
- dataptr[i] += strides[i];
- }
-#undef _SUMPROD_NOP
-# endif
-#endif
- }
-
-#if @complex@
-# if @nop@ <= 3
- ((@temptype@ *)dataptr[@nop@])[0] += accum_re;
- ((@temptype@ *)dataptr[@nop@])[1] += accum_im;
-# else
- ((@temptype@ *)dataptr[nop])[0] += accum_re;
- ((@temptype@ *)dataptr[nop])[1] += accum_im;
-# endif
-#else
-# if @nop@ <= 3
- *((@type@ *)dataptr[@nop@]) = @to@(accum +
- @from@(*((@type@ *)dataptr[@nop@])));
-# else
- *((@type@ *)dataptr[nop]) = @to@(accum +
- @from@(*((@type@ *)dataptr[nop])));
-# endif
-#endif
-
-}
-
-/**end repeat1**/
-
-/**end repeat**/
-
-
-/* Do OR of ANDs for the boolean type */
-
-/**begin repeat
- * #nop = 1, 2, 3, 1000#
- * #noplabel = one, two, three, any#
- */
-
-static void
-bool_sum_of_products_@noplabel@(int nop, char **dataptr,
- npy_intp const *strides, npy_intp count)
-{
-#if (@nop@ <= 3)
- char *data0 = dataptr[0];
- npy_intp stride0 = strides[0];
-#endif
-#if (@nop@ == 2 || @nop@ == 3)
- char *data1 = dataptr[1];
- npy_intp stride1 = strides[1];
-#endif
-#if (@nop@ == 3)
- char *data2 = dataptr[2];
- npy_intp stride2 = strides[2];
-#endif
-#if (@nop@ <= 3)
- char *data_out = dataptr[@nop@];
- npy_intp stride_out = strides[@nop@];
-#endif
-
- while (count--) {
-#if @nop@ == 1
- *(npy_bool *)data_out = *(npy_bool *)data0 ||
- *(npy_bool *)data_out;
- data0 += stride0;
- data_out += stride_out;
-#elif @nop@ == 2
- *(npy_bool *)data_out = (*(npy_bool *)data0 &&
- *(npy_bool *)data1) ||
- *(npy_bool *)data_out;
- data0 += stride0;
- data1 += stride1;
- data_out += stride_out;
-#elif @nop@ == 3
- *(npy_bool *)data_out = (*(npy_bool *)data0 &&
- *(npy_bool *)data1 &&
- *(npy_bool *)data2) ||
- *(npy_bool *)data_out;
- data0 += stride0;
- data1 += stride1;
- data2 += stride2;
- data_out += stride_out;
-#else
- npy_bool temp = *(npy_bool *)dataptr[0];
- int i;
- for (i = 1; i < nop; ++i) {
- temp = temp && *(npy_bool *)dataptr[i];
- }
- *(npy_bool *)dataptr[nop] = temp || *(npy_bool *)dataptr[i];
- for (i = 0; i <= nop; ++i) {
- dataptr[i] += strides[i];
- }
-#endif
- }
-}
-
-static void
-bool_sum_of_products_contig_@noplabel@(int nop, char **dataptr,
- npy_intp const *strides, npy_intp count)
-{
-#if (@nop@ <= 3)
- char *data0 = dataptr[0];
-#endif
-#if (@nop@ == 2 || @nop@ == 3)
- char *data1 = dataptr[1];
-#endif
-#if (@nop@ == 3)
- char *data2 = dataptr[2];
-#endif
-#if (@nop@ <= 3)
- char *data_out = dataptr[@nop@];
-#endif
-
-#if (@nop@ <= 3)
-/* This is placed before the main loop to make small counts faster */
-finish_after_unrolled_loop:
- switch (count) {
-/**begin repeat1
- * #i = 6, 5, 4, 3, 2, 1, 0#
- */
- case @i@+1:
-# if @nop@ == 1
- ((npy_bool *)data_out)[@i@] = ((npy_bool *)data0)[@i@] ||
- ((npy_bool *)data_out)[@i@];
-# elif @nop@ == 2
- ((npy_bool *)data_out)[@i@] =
- (((npy_bool *)data0)[@i@] &&
- ((npy_bool *)data1)[@i@]) ||
- ((npy_bool *)data_out)[@i@];
-# elif @nop@ == 3
- ((npy_bool *)data_out)[@i@] =
- (((npy_bool *)data0)[@i@] &&
- ((npy_bool *)data1)[@i@] &&
- ((npy_bool *)data2)[@i@]) ||
- ((npy_bool *)data_out)[@i@];
-# endif
-/**end repeat1**/
- case 0:
- return;
- }
-#endif
-
-/* Unroll the loop by 8 for fixed-size nop */
-#if (@nop@ <= 3)
- while (count >= 8) {
- count -= 8;
-#else
- while (count--) {
-#endif
-
-# if @nop@ == 1
-/**begin repeat1
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- *((npy_bool *)data_out + @i@) = (*((npy_bool *)data0 + @i@)) ||
- (*((npy_bool *)data_out + @i@));
-/**end repeat1**/
- data0 += 8*sizeof(npy_bool);
- data_out += 8*sizeof(npy_bool);
-# elif @nop@ == 2
-/**begin repeat1
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- *((npy_bool *)data_out + @i@) =
- ((*((npy_bool *)data0 + @i@)) &&
- (*((npy_bool *)data1 + @i@))) ||
- (*((npy_bool *)data_out + @i@));
-/**end repeat1**/
- data0 += 8*sizeof(npy_bool);
- data1 += 8*sizeof(npy_bool);
- data_out += 8*sizeof(npy_bool);
-# elif @nop@ == 3
-/**begin repeat1
- * #i = 0, 1, 2, 3, 4, 5, 6, 7#
- */
- *((npy_bool *)data_out + @i@) =
- ((*((npy_bool *)data0 + @i@)) &&
- (*((npy_bool *)data1 + @i@)) &&
- (*((npy_bool *)data2 + @i@))) ||
- (*((npy_bool *)data_out + @i@));
-/**end repeat1**/
- data0 += 8*sizeof(npy_bool);
- data1 += 8*sizeof(npy_bool);
- data2 += 8*sizeof(npy_bool);
- data_out += 8*sizeof(npy_bool);
-# else
- npy_bool temp = *(npy_bool *)dataptr[0];
- int i;
- for (i = 1; i < nop; ++i) {
- temp = temp && *(npy_bool *)dataptr[i];
- }
- *(npy_bool *)dataptr[nop] = temp || *(npy_bool *)dataptr[i];
- for (i = 0; i <= nop; ++i) {
- dataptr[i] += sizeof(npy_bool);
- }
-# endif
- }
-
- /* If the loop was unrolled, we need to finish it off */
-#if (@nop@ <= 3)
- goto finish_after_unrolled_loop;
-#endif
-}
-
-static void
-bool_sum_of_products_outstride0_@noplabel@(int nop, char **dataptr,
- npy_intp const *strides, npy_intp count)
-{
- npy_bool accum = 0;
-
-#if (@nop@ <= 3)
- char *data0 = dataptr[0];
- npy_intp stride0 = strides[0];
-#endif
-#if (@nop@ == 2 || @nop@ == 3)
- char *data1 = dataptr[1];
- npy_intp stride1 = strides[1];
-#endif
-#if (@nop@ == 3)
- char *data2 = dataptr[2];
- npy_intp stride2 = strides[2];
-#endif
-
- while (count--) {
-#if @nop@ == 1
- accum = *(npy_bool *)data0 || accum;
- data0 += stride0;
-#elif @nop@ == 2
- accum = (*(npy_bool *)data0 && *(npy_bool *)data1) || accum;
- data0 += stride0;
- data1 += stride1;
-#elif @nop@ == 3
- accum = (*(npy_bool *)data0 &&
- *(npy_bool *)data1 &&
- *(npy_bool *)data2) || accum;
- data0 += stride0;
- data1 += stride1;
- data2 += stride2;
-#else
- npy_bool temp = *(npy_bool *)dataptr[0];
- int i;
- for (i = 1; i < nop; ++i) {
- temp = temp && *(npy_bool *)dataptr[i];
- }
- accum = temp || accum;
- for (i = 0; i <= nop; ++i) {
- dataptr[i] += strides[i];
- }
-#endif
- }
-
-# if @nop@ <= 3
- *((npy_bool *)dataptr[@nop@]) = accum || *((npy_bool *)dataptr[@nop@]);
-# else
- *((npy_bool *)dataptr[nop]) = accum || *((npy_bool *)dataptr[nop]);
-# endif
-}
-
-/**end repeat**/
-
-typedef void (*sum_of_products_fn)(int, char **, npy_intp const*, npy_intp);
-
-/* These tables need to match up with the type enum */
-static sum_of_products_fn
-_contig_outstride0_unary_specialization_table[NPY_NTYPES] = {
-/**begin repeat
- * #name = bool,
- * byte, ubyte,
- * short, ushort,
- * int, uint,
- * long, ulong,
- * longlong, ulonglong,
- * float, double, longdouble,
- * cfloat, cdouble, clongdouble,
- * object, string, unicode, void,
- * datetime, timedelta, half#
- * #use = 0,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1, 1,
- * 1, 1, 1,
- * 0, 0, 0, 0,
- * 0, 0, 1#
- */
-#if @use@
- &@name@_sum_of_products_contig_outstride0_one,
-#else
- NULL,
-#endif
-/**end repeat**/
-}; /* End of _contig_outstride0_unary_specialization_table */
-
-static sum_of_products_fn _binary_specialization_table[NPY_NTYPES][5] = {
-/**begin repeat
- * #name = bool,
- * byte, ubyte,
- * short, ushort,
- * int, uint,
- * long, ulong,
- * longlong, ulonglong,
- * float, double, longdouble,
- * cfloat, cdouble, clongdouble,
- * object, string, unicode, void,
- * datetime, timedelta, half#
- * #use = 0,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1, 1,
- * 0, 0, 0,
- * 0, 0, 0, 0,
- * 0, 0, 1#
- */
-#if @use@
-{
- &@name@_sum_of_products_stride0_contig_outstride0_two,
- &@name@_sum_of_products_stride0_contig_outcontig_two,
- &@name@_sum_of_products_contig_stride0_outstride0_two,
- &@name@_sum_of_products_contig_stride0_outcontig_two,
- &@name@_sum_of_products_contig_contig_outstride0_two,
-},
-#else
- {NULL, NULL, NULL, NULL, NULL},
-#endif
-/**end repeat**/
-}; /* End of _binary_specialization_table */
-
-static sum_of_products_fn _outstride0_specialized_table[NPY_NTYPES][4] = {
-/**begin repeat
- * #name = bool,
- * byte, ubyte,
- * short, ushort,
- * int, uint,
- * long, ulong,
- * longlong, ulonglong,
- * float, double, longdouble,
- * cfloat, cdouble, clongdouble,
- * object, string, unicode, void,
- * datetime, timedelta, half#
- * #use = 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1, 1,
- * 1, 1, 1,
- * 0, 0, 0, 0,
- * 0, 0, 1#
- */
-#if @use@
-{
- &@name@_sum_of_products_outstride0_any,
- &@name@_sum_of_products_outstride0_one,
- &@name@_sum_of_products_outstride0_two,
- &@name@_sum_of_products_outstride0_three
-},
-#else
- {NULL, NULL, NULL, NULL},
-#endif
-/**end repeat**/
-}; /* End of _outstride0_specialized_table */
-
-static sum_of_products_fn _allcontig_specialized_table[NPY_NTYPES][4] = {
-/**begin repeat
- * #name = bool,
- * byte, ubyte,
- * short, ushort,
- * int, uint,
- * long, ulong,
- * longlong, ulonglong,
- * float, double, longdouble,
- * cfloat, cdouble, clongdouble,
- * object, string, unicode, void,
- * datetime, timedelta, half#
- * #use = 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1, 1,
- * 1, 1, 1,
- * 0, 0, 0, 0,
- * 0, 0, 1#
- */
-#if @use@
-{
- &@name@_sum_of_products_contig_any,
- &@name@_sum_of_products_contig_one,
- &@name@_sum_of_products_contig_two,
- &@name@_sum_of_products_contig_three
-},
-#else
- {NULL, NULL, NULL, NULL},
-#endif
-/**end repeat**/
-}; /* End of _allcontig_specialized_table */
-
-static sum_of_products_fn _unspecialized_table[NPY_NTYPES][4] = {
-/**begin repeat
- * #name = bool,
- * byte, ubyte,
- * short, ushort,
- * int, uint,
- * long, ulong,
- * longlong, ulonglong,
- * float, double, longdouble,
- * cfloat, cdouble, clongdouble,
- * object, string, unicode, void,
- * datetime, timedelta, half#
- * #use = 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1,
- * 1, 1, 1,
- * 1, 1, 1,
- * 0, 0, 0, 0,
- * 0, 0, 1#
- */
-#if @use@
-{
- &@name@_sum_of_products_any,
- &@name@_sum_of_products_one,
- &@name@_sum_of_products_two,
- &@name@_sum_of_products_three
-},
-#else
- {NULL, NULL, NULL, NULL},
-#endif
-/**end repeat**/
-}; /* End of _unnspecialized_table */
-
-static sum_of_products_fn
-get_sum_of_products_function(int nop, int type_num,
- npy_intp itemsize, npy_intp const *fixed_strides)
-{
- int iop;
-
- if (type_num >= NPY_NTYPES) {
- return NULL;
- }
-
- /* contiguous reduction */
- if (nop == 1 && fixed_strides[0] == itemsize && fixed_strides[1] == 0) {
- sum_of_products_fn ret =
- _contig_outstride0_unary_specialization_table[type_num];
- if (ret != NULL) {
- return ret;
- }
- }
-
- /* nop of 2 has more specializations */
- if (nop == 2) {
- /* Encode the zero/contiguous strides */
- int code;
- code = (fixed_strides[0] == 0) ? 0 :
- (fixed_strides[0] == itemsize) ? 2*2*1 : 8;
- code += (fixed_strides[1] == 0) ? 0 :
- (fixed_strides[1] == itemsize) ? 2*1 : 8;
- code += (fixed_strides[2] == 0) ? 0 :
- (fixed_strides[2] == itemsize) ? 1 : 8;
- if (code >= 2 && code < 7) {
- sum_of_products_fn ret =
- _binary_specialization_table[type_num][code-2];
- if (ret != NULL) {
- return ret;
- }
- }
- }
-
- /* Inner loop with an output stride of 0 */
- if (fixed_strides[nop] == 0) {
- return _outstride0_specialized_table[type_num][nop <= 3 ? nop : 0];
- }
-
- /* Check for all contiguous */
- for (iop = 0; iop < nop + 1; ++iop) {
- if (fixed_strides[iop] != itemsize) {
- break;
- }
- }
-
- /* Contiguous loop */
- if (iop == nop + 1) {
- return _allcontig_specialized_table[type_num][nop <= 3 ? nop : 0];
- }
-
- /* None of the above specializations caught it, general loops */
- return _unspecialized_table[type_num][nop <= 3 ? nop : 0];
-}
+#include "einsum_sumprod.h"
+#include "einsum_debug.h"
/*
diff --git a/numpy/core/src/multiarray/einsum_debug.h b/numpy/core/src/multiarray/einsum_debug.h
new file mode 100644
index 000000000..9aa81fcbd
--- /dev/null
+++ b/numpy/core/src/multiarray/einsum_debug.h
@@ -0,0 +1,28 @@
+/*
+ * This file provides debug macros used by the other einsum files.
+ *
+ * Copyright (c) 2011 by Mark Wiebe (mwwiebe@gmail.com)
+ * The University of British Columbia
+ *
+ * See LICENSE.txt for the license.
+ */
+#ifndef _NPY_MULTIARRAY_EINSUM_DEBUG_H
+#define _NPY_MULTIARRAY_EINSUM_DEBUG_H
+
+/********** PRINTF DEBUG TRACING **************/
+#define NPY_EINSUM_DBG_TRACING 0
+
+#if NPY_EINSUM_DBG_TRACING
+#include <cstdio>
+#define NPY_EINSUM_DBG_PRINT(s) printf("%s", s);
+#define NPY_EINSUM_DBG_PRINT1(s, p1) printf(s, p1);
+#define NPY_EINSUM_DBG_PRINT2(s, p1, p2) printf(s, p1, p2);
+#define NPY_EINSUM_DBG_PRINT3(s, p1, p2, p3) printf(s);
+#else
+#define NPY_EINSUM_DBG_PRINT(s)
+#define NPY_EINSUM_DBG_PRINT1(s, p1)
+#define NPY_EINSUM_DBG_PRINT2(s, p1, p2)
+#define NPY_EINSUM_DBG_PRINT3(s, p1, p2, p3)
+#endif
+
+#endif
diff --git a/numpy/core/src/multiarray/einsum_sumprod.c.src b/numpy/core/src/multiarray/einsum_sumprod.c.src
new file mode 100644
index 000000000..c58e74287
--- /dev/null
+++ b/numpy/core/src/multiarray/einsum_sumprod.c.src
@@ -0,0 +1,1897 @@
+/*
+ * This file provides optimized sum of product implementations used internally
+ * by einsum.
+ *
+ * Copyright (c) 2011 by Mark Wiebe (mwwiebe@gmail.com)
+ * The University of British Columbia
+ *
+ * See LICENSE.txt for the license.
+ */
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+
+#include <numpy/npy_common.h>
+#include <numpy/ndarraytypes.h> /* for NPY_NTYPES */
+#include <numpy/halffloat.h>
+
+#include "einsum_sumprod.h"
+#include "einsum_debug.h"
+
+
+#ifdef NPY_HAVE_SSE_INTRINSICS
+#define EINSUM_USE_SSE1 1
+#else
+#define EINSUM_USE_SSE1 0
+#endif
+
+#ifdef NPY_HAVE_SSE2_INTRINSICS
+#define EINSUM_USE_SSE2 1
+#else
+#define EINSUM_USE_SSE2 0
+#endif
+
+#if EINSUM_USE_SSE1
+#include <xmmintrin.h>
+#endif
+
+#if EINSUM_USE_SSE2
+#include <emmintrin.h>
+#endif
+
+#define EINSUM_IS_SSE_ALIGNED(x) ((((npy_intp)x)&0xf) == 0)
+
+/**********************************************/
+
+/**begin repeat
+ * #name = byte, short, int, long, longlong,
+ * ubyte, ushort, uint, ulong, ulonglong,
+ * half, float, double, longdouble,
+ * cfloat, cdouble, clongdouble#
+ * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong,
+ * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong,
+ * npy_half, npy_float, npy_double, npy_longdouble,
+ * npy_cfloat, npy_cdouble, npy_clongdouble#
+ * #temptype = npy_byte, npy_short, npy_int, npy_long, npy_longlong,
+ * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong,
+ * npy_float, npy_float, npy_double, npy_longdouble,
+ * npy_float, npy_double, npy_longdouble#
+ * #to = ,,,,,
+ * ,,,,,
+ * npy_float_to_half,,,,
+ * ,,#
+ * #from = ,,,,,
+ * ,,,,,
+ * npy_half_to_float,,,,
+ * ,,#
+ * #complex = 0*5,
+ * 0*5,
+ * 0*4,
+ * 1*3#
+ * #float32 = 0*5,
+ * 0*5,
+ * 0,1,0,0,
+ * 0*3#
+ * #float64 = 0*5,
+ * 0*5,
+ * 0,0,1,0,
+ * 0*3#
+ */
+
+/**begin repeat1
+ * #nop = 1, 2, 3, 1000#
+ * #noplabel = one, two, three, any#
+ */
+static void
+@name@_sum_of_products_@noplabel@(int nop, char **dataptr,
+ npy_intp const *strides, npy_intp count)
+{
+#if (@nop@ == 1) || (@nop@ <= 3 && !@complex@)
+ char *data0 = dataptr[0];
+ npy_intp stride0 = strides[0];
+#endif
+#if (@nop@ == 2 || @nop@ == 3) && !@complex@
+ char *data1 = dataptr[1];
+ npy_intp stride1 = strides[1];
+#endif
+#if (@nop@ == 3) && !@complex@
+ char *data2 = dataptr[2];
+ npy_intp stride2 = strides[2];
+#endif
+#if (@nop@ == 1) || (@nop@ <= 3 && !@complex@)
+ char *data_out = dataptr[@nop@];
+ npy_intp stride_out = strides[@nop@];
+#endif
+
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_@noplabel@ (%d)\n", (int)count);
+
+ while (count--) {
+#if !@complex@
+# if @nop@ == 1
+ *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) +
+ @from@(*(@type@ *)data_out));
+ data0 += stride0;
+ data_out += stride_out;
+# elif @nop@ == 2
+ *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) *
+ @from@(*(@type@ *)data1) +
+ @from@(*(@type@ *)data_out));
+ data0 += stride0;
+ data1 += stride1;
+ data_out += stride_out;
+# elif @nop@ == 3
+ *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) *
+ @from@(*(@type@ *)data1) *
+ @from@(*(@type@ *)data2) +
+ @from@(*(@type@ *)data_out));
+ data0 += stride0;
+ data1 += stride1;
+ data2 += stride2;
+ data_out += stride_out;
+# else
+ @temptype@ temp = @from@(*(@type@ *)dataptr[0]);
+ int i;
+ for (i = 1; i < nop; ++i) {
+ temp *= @from@(*(@type@ *)dataptr[i]);
+ }
+ *(@type@ *)dataptr[nop] = @to@(temp +
+ @from@(*(@type@ *)dataptr[i]));
+ for (i = 0; i <= nop; ++i) {
+ dataptr[i] += strides[i];
+ }
+# endif
+#else /* complex */
+# if @nop@ == 1
+ ((@temptype@ *)data_out)[0] = ((@temptype@ *)data0)[0] +
+ ((@temptype@ *)data_out)[0];
+ ((@temptype@ *)data_out)[1] = ((@temptype@ *)data0)[1] +
+ ((@temptype@ *)data_out)[1];
+ data0 += stride0;
+ data_out += stride_out;
+# else
+# if @nop@ <= 3
+#define _SUMPROD_NOP @nop@
+# else
+#define _SUMPROD_NOP nop
+# endif
+ @temptype@ re, im, tmp;
+ int i;
+ re = ((@temptype@ *)dataptr[0])[0];
+ im = ((@temptype@ *)dataptr[0])[1];
+ for (i = 1; i < _SUMPROD_NOP; ++i) {
+ tmp = re * ((@temptype@ *)dataptr[i])[0] -
+ im * ((@temptype@ *)dataptr[i])[1];
+ im = re * ((@temptype@ *)dataptr[i])[1] +
+ im * ((@temptype@ *)dataptr[i])[0];
+ re = tmp;
+ }
+ ((@temptype@ *)dataptr[_SUMPROD_NOP])[0] = re +
+ ((@temptype@ *)dataptr[_SUMPROD_NOP])[0];
+ ((@temptype@ *)dataptr[_SUMPROD_NOP])[1] = im +
+ ((@temptype@ *)dataptr[_SUMPROD_NOP])[1];
+
+ for (i = 0; i <= _SUMPROD_NOP; ++i) {
+ dataptr[i] += strides[i];
+ }
+#undef _SUMPROD_NOP
+# endif
+#endif
+ }
+}
+
+#if @nop@ == 1
+
+static void
+@name@_sum_of_products_contig_one(int nop, char **dataptr,
+ npy_intp const *NPY_UNUSED(strides), npy_intp count)
+{
+ @type@ *data0 = (@type@ *)dataptr[0];
+ @type@ *data_out = (@type@ *)dataptr[1];
+
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_one (%d)\n",
+ (int)count);
+
+/* This is placed before the main loop to make small counts faster */
+finish_after_unrolled_loop:
+ switch (count) {
+/**begin repeat2
+ * #i = 6, 5, 4, 3, 2, 1, 0#
+ */
+ case @i@+1:
+#if !@complex@
+ data_out[@i@] = @to@(@from@(data0[@i@]) +
+ @from@(data_out[@i@]));
+#else
+ ((@temptype@ *)data_out + 2*@i@)[0] =
+ ((@temptype@ *)data0 + 2*@i@)[0] +
+ ((@temptype@ *)data_out + 2*@i@)[0];
+ ((@temptype@ *)data_out + 2*@i@)[1] =
+ ((@temptype@ *)data0 + 2*@i@)[1] +
+ ((@temptype@ *)data_out + 2*@i@)[1];
+#endif
+/**end repeat2**/
+ case 0:
+ return;
+ }
+
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+#if !@complex@
+ data_out[@i@] = @to@(@from@(data0[@i@]) +
+ @from@(data_out[@i@]));
+#else /* complex */
+ ((@temptype@ *)data_out + 2*@i@)[0] =
+ ((@temptype@ *)data0 + 2*@i@)[0] +
+ ((@temptype@ *)data_out + 2*@i@)[0];
+ ((@temptype@ *)data_out + 2*@i@)[1] =
+ ((@temptype@ *)data0 + 2*@i@)[1] +
+ ((@temptype@ *)data_out + 2*@i@)[1];
+#endif
+/**end repeat2**/
+ data0 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+}
+
+#elif @nop@ == 2 && !@complex@
+
+static void
+@name@_sum_of_products_contig_two(int nop, char **dataptr,
+ npy_intp const *NPY_UNUSED(strides), npy_intp count)
+{
+ @type@ *data0 = (@type@ *)dataptr[0];
+ @type@ *data1 = (@type@ *)dataptr[1];
+ @type@ *data_out = (@type@ *)dataptr[2];
+
+#if EINSUM_USE_SSE1 && @float32@
+ __m128 a, b;
+#elif EINSUM_USE_SSE2 && @float64@
+ __m128d a, b;
+#endif
+
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_two (%d)\n",
+ (int)count);
+
+/* This is placed before the main loop to make small counts faster */
+finish_after_unrolled_loop:
+ switch (count) {
+/**begin repeat2
+ * #i = 6, 5, 4, 3, 2, 1, 0#
+ */
+ case @i@+1:
+ data_out[@i@] = @to@(@from@(data0[@i@]) *
+ @from@(data1[@i@]) +
+ @from@(data_out[@i@]));
+/**end repeat2**/
+ case 0:
+ return;
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1) &&
+ EINSUM_IS_SSE_ALIGNED(data_out)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ a = _mm_mul_ps(_mm_load_ps(data0+@i@), _mm_load_ps(data1+@i@));
+ b = _mm_add_ps(a, _mm_load_ps(data_out+@i@));
+ _mm_store_ps(data_out+@i@, b);
+/**end repeat2**/
+ data0 += 8;
+ data1 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#elif EINSUM_USE_SSE2 && @float64@
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1) &&
+ EINSUM_IS_SSE_ALIGNED(data_out)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ a = _mm_mul_pd(_mm_load_pd(data0+@i@), _mm_load_pd(data1+@i@));
+ b = _mm_add_pd(a, _mm_load_pd(data_out+@i@));
+ _mm_store_pd(data_out+@i@, b);
+/**end repeat2**/
+ data0 += 8;
+ data1 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#endif
+
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+#if EINSUM_USE_SSE1 && @float32@
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ a = _mm_mul_ps(_mm_loadu_ps(data0+@i@), _mm_loadu_ps(data1+@i@));
+ b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@));
+ _mm_storeu_ps(data_out+@i@, b);
+/**end repeat2**/
+#elif EINSUM_USE_SSE2 && @float64@
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ a = _mm_mul_pd(_mm_loadu_pd(data0+@i@), _mm_loadu_pd(data1+@i@));
+ b = _mm_add_pd(a, _mm_loadu_pd(data_out+@i@));
+ _mm_storeu_pd(data_out+@i@, b);
+/**end repeat2**/
+#else
+/**begin repeat2
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ data_out[@i@] = @to@(@from@(data0[@i@]) *
+ @from@(data1[@i@]) +
+ @from@(data_out[@i@]));
+/**end repeat2**/
+#endif
+ data0 += 8;
+ data1 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+}
+
+/* Some extra specializations for the two operand case */
+static void
+@name@_sum_of_products_stride0_contig_outcontig_two(int nop, char **dataptr,
+ npy_intp const *NPY_UNUSED(strides), npy_intp count)
+{
+ @temptype@ value0 = @from@(*(@type@ *)dataptr[0]);
+ @type@ *data1 = (@type@ *)dataptr[1];
+ @type@ *data_out = (@type@ *)dataptr[2];
+
+#if EINSUM_USE_SSE1 && @float32@
+ __m128 a, b, value0_sse;
+#elif EINSUM_USE_SSE2 && @float64@
+ __m128d a, b, value0_sse;
+#endif
+
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_stride0_contig_outcontig_two (%d)\n",
+ (int)count);
+
+/* This is placed before the main loop to make small counts faster */
+finish_after_unrolled_loop:
+ switch (count) {
+/**begin repeat2
+ * #i = 6, 5, 4, 3, 2, 1, 0#
+ */
+ case @i@+1:
+ data_out[@i@] = @to@(value0 *
+ @from@(data1[@i@]) +
+ @from@(data_out[@i@]));
+/**end repeat2**/
+ case 0:
+ return;
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ value0_sse = _mm_set_ps1(value0);
+
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data1) && EINSUM_IS_SSE_ALIGNED(data_out)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ a = _mm_mul_ps(value0_sse, _mm_load_ps(data1+@i@));
+ b = _mm_add_ps(a, _mm_load_ps(data_out+@i@));
+ _mm_store_ps(data_out+@i@, b);
+/**end repeat2**/
+ data1 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+ if (count > 0) {
+ goto finish_after_unrolled_loop;
+ }
+ else {
+ return;
+ }
+ }
+#elif EINSUM_USE_SSE2 && @float64@
+ value0_sse = _mm_set1_pd(value0);
+
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data1) && EINSUM_IS_SSE_ALIGNED(data_out)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ a = _mm_mul_pd(value0_sse, _mm_load_pd(data1+@i@));
+ b = _mm_add_pd(a, _mm_load_pd(data_out+@i@));
+ _mm_store_pd(data_out+@i@, b);
+/**end repeat2**/
+ data1 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+ if (count > 0) {
+ goto finish_after_unrolled_loop;
+ }
+ else {
+ return;
+ }
+ }
+#endif
+
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+#if EINSUM_USE_SSE1 && @float32@
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ a = _mm_mul_ps(value0_sse, _mm_loadu_ps(data1+@i@));
+ b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@));
+ _mm_storeu_ps(data_out+@i@, b);
+/**end repeat2**/
+#elif EINSUM_USE_SSE2 && @float64@
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ a = _mm_mul_pd(value0_sse, _mm_loadu_pd(data1+@i@));
+ b = _mm_add_pd(a, _mm_loadu_pd(data_out+@i@));
+ _mm_storeu_pd(data_out+@i@, b);
+/**end repeat2**/
+#else
+/**begin repeat2
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ data_out[@i@] = @to@(value0 *
+ @from@(data1[@i@]) +
+ @from@(data_out[@i@]));
+/**end repeat2**/
+#endif
+ data1 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+ if (count > 0) {
+ goto finish_after_unrolled_loop;
+ }
+}
+
+static void
+@name@_sum_of_products_contig_stride0_outcontig_two(int nop, char **dataptr,
+ npy_intp const *NPY_UNUSED(strides), npy_intp count)
+{
+ @type@ *data0 = (@type@ *)dataptr[0];
+ @temptype@ value1 = @from@(*(@type@ *)dataptr[1]);
+ @type@ *data_out = (@type@ *)dataptr[2];
+
+#if EINSUM_USE_SSE1 && @float32@
+ __m128 a, b, value1_sse;
+#elif EINSUM_USE_SSE2 && @float64@
+ __m128d a, b, value1_sse;
+#endif
+
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_stride0_outcontig_two (%d)\n",
+ (int)count);
+
+/* This is placed before the main loop to make small counts faster */
+finish_after_unrolled_loop:
+ switch (count) {
+/**begin repeat2
+ * #i = 6, 5, 4, 3, 2, 1, 0#
+ */
+ case @i@+1:
+ data_out[@i@] = @to@(@from@(data0[@i@])*
+ value1 +
+ @from@(data_out[@i@]));
+/**end repeat2**/
+ case 0:
+ return;
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ value1_sse = _mm_set_ps1(value1);
+
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data_out)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ a = _mm_mul_ps(_mm_load_ps(data0+@i@), value1_sse);
+ b = _mm_add_ps(a, _mm_load_ps(data_out+@i@));
+ _mm_store_ps(data_out+@i@, b);
+/**end repeat2**/
+ data0 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#elif EINSUM_USE_SSE2 && @float64@
+ value1_sse = _mm_set1_pd(value1);
+
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data_out)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ a = _mm_mul_pd(_mm_load_pd(data0+@i@), value1_sse);
+ b = _mm_add_pd(a, _mm_load_pd(data_out+@i@));
+ _mm_store_pd(data_out+@i@, b);
+/**end repeat2**/
+ data0 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#endif
+
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+#if EINSUM_USE_SSE1 && @float32@
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ a = _mm_mul_ps(_mm_loadu_ps(data0+@i@), value1_sse);
+ b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@));
+ _mm_storeu_ps(data_out+@i@, b);
+/**end repeat2**/
+#elif EINSUM_USE_SSE2 && @float64@
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ a = _mm_mul_pd(_mm_loadu_pd(data0+@i@), value1_sse);
+ b = _mm_add_pd(a, _mm_loadu_pd(data_out+@i@));
+ _mm_storeu_pd(data_out+@i@, b);
+/**end repeat2**/
+#else
+/**begin repeat2
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ data_out[@i@] = @to@(@from@(data0[@i@])*
+ value1 +
+ @from@(data_out[@i@]));
+/**end repeat2**/
+#endif
+ data0 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+}
+
+static void
+@name@_sum_of_products_contig_contig_outstride0_two(int nop, char **dataptr,
+ npy_intp const *NPY_UNUSED(strides), npy_intp count)
+{
+ @type@ *data0 = (@type@ *)dataptr[0];
+ @type@ *data1 = (@type@ *)dataptr[1];
+ @temptype@ accum = 0;
+
+#if EINSUM_USE_SSE1 && @float32@
+ __m128 a, accum_sse = _mm_setzero_ps();
+#elif EINSUM_USE_SSE2 && @float64@
+ __m128d a, accum_sse = _mm_setzero_pd();
+#endif
+
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_contig_outstride0_two (%d)\n",
+ (int)count);
+
+/* This is placed before the main loop to make small counts faster */
+finish_after_unrolled_loop:
+ switch (count) {
+/**begin repeat2
+ * #i = 6, 5, 4, 3, 2, 1, 0#
+ */
+ case @i@+1:
+ accum += @from@(data0[@i@]) * @from@(data1[@i@]);
+/**end repeat2**/
+ case 0:
+ *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + accum);
+ return;
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+ _mm_prefetch(data0 + 512, _MM_HINT_T0);
+ _mm_prefetch(data1 + 512, _MM_HINT_T0);
+
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ a = _mm_mul_ps(_mm_load_ps(data0+@i@), _mm_load_ps(data1+@i@));
+ accum_sse = _mm_add_ps(accum_sse, a);
+/**end repeat2**/
+ data0 += 8;
+ data1 += 8;
+ }
+
+ /* Add the four SSE values and put in accum */
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ _mm_store_ss(&accum, accum_sse);
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#elif EINSUM_USE_SSE2 && @float64@
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+ _mm_prefetch(data0 + 512, _MM_HINT_T0);
+ _mm_prefetch(data1 + 512, _MM_HINT_T0);
+
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ a = _mm_mul_pd(_mm_load_pd(data0+@i@), _mm_load_pd(data1+@i@));
+ accum_sse = _mm_add_pd(accum_sse, a);
+/**end repeat2**/
+ data0 += 8;
+ data1 += 8;
+ }
+
+ /* Add the two SSE2 values and put in accum */
+ a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
+ accum_sse = _mm_add_pd(a, accum_sse);
+ _mm_store_sd(&accum, accum_sse);
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#endif
+
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+#if EINSUM_USE_SSE1 && @float32@
+ _mm_prefetch(data0 + 512, _MM_HINT_T0);
+ _mm_prefetch(data1 + 512, _MM_HINT_T0);
+
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ a = _mm_mul_ps(_mm_loadu_ps(data0+@i@), _mm_loadu_ps(data1+@i@));
+ accum_sse = _mm_add_ps(accum_sse, a);
+/**end repeat2**/
+#elif EINSUM_USE_SSE2 && @float64@
+ _mm_prefetch(data0 + 512, _MM_HINT_T0);
+ _mm_prefetch(data1 + 512, _MM_HINT_T0);
+
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ a = _mm_mul_pd(_mm_loadu_pd(data0+@i@), _mm_loadu_pd(data1+@i@));
+ accum_sse = _mm_add_pd(accum_sse, a);
+/**end repeat2**/
+#else
+/**begin repeat2
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ accum += @from@(data0[@i@]) * @from@(data1[@i@]);
+/**end repeat2**/
+#endif
+ data0 += 8;
+ data1 += 8;
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ /* Add the four SSE values and put in accum */
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ _mm_store_ss(&accum, accum_sse);
+#elif EINSUM_USE_SSE2 && @float64@
+ /* Add the two SSE2 values and put in accum */
+ a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
+ accum_sse = _mm_add_pd(a, accum_sse);
+ _mm_store_sd(&accum, accum_sse);
+#endif
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+}
+
+static void
+@name@_sum_of_products_stride0_contig_outstride0_two(int nop, char **dataptr,
+ npy_intp const *NPY_UNUSED(strides), npy_intp count)
+{
+ @temptype@ value0 = @from@(*(@type@ *)dataptr[0]);
+ @type@ *data1 = (@type@ *)dataptr[1];
+ @temptype@ accum = 0;
+
+#if EINSUM_USE_SSE1 && @float32@
+ __m128 a, accum_sse = _mm_setzero_ps();
+#elif EINSUM_USE_SSE2 && @float64@
+ __m128d a, accum_sse = _mm_setzero_pd();
+#endif
+
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_stride0_contig_outstride0_two (%d)\n",
+ (int)count);
+
+/* This is placed before the main loop to make small counts faster */
+finish_after_unrolled_loop:
+ switch (count) {
+/**begin repeat2
+ * #i = 6, 5, 4, 3, 2, 1, 0#
+ */
+ case @i@+1:
+ accum += @from@(data1[@i@]);
+/**end repeat2**/
+ case 0:
+ *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + value0 * accum);
+ return;
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data1)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_ps(accum_sse, _mm_load_ps(data1+@i@));
+/**end repeat2**/
+ data1 += 8;
+ }
+ /* Add the four SSE values and put in accum */
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ _mm_store_ss(&accum, accum_sse);
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#elif EINSUM_USE_SSE2 && @float64@
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data1)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_pd(accum_sse, _mm_load_pd(data1+@i@));
+/**end repeat2**/
+ data1 += 8;
+ }
+ /* Add the two SSE2 values and put in accum */
+ a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
+ accum_sse = _mm_add_pd(a, accum_sse);
+ _mm_store_sd(&accum, accum_sse);
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#endif
+
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+#if EINSUM_USE_SSE1 && @float32@
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data1+@i@));
+/**end repeat2**/
+#elif EINSUM_USE_SSE2 && @float64@
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_pd(accum_sse, _mm_loadu_pd(data1+@i@));
+/**end repeat2**/
+#else
+/**begin repeat2
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ accum += @from@(data1[@i@]);
+/**end repeat2**/
+#endif
+ data1 += 8;
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ /* Add the four SSE values and put in accum */
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ _mm_store_ss(&accum, accum_sse);
+#elif EINSUM_USE_SSE2 && @float64@
+ /* Add the two SSE2 values and put in accum */
+ a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
+ accum_sse = _mm_add_pd(a, accum_sse);
+ _mm_store_sd(&accum, accum_sse);
+#endif
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+}
+
+static void
+@name@_sum_of_products_contig_stride0_outstride0_two(int nop, char **dataptr,
+ npy_intp const *NPY_UNUSED(strides), npy_intp count)
+{
+ @type@ *data0 = (@type@ *)dataptr[0];
+ @temptype@ value1 = @from@(*(@type@ *)dataptr[1]);
+ @temptype@ accum = 0;
+
+#if EINSUM_USE_SSE1 && @float32@
+ __m128 a, accum_sse = _mm_setzero_ps();
+#elif EINSUM_USE_SSE2 && @float64@
+ __m128d a, accum_sse = _mm_setzero_pd();
+#endif
+
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_stride0_outstride0_two (%d)\n",
+ (int)count);
+
+/* This is placed before the main loop to make small counts faster */
+finish_after_unrolled_loop:
+ switch (count) {
+/**begin repeat2
+ * #i = 6, 5, 4, 3, 2, 1, 0#
+ */
+ case @i@+1:
+ accum += @from@(data0[@i@]);
+/**end repeat2**/
+ case 0:
+ *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + accum * value1);
+ return;
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data0)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_ps(accum_sse, _mm_load_ps(data0+@i@));
+/**end repeat2**/
+ data0 += 8;
+ }
+ /* Add the four SSE values and put in accum */
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ _mm_store_ss(&accum, accum_sse);
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#elif EINSUM_USE_SSE2 && @float64@
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data0)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_pd(accum_sse, _mm_load_pd(data0+@i@));
+/**end repeat2**/
+ data0 += 8;
+ }
+ /* Add the two SSE2 values and put in accum */
+ a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
+ accum_sse = _mm_add_pd(a, accum_sse);
+ _mm_store_sd(&accum, accum_sse);
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#endif
+
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+#if EINSUM_USE_SSE1 && @float32@
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data0+@i@));
+/**end repeat2**/
+#elif EINSUM_USE_SSE2 && @float64@
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_pd(accum_sse, _mm_loadu_pd(data0+@i@));
+/**end repeat2**/
+#else
+/**begin repeat2
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ accum += @from@(data0[@i@]);
+/**end repeat2**/
+#endif
+ data0 += 8;
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ /* Add the four SSE values and put in accum */
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ _mm_store_ss(&accum, accum_sse);
+#elif EINSUM_USE_SSE2 && @float64@
+ /* Add the two SSE2 values and put in accum */
+ a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
+ accum_sse = _mm_add_pd(a, accum_sse);
+ _mm_store_sd(&accum, accum_sse);
+#endif
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+}
+
+#elif @nop@ == 3 && !@complex@
+
+static void
+@name@_sum_of_products_contig_three(int nop, char **dataptr,
+ npy_intp const *NPY_UNUSED(strides), npy_intp count)
+{
+ @type@ *data0 = (@type@ *)dataptr[0];
+ @type@ *data1 = (@type@ *)dataptr[1];
+ @type@ *data2 = (@type@ *)dataptr[2];
+ @type@ *data_out = (@type@ *)dataptr[3];
+
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+/**begin repeat2
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ data_out[@i@] = @to@(@from@(data0[@i@]) *
+ @from@(data1[@i@]) *
+ @from@(data2[@i@]) +
+ @from@(data_out[@i@]));
+/**end repeat2**/
+ data0 += 8;
+ data1 += 8;
+ data2 += 8;
+ data_out += 8;
+ }
+
+ /* Finish off the loop */
+
+/**begin repeat2
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ if (count-- == 0) {
+ return;
+ }
+ data_out[@i@] = @to@(@from@(data0[@i@]) *
+ @from@(data1[@i@]) *
+ @from@(data2[@i@]) +
+ @from@(data_out[@i@]));
+/**end repeat2**/
+}
+
+#else /* @nop@ > 3 || @complex */
+
+static void
+@name@_sum_of_products_contig_@noplabel@(int nop, char **dataptr,
+ npy_intp const *NPY_UNUSED(strides), npy_intp count)
+{
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_@noplabel@ (%d)\n",
+ (int)count);
+
+ while (count--) {
+#if !@complex@
+ @temptype@ temp = @from@(*(@type@ *)dataptr[0]);
+ int i;
+ for (i = 1; i < nop; ++i) {
+ temp *= @from@(*(@type@ *)dataptr[i]);
+ }
+ *(@type@ *)dataptr[nop] = @to@(temp +
+ @from@(*(@type@ *)dataptr[i]));
+ for (i = 0; i <= nop; ++i) {
+ dataptr[i] += sizeof(@type@);
+ }
+#else /* complex */
+# if @nop@ <= 3
+# define _SUMPROD_NOP @nop@
+# else
+# define _SUMPROD_NOP nop
+# endif
+ @temptype@ re, im, tmp;
+ int i;
+ re = ((@temptype@ *)dataptr[0])[0];
+ im = ((@temptype@ *)dataptr[0])[1];
+ for (i = 1; i < _SUMPROD_NOP; ++i) {
+ tmp = re * ((@temptype@ *)dataptr[i])[0] -
+ im * ((@temptype@ *)dataptr[i])[1];
+ im = re * ((@temptype@ *)dataptr[i])[1] +
+ im * ((@temptype@ *)dataptr[i])[0];
+ re = tmp;
+ }
+ ((@temptype@ *)dataptr[_SUMPROD_NOP])[0] = re +
+ ((@temptype@ *)dataptr[_SUMPROD_NOP])[0];
+ ((@temptype@ *)dataptr[_SUMPROD_NOP])[1] = im +
+ ((@temptype@ *)dataptr[_SUMPROD_NOP])[1];
+
+ for (i = 0; i <= _SUMPROD_NOP; ++i) {
+ dataptr[i] += sizeof(@type@);
+ }
+# undef _SUMPROD_NOP
+#endif
+ }
+}
+
+#endif /* functions for various @nop@ */
+
+#if @nop@ == 1
+
+static void
+@name@_sum_of_products_contig_outstride0_one(int nop, char **dataptr,
+ npy_intp const *strides, npy_intp count)
+{
+#if @complex@
+ @temptype@ accum_re = 0, accum_im = 0;
+ @temptype@ *data0 = (@temptype@ *)dataptr[0];
+#else
+ @temptype@ accum = 0;
+ @type@ *data0 = (@type@ *)dataptr[0];
+#endif
+
+#if EINSUM_USE_SSE1 && @float32@
+ __m128 a, accum_sse = _mm_setzero_ps();
+#elif EINSUM_USE_SSE2 && @float64@
+ __m128d a, accum_sse = _mm_setzero_pd();
+#endif
+
+
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_outstride0_one (%d)\n",
+ (int)count);
+
+/* This is placed before the main loop to make small counts faster */
+finish_after_unrolled_loop:
+ switch (count) {
+/**begin repeat2
+ * #i = 6, 5, 4, 3, 2, 1, 0#
+ */
+ case @i@+1:
+#if !@complex@
+ accum += @from@(data0[@i@]);
+#else /* complex */
+ accum_re += data0[2*@i@+0];
+ accum_im += data0[2*@i@+1];
+#endif
+/**end repeat2**/
+ case 0:
+#if @complex@
+ ((@temptype@ *)dataptr[1])[0] += accum_re;
+ ((@temptype@ *)dataptr[1])[1] += accum_im;
+#else
+ *((@type@ *)dataptr[1]) = @to@(accum +
+ @from@(*((@type@ *)dataptr[1])));
+#endif
+ return;
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data0)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+ _mm_prefetch(data0 + 512, _MM_HINT_T0);
+
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_ps(accum_sse, _mm_load_ps(data0+@i@));
+/**end repeat2**/
+ data0 += 8;
+ }
+
+ /* Add the four SSE values and put in accum */
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ _mm_store_ss(&accum, accum_sse);
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#elif EINSUM_USE_SSE2 && @float64@
+ /* Use aligned instructions if possible */
+ if (EINSUM_IS_SSE_ALIGNED(data0)) {
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+ _mm_prefetch(data0 + 512, _MM_HINT_T0);
+
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_pd(accum_sse, _mm_load_pd(data0+@i@));
+/**end repeat2**/
+ data0 += 8;
+ }
+
+ /* Add the two SSE2 values and put in accum */
+ a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
+ accum_sse = _mm_add_pd(a, accum_sse);
+ _mm_store_sd(&accum, accum_sse);
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+ }
+#endif
+
+ /* Unroll the loop by 8 */
+ while (count >= 8) {
+ count -= 8;
+
+#if EINSUM_USE_SSE1 && @float32@
+ _mm_prefetch(data0 + 512, _MM_HINT_T0);
+
+/**begin repeat2
+ * #i = 0, 4#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data0+@i@));
+/**end repeat2**/
+#elif EINSUM_USE_SSE2 && @float64@
+ _mm_prefetch(data0 + 512, _MM_HINT_T0);
+
+/**begin repeat2
+ * #i = 0, 2, 4, 6#
+ */
+ /*
+ * NOTE: This accumulation changes the order, so will likely
+ * produce slightly different results.
+ */
+ accum_sse = _mm_add_pd(accum_sse, _mm_loadu_pd(data0+@i@));
+/**end repeat2**/
+#else
+/**begin repeat2
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+# if !@complex@
+ accum += @from@(data0[@i@]);
+# else /* complex */
+ accum_re += data0[2*@i@+0];
+ accum_im += data0[2*@i@+1];
+# endif
+/**end repeat2**/
+#endif
+
+#if !@complex@
+ data0 += 8;
+#else
+ data0 += 8*2;
+#endif
+ }
+
+#if EINSUM_USE_SSE1 && @float32@
+ /* Add the four SSE values and put in accum */
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2));
+ accum_sse = _mm_add_ps(a, accum_sse);
+ _mm_store_ss(&accum, accum_sse);
+#elif EINSUM_USE_SSE2 && @float64@
+ /* Add the two SSE2 values and put in accum */
+ a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1));
+ accum_sse = _mm_add_pd(a, accum_sse);
+ _mm_store_sd(&accum, accum_sse);
+#endif
+
+ /* Finish off the loop */
+ goto finish_after_unrolled_loop;
+}
+
+#endif /* @nop@ == 1 */
+
+static void
+@name@_sum_of_products_outstride0_@noplabel@(int nop, char **dataptr,
+ npy_intp const *strides, npy_intp count)
+{
+#if @complex@
+ @temptype@ accum_re = 0, accum_im = 0;
+#else
+ @temptype@ accum = 0;
+#endif
+
+#if (@nop@ == 1) || (@nop@ <= 3 && !@complex@)
+ char *data0 = dataptr[0];
+ npy_intp stride0 = strides[0];
+#endif
+#if (@nop@ == 2 || @nop@ == 3) && !@complex@
+ char *data1 = dataptr[1];
+ npy_intp stride1 = strides[1];
+#endif
+#if (@nop@ == 3) && !@complex@
+ char *data2 = dataptr[2];
+ npy_intp stride2 = strides[2];
+#endif
+
+ NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_outstride0_@noplabel@ (%d)\n",
+ (int)count);
+
+ while (count--) {
+#if !@complex@
+# if @nop@ == 1
+ accum += @from@(*(@type@ *)data0);
+ data0 += stride0;
+# elif @nop@ == 2
+ accum += @from@(*(@type@ *)data0) *
+ @from@(*(@type@ *)data1);
+ data0 += stride0;
+ data1 += stride1;
+# elif @nop@ == 3
+ accum += @from@(*(@type@ *)data0) *
+ @from@(*(@type@ *)data1) *
+ @from@(*(@type@ *)data2);
+ data0 += stride0;
+ data1 += stride1;
+ data2 += stride2;
+# else
+ @temptype@ temp = @from@(*(@type@ *)dataptr[0]);
+ int i;
+ for (i = 1; i < nop; ++i) {
+ temp *= @from@(*(@type@ *)dataptr[i]);
+ }
+ accum += temp;
+ for (i = 0; i < nop; ++i) {
+ dataptr[i] += strides[i];
+ }
+# endif
+#else /* complex */
+# if @nop@ == 1
+ accum_re += ((@temptype@ *)data0)[0];
+ accum_im += ((@temptype@ *)data0)[1];
+ data0 += stride0;
+# else
+# if @nop@ <= 3
+#define _SUMPROD_NOP @nop@
+# else
+#define _SUMPROD_NOP nop
+# endif
+ @temptype@ re, im, tmp;
+ int i;
+ re = ((@temptype@ *)dataptr[0])[0];
+ im = ((@temptype@ *)dataptr[0])[1];
+ for (i = 1; i < _SUMPROD_NOP; ++i) {
+ tmp = re * ((@temptype@ *)dataptr[i])[0] -
+ im * ((@temptype@ *)dataptr[i])[1];
+ im = re * ((@temptype@ *)dataptr[i])[1] +
+ im * ((@temptype@ *)dataptr[i])[0];
+ re = tmp;
+ }
+ accum_re += re;
+ accum_im += im;
+ for (i = 0; i < _SUMPROD_NOP; ++i) {
+ dataptr[i] += strides[i];
+ }
+#undef _SUMPROD_NOP
+# endif
+#endif
+ }
+
+#if @complex@
+# if @nop@ <= 3
+ ((@temptype@ *)dataptr[@nop@])[0] += accum_re;
+ ((@temptype@ *)dataptr[@nop@])[1] += accum_im;
+# else
+ ((@temptype@ *)dataptr[nop])[0] += accum_re;
+ ((@temptype@ *)dataptr[nop])[1] += accum_im;
+# endif
+#else
+# if @nop@ <= 3
+ *((@type@ *)dataptr[@nop@]) = @to@(accum +
+ @from@(*((@type@ *)dataptr[@nop@])));
+# else
+ *((@type@ *)dataptr[nop]) = @to@(accum +
+ @from@(*((@type@ *)dataptr[nop])));
+# endif
+#endif
+
+}
+
+/**end repeat1**/
+
+/**end repeat**/
+
+
+/* Do OR of ANDs for the boolean type */
+
+/**begin repeat
+ * #nop = 1, 2, 3, 1000#
+ * #noplabel = one, two, three, any#
+ */
+
+static void
+bool_sum_of_products_@noplabel@(int nop, char **dataptr,
+ npy_intp const *strides, npy_intp count)
+{
+#if (@nop@ <= 3)
+ char *data0 = dataptr[0];
+ npy_intp stride0 = strides[0];
+#endif
+#if (@nop@ == 2 || @nop@ == 3)
+ char *data1 = dataptr[1];
+ npy_intp stride1 = strides[1];
+#endif
+#if (@nop@ == 3)
+ char *data2 = dataptr[2];
+ npy_intp stride2 = strides[2];
+#endif
+#if (@nop@ <= 3)
+ char *data_out = dataptr[@nop@];
+ npy_intp stride_out = strides[@nop@];
+#endif
+
+ while (count--) {
+#if @nop@ == 1
+ *(npy_bool *)data_out = *(npy_bool *)data0 ||
+ *(npy_bool *)data_out;
+ data0 += stride0;
+ data_out += stride_out;
+#elif @nop@ == 2
+ *(npy_bool *)data_out = (*(npy_bool *)data0 &&
+ *(npy_bool *)data1) ||
+ *(npy_bool *)data_out;
+ data0 += stride0;
+ data1 += stride1;
+ data_out += stride_out;
+#elif @nop@ == 3
+ *(npy_bool *)data_out = (*(npy_bool *)data0 &&
+ *(npy_bool *)data1 &&
+ *(npy_bool *)data2) ||
+ *(npy_bool *)data_out;
+ data0 += stride0;
+ data1 += stride1;
+ data2 += stride2;
+ data_out += stride_out;
+#else
+ npy_bool temp = *(npy_bool *)dataptr[0];
+ int i;
+ for (i = 1; i < nop; ++i) {
+ temp = temp && *(npy_bool *)dataptr[i];
+ }
+ *(npy_bool *)dataptr[nop] = temp || *(npy_bool *)dataptr[i];
+ for (i = 0; i <= nop; ++i) {
+ dataptr[i] += strides[i];
+ }
+#endif
+ }
+}
+
+static void
+bool_sum_of_products_contig_@noplabel@(int nop, char **dataptr,
+ npy_intp const *strides, npy_intp count)
+{
+#if (@nop@ <= 3)
+ char *data0 = dataptr[0];
+#endif
+#if (@nop@ == 2 || @nop@ == 3)
+ char *data1 = dataptr[1];
+#endif
+#if (@nop@ == 3)
+ char *data2 = dataptr[2];
+#endif
+#if (@nop@ <= 3)
+ char *data_out = dataptr[@nop@];
+#endif
+
+#if (@nop@ <= 3)
+/* This is placed before the main loop to make small counts faster */
+finish_after_unrolled_loop:
+ switch (count) {
+/**begin repeat1
+ * #i = 6, 5, 4, 3, 2, 1, 0#
+ */
+ case @i@+1:
+# if @nop@ == 1
+ ((npy_bool *)data_out)[@i@] = ((npy_bool *)data0)[@i@] ||
+ ((npy_bool *)data_out)[@i@];
+# elif @nop@ == 2
+ ((npy_bool *)data_out)[@i@] =
+ (((npy_bool *)data0)[@i@] &&
+ ((npy_bool *)data1)[@i@]) ||
+ ((npy_bool *)data_out)[@i@];
+# elif @nop@ == 3
+ ((npy_bool *)data_out)[@i@] =
+ (((npy_bool *)data0)[@i@] &&
+ ((npy_bool *)data1)[@i@] &&
+ ((npy_bool *)data2)[@i@]) ||
+ ((npy_bool *)data_out)[@i@];
+# endif
+/**end repeat1**/
+ case 0:
+ return;
+ }
+#endif
+
+/* Unroll the loop by 8 for fixed-size nop */
+#if (@nop@ <= 3)
+ while (count >= 8) {
+ count -= 8;
+#else
+ while (count--) {
+#endif
+
+# if @nop@ == 1
+/**begin repeat1
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ *((npy_bool *)data_out + @i@) = (*((npy_bool *)data0 + @i@)) ||
+ (*((npy_bool *)data_out + @i@));
+/**end repeat1**/
+ data0 += 8*sizeof(npy_bool);
+ data_out += 8*sizeof(npy_bool);
+# elif @nop@ == 2
+/**begin repeat1
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ *((npy_bool *)data_out + @i@) =
+ ((*((npy_bool *)data0 + @i@)) &&
+ (*((npy_bool *)data1 + @i@))) ||
+ (*((npy_bool *)data_out + @i@));
+/**end repeat1**/
+ data0 += 8*sizeof(npy_bool);
+ data1 += 8*sizeof(npy_bool);
+ data_out += 8*sizeof(npy_bool);
+# elif @nop@ == 3
+/**begin repeat1
+ * #i = 0, 1, 2, 3, 4, 5, 6, 7#
+ */
+ *((npy_bool *)data_out + @i@) =
+ ((*((npy_bool *)data0 + @i@)) &&
+ (*((npy_bool *)data1 + @i@)) &&
+ (*((npy_bool *)data2 + @i@))) ||
+ (*((npy_bool *)data_out + @i@));
+/**end repeat1**/
+ data0 += 8*sizeof(npy_bool);
+ data1 += 8*sizeof(npy_bool);
+ data2 += 8*sizeof(npy_bool);
+ data_out += 8*sizeof(npy_bool);
+# else
+ npy_bool temp = *(npy_bool *)dataptr[0];
+ int i;
+ for (i = 1; i < nop; ++i) {
+ temp = temp && *(npy_bool *)dataptr[i];
+ }
+ *(npy_bool *)dataptr[nop] = temp || *(npy_bool *)dataptr[i];
+ for (i = 0; i <= nop; ++i) {
+ dataptr[i] += sizeof(npy_bool);
+ }
+# endif
+ }
+
+ /* If the loop was unrolled, we need to finish it off */
+#if (@nop@ <= 3)
+ goto finish_after_unrolled_loop;
+#endif
+}
+
+static void
+bool_sum_of_products_outstride0_@noplabel@(int nop, char **dataptr,
+ npy_intp const *strides, npy_intp count)
+{
+ npy_bool accum = 0;
+
+#if (@nop@ <= 3)
+ char *data0 = dataptr[0];
+ npy_intp stride0 = strides[0];
+#endif
+#if (@nop@ == 2 || @nop@ == 3)
+ char *data1 = dataptr[1];
+ npy_intp stride1 = strides[1];
+#endif
+#if (@nop@ == 3)
+ char *data2 = dataptr[2];
+ npy_intp stride2 = strides[2];
+#endif
+
+ while (count--) {
+#if @nop@ == 1
+ accum = *(npy_bool *)data0 || accum;
+ data0 += stride0;
+#elif @nop@ == 2
+ accum = (*(npy_bool *)data0 && *(npy_bool *)data1) || accum;
+ data0 += stride0;
+ data1 += stride1;
+#elif @nop@ == 3
+ accum = (*(npy_bool *)data0 &&
+ *(npy_bool *)data1 &&
+ *(npy_bool *)data2) || accum;
+ data0 += stride0;
+ data1 += stride1;
+ data2 += stride2;
+#else
+ npy_bool temp = *(npy_bool *)dataptr[0];
+ int i;
+ for (i = 1; i < nop; ++i) {
+ temp = temp && *(npy_bool *)dataptr[i];
+ }
+ accum = temp || accum;
+ for (i = 0; i <= nop; ++i) {
+ dataptr[i] += strides[i];
+ }
+#endif
+ }
+
+# if @nop@ <= 3
+ *((npy_bool *)dataptr[@nop@]) = accum || *((npy_bool *)dataptr[@nop@]);
+# else
+ *((npy_bool *)dataptr[nop]) = accum || *((npy_bool *)dataptr[nop]);
+# endif
+}
+
+/**end repeat**/
+
+/* These tables need to match up with the type enum */
+static sum_of_products_fn
+_contig_outstride0_unary_specialization_table[NPY_NTYPES] = {
+/**begin repeat
+ * #name = bool,
+ * byte, ubyte,
+ * short, ushort,
+ * int, uint,
+ * long, ulong,
+ * longlong, ulonglong,
+ * float, double, longdouble,
+ * cfloat, cdouble, clongdouble,
+ * object, string, unicode, void,
+ * datetime, timedelta, half#
+ * #use = 0,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1, 1,
+ * 1, 1, 1,
+ * 0, 0, 0, 0,
+ * 0, 0, 1#
+ */
+#if @use@
+ &@name@_sum_of_products_contig_outstride0_one,
+#else
+ NULL,
+#endif
+/**end repeat**/
+}; /* End of _contig_outstride0_unary_specialization_table */
+
+static sum_of_products_fn _binary_specialization_table[NPY_NTYPES][5] = {
+/**begin repeat
+ * #name = bool,
+ * byte, ubyte,
+ * short, ushort,
+ * int, uint,
+ * long, ulong,
+ * longlong, ulonglong,
+ * float, double, longdouble,
+ * cfloat, cdouble, clongdouble,
+ * object, string, unicode, void,
+ * datetime, timedelta, half#
+ * #use = 0,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1, 1,
+ * 0, 0, 0,
+ * 0, 0, 0, 0,
+ * 0, 0, 1#
+ */
+#if @use@
+{
+ &@name@_sum_of_products_stride0_contig_outstride0_two,
+ &@name@_sum_of_products_stride0_contig_outcontig_two,
+ &@name@_sum_of_products_contig_stride0_outstride0_two,
+ &@name@_sum_of_products_contig_stride0_outcontig_two,
+ &@name@_sum_of_products_contig_contig_outstride0_two,
+},
+#else
+ {NULL, NULL, NULL, NULL, NULL},
+#endif
+/**end repeat**/
+}; /* End of _binary_specialization_table */
+
+static sum_of_products_fn _outstride0_specialized_table[NPY_NTYPES][4] = {
+/**begin repeat
+ * #name = bool,
+ * byte, ubyte,
+ * short, ushort,
+ * int, uint,
+ * long, ulong,
+ * longlong, ulonglong,
+ * float, double, longdouble,
+ * cfloat, cdouble, clongdouble,
+ * object, string, unicode, void,
+ * datetime, timedelta, half#
+ * #use = 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1, 1,
+ * 1, 1, 1,
+ * 0, 0, 0, 0,
+ * 0, 0, 1#
+ */
+#if @use@
+{
+ &@name@_sum_of_products_outstride0_any,
+ &@name@_sum_of_products_outstride0_one,
+ &@name@_sum_of_products_outstride0_two,
+ &@name@_sum_of_products_outstride0_three
+},
+#else
+ {NULL, NULL, NULL, NULL},
+#endif
+/**end repeat**/
+}; /* End of _outstride0_specialized_table */
+
+static sum_of_products_fn _allcontig_specialized_table[NPY_NTYPES][4] = {
+/**begin repeat
+ * #name = bool,
+ * byte, ubyte,
+ * short, ushort,
+ * int, uint,
+ * long, ulong,
+ * longlong, ulonglong,
+ * float, double, longdouble,
+ * cfloat, cdouble, clongdouble,
+ * object, string, unicode, void,
+ * datetime, timedelta, half#
+ * #use = 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1, 1,
+ * 1, 1, 1,
+ * 0, 0, 0, 0,
+ * 0, 0, 1#
+ */
+#if @use@
+{
+ &@name@_sum_of_products_contig_any,
+ &@name@_sum_of_products_contig_one,
+ &@name@_sum_of_products_contig_two,
+ &@name@_sum_of_products_contig_three
+},
+#else
+ {NULL, NULL, NULL, NULL},
+#endif
+/**end repeat**/
+}; /* End of _allcontig_specialized_table */
+
+static sum_of_products_fn _unspecialized_table[NPY_NTYPES][4] = {
+/**begin repeat
+ * #name = bool,
+ * byte, ubyte,
+ * short, ushort,
+ * int, uint,
+ * long, ulong,
+ * longlong, ulonglong,
+ * float, double, longdouble,
+ * cfloat, cdouble, clongdouble,
+ * object, string, unicode, void,
+ * datetime, timedelta, half#
+ * #use = 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1,
+ * 1, 1, 1,
+ * 1, 1, 1,
+ * 0, 0, 0, 0,
+ * 0, 0, 1#
+ */
+#if @use@
+{
+ &@name@_sum_of_products_any,
+ &@name@_sum_of_products_one,
+ &@name@_sum_of_products_two,
+ &@name@_sum_of_products_three
+},
+#else
+ {NULL, NULL, NULL, NULL},
+#endif
+/**end repeat**/
+}; /* End of _unnspecialized_table */
+
+NPY_VISIBILITY_HIDDEN sum_of_products_fn
+get_sum_of_products_function(int nop, int type_num,
+ npy_intp itemsize, npy_intp const *fixed_strides)
+{
+ int iop;
+
+ if (type_num >= NPY_NTYPES) {
+ return NULL;
+ }
+
+ /* contiguous reduction */
+ if (nop == 1 && fixed_strides[0] == itemsize && fixed_strides[1] == 0) {
+ sum_of_products_fn ret =
+ _contig_outstride0_unary_specialization_table[type_num];
+ if (ret != NULL) {
+ return ret;
+ }
+ }
+
+ /* nop of 2 has more specializations */
+ if (nop == 2) {
+ /* Encode the zero/contiguous strides */
+ int code;
+ code = (fixed_strides[0] == 0) ? 0 :
+ (fixed_strides[0] == itemsize) ? 2*2*1 : 8;
+ code += (fixed_strides[1] == 0) ? 0 :
+ (fixed_strides[1] == itemsize) ? 2*1 : 8;
+ code += (fixed_strides[2] == 0) ? 0 :
+ (fixed_strides[2] == itemsize) ? 1 : 8;
+ if (code >= 2 && code < 7) {
+ sum_of_products_fn ret =
+ _binary_specialization_table[type_num][code-2];
+ if (ret != NULL) {
+ return ret;
+ }
+ }
+ }
+
+ /* Inner loop with an output stride of 0 */
+ if (fixed_strides[nop] == 0) {
+ return _outstride0_specialized_table[type_num][nop <= 3 ? nop : 0];
+ }
+
+ /* Check for all contiguous */
+ for (iop = 0; iop < nop + 1; ++iop) {
+ if (fixed_strides[iop] != itemsize) {
+ break;
+ }
+ }
+
+ /* Contiguous loop */
+ if (iop == nop + 1) {
+ return _allcontig_specialized_table[type_num][nop <= 3 ? nop : 0];
+ }
+
+ /* None of the above specializations caught it, general loops */
+ return _unspecialized_table[type_num][nop <= 3 ? nop : 0];
+}
diff --git a/numpy/core/src/multiarray/einsum_sumprod.h b/numpy/core/src/multiarray/einsum_sumprod.h
new file mode 100644
index 000000000..c6cf18ec6
--- /dev/null
+++ b/numpy/core/src/multiarray/einsum_sumprod.h
@@ -0,0 +1,12 @@
+#ifndef _NPY_MULTIARRAY_EINSUM_SUMPROD_H
+#define _NPY_MULTIARRAY_EINSUM_SUMPROD_H
+
+#include <numpy/npy_common.h>
+
+typedef void (*sum_of_products_fn)(int, char **, npy_intp const*, npy_intp);
+
+NPY_VISIBILITY_HIDDEN sum_of_products_fn
+get_sum_of_products_function(int nop, int type_num,
+ npy_intp itemsize, npy_intp const *fixed_strides);
+
+#endif
diff --git a/numpy/core/src/multiarray/flagsobject.c b/numpy/core/src/multiarray/flagsobject.c
index bec0523d5..9b7d8deae 100644
--- a/numpy/core/src/multiarray/flagsobject.c
+++ b/numpy/core/src/multiarray/flagsobject.c
@@ -711,7 +711,7 @@ arrayflags_print(PyArrayFlagsObject *self)
if (fl & NPY_ARRAY_WARN_ON_WRITE) {
_warn_on_write = " (with WARN_ON_WRITE=True)";
}
- return PyUString_FromFormat(
+ return PyUnicode_FromFormat(
" %s : %s\n %s : %s\n"
" %s : %s\n %s : %s%s\n"
" %s : %s\n %s : %s\n"
diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c
index c8533539b..3575d6fad 100644
--- a/numpy/core/src/multiarray/getset.c
+++ b/numpy/core/src/multiarray/getset.c
@@ -217,7 +217,7 @@ array_protocol_descr_get(PyArrayObject *self)
if (dobj == NULL) {
return NULL;
}
- PyTuple_SET_ITEM(dobj, 0, PyString_FromString(""));
+ PyTuple_SET_ITEM(dobj, 0, PyUnicode_FromString(""));
PyTuple_SET_ITEM(dobj, 1, array_typestr_get(self));
res = PyList_New(1);
if (res == NULL) {
@@ -621,7 +621,6 @@ static PyObject *
array_struct_get(PyArrayObject *self)
{
PyArrayInterface *inter;
- PyObject *ret;
inter = (PyArrayInterface *)PyArray_malloc(sizeof(PyArrayInterface));
if (inter==NULL) {
@@ -673,8 +672,14 @@ array_struct_get(PyArrayObject *self)
else {
inter->descr = NULL;
}
+ PyObject *ret = PyCapsule_New(inter, NULL, gentype_struct_free);
+ if (ret == NULL) {
+ return NULL;
+ }
Py_INCREF(self);
- ret = NpyCapsule_FromVoidPtrAndDesc(inter, self, gentype_struct_free);
+ if (PyCapsule_SetContext(ret, self) < 0) {
+ return NULL;
+ }
return ret;
}
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index db15ff1d5..c0cea0f21 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1418,7 +1418,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
return 0;
}
else if (tup == NULL){
- PyObject *errmsg = PyUString_FromString("no field of name ");
+ PyObject *errmsg = PyUnicode_FromString("no field of name ");
PyUString_Concat(&errmsg, ind);
PyErr_SetObject(PyExc_ValueError, errmsg);
Py_DECREF(errmsg);
@@ -2438,7 +2438,7 @@ mapiter_fill_info(PyArrayMapIterObject *mit, npy_index_info *indices,
* Attempt to set a meaningful exception. Could also find out
* if a boolean index was converted.
*/
- errmsg = PyUString_FromString("shape mismatch: indexing arrays could not "
+ errmsg = PyUnicode_FromString("shape mismatch: indexing arrays could not "
"be broadcast together with shapes ");
if (errmsg == NULL) {
return -1;
@@ -3183,7 +3183,7 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
goto finish;
broadcast_error:
- errmsg = PyUString_FromString("shape mismatch: value array "
+ errmsg = PyUnicode_FromString("shape mismatch: value array "
"of shape ");
if (errmsg == NULL) {
goto finish;
@@ -3204,7 +3204,7 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
goto finish;
}
- tmp = PyUString_FromString("could not be broadcast to indexing "
+ tmp = PyUnicode_FromString("could not be broadcast to indexing "
"result of shape ");
PyUString_ConcatAndDel(&errmsg, tmp);
if (errmsg == NULL) {
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 0519434e8..ae2dceb10 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -2585,9 +2585,10 @@ array_complex(PyArrayObject *self, PyObject *NPY_UNUSED(args))
PyArrayObject *arr;
PyArray_Descr *dtype;
PyObject *c;
+
if (PyArray_SIZE(self) != 1) {
- PyErr_SetString(PyExc_TypeError, "only length-1 arrays can "\
- "be converted to Python scalars");
+ PyErr_SetString(PyExc_TypeError,
+ "only length-1 arrays can be converted to Python scalars");
return NULL;
}
@@ -2598,38 +2599,18 @@ array_complex(PyArrayObject *self, PyObject *NPY_UNUSED(args))
if (!PyArray_CanCastArrayTo(self, dtype, NPY_SAME_KIND_CASTING) &&
!(PyArray_TYPE(self) == NPY_OBJECT)) {
- PyObject *err, *msg_part;
+ PyObject *descr = (PyObject*)PyArray_DESCR(self);
+
Py_DECREF(dtype);
- err = PyString_FromString("unable to convert ");
- if (err == NULL) {
- return NULL;
- }
- msg_part = PyObject_Repr((PyObject*)PyArray_DESCR(self));
- if (msg_part == NULL) {
- Py_DECREF(err);
- return NULL;
- }
- PyString_ConcatAndDel(&err, msg_part);
- if (err == NULL) {
- return NULL;
- }
- msg_part = PyString_FromString(", to complex.");
- if (msg_part == NULL) {
- Py_DECREF(err);
- return NULL;
- }
- PyString_ConcatAndDel(&err, msg_part);
- if (err == NULL) {
- return NULL;
- }
- PyErr_SetObject(PyExc_TypeError, err);
- Py_DECREF(err);
+ PyErr_Format(PyExc_TypeError,
+ "Unable to convert %R to complex", descr);
return NULL;
}
if (PyArray_TYPE(self) == NPY_OBJECT) {
/* let python try calling __complex__ on the object. */
PyObject *args, *res;
+
Py_DECREF(dtype);
args = Py_BuildValue("(O)", *((PyObject**)PyArray_DATA(self)));
if (args == NULL) {
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 923469edf..db419636d 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -4335,18 +4335,18 @@ NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_axis2 = NULL;
static int
intern_strings(void)
{
- npy_ma_str_array = PyUString_InternFromString("__array__");
- npy_ma_str_array_prepare = PyUString_InternFromString("__array_prepare__");
- npy_ma_str_array_wrap = PyUString_InternFromString("__array_wrap__");
- npy_ma_str_array_finalize = PyUString_InternFromString("__array_finalize__");
- npy_ma_str_ufunc = PyUString_InternFromString("__array_ufunc__");
- npy_ma_str_implementation = PyUString_InternFromString("_implementation");
- npy_ma_str_order = PyUString_InternFromString("order");
- npy_ma_str_copy = PyUString_InternFromString("copy");
- npy_ma_str_dtype = PyUString_InternFromString("dtype");
- npy_ma_str_ndmin = PyUString_InternFromString("ndmin");
- npy_ma_str_axis1 = PyUString_InternFromString("axis1");
- npy_ma_str_axis2 = PyUString_InternFromString("axis2");
+ npy_ma_str_array = PyUnicode_InternFromString("__array__");
+ npy_ma_str_array_prepare = PyUnicode_InternFromString("__array_prepare__");
+ npy_ma_str_array_wrap = PyUnicode_InternFromString("__array_wrap__");
+ npy_ma_str_array_finalize = PyUnicode_InternFromString("__array_finalize__");
+ npy_ma_str_ufunc = PyUnicode_InternFromString("__array_ufunc__");
+ npy_ma_str_implementation = PyUnicode_InternFromString("_implementation");
+ npy_ma_str_order = PyUnicode_InternFromString("order");
+ npy_ma_str_copy = PyUnicode_InternFromString("copy");
+ npy_ma_str_dtype = PyUnicode_InternFromString("dtype");
+ npy_ma_str_ndmin = PyUnicode_InternFromString("ndmin");
+ npy_ma_str_axis1 = PyUnicode_InternFromString("axis1");
+ npy_ma_str_axis2 = PyUnicode_InternFromString("axis2");
return npy_ma_str_array && npy_ma_str_array_prepare &&
npy_ma_str_array_wrap && npy_ma_str_array_finalize &&
@@ -4477,14 +4477,14 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) {
goto err;
}
- c_api = NpyCapsule_FromVoidPtr((void *)PyArray_API, NULL);
+ c_api = PyCapsule_New((void *)PyArray_API, NULL, NULL);
if (c_api == NULL) {
goto err;
}
PyDict_SetItemString(d, "_ARRAY_API", c_api);
Py_DECREF(c_api);
- c_api = NpyCapsule_FromVoidPtr((void *)PyUFunc_API, NULL);
+ c_api = PyCapsule_New((void *)PyUFunc_API, NULL, NULL);
if (c_api == NULL) {
goto err;
}
@@ -4506,7 +4506,7 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) {
PyDict_SetItemString(d, "tracemalloc_domain", s);
Py_DECREF(s);
- s = PyUString_FromString("3.1");
+ s = PyUnicode_FromString("3.1");
PyDict_SetItemString(d, "__version__", s);
Py_DECREF(s);
@@ -4540,7 +4540,7 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) {
}
Py_DECREF(s);
- s = NpyCapsule_FromVoidPtr((void *)_datetime_strings, NULL);
+ s = PyCapsule_New((void *)_datetime_strings, NULL, NULL);
if (s == NULL) {
goto err;
}
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index a0dda4090..4bc6d2ca1 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -1755,7 +1755,7 @@ broadcast_error: {
char *tmpstr;
if (op_axes == NULL) {
- errmsg = PyUString_FromString("operands could not be broadcast "
+ errmsg = PyUnicode_FromString("operands could not be broadcast "
"together with shapes ");
if (errmsg == NULL) {
return 0;
@@ -1776,7 +1776,7 @@ broadcast_error: {
}
}
if (itershape != NULL) {
- tmp = PyUString_FromString("and requested shape ");
+ tmp = PyUnicode_FromString("and requested shape ");
if (tmp == NULL) {
Py_DECREF(errmsg);
return 0;
@@ -1801,7 +1801,7 @@ broadcast_error: {
Py_DECREF(errmsg);
}
else {
- errmsg = PyUString_FromString("operands could not be broadcast "
+ errmsg = PyUnicode_FromString("operands could not be broadcast "
"together with remapped shapes "
"[original->remapped]: ");
for (iop = 0; iop < nop; ++iop) {
@@ -1843,7 +1843,7 @@ broadcast_error: {
}
}
if (itershape != NULL) {
- tmp = PyUString_FromString("and requested shape ");
+ tmp = PyUnicode_FromString("and requested shape ");
if (tmp == NULL) {
Py_DECREF(errmsg);
return 0;
@@ -1877,11 +1877,11 @@ operand_different_than_broadcast: {
/* Start of error message */
if (op_flags[iop] & NPY_ITER_READONLY) {
- errmsg = PyUString_FromString("non-broadcastable operand "
+ errmsg = PyUnicode_FromString("non-broadcastable operand "
"with shape ");
}
else {
- errmsg = PyUString_FromString("non-broadcastable output "
+ errmsg = PyUnicode_FromString("non-broadcastable output "
"operand with shape ");
}
if (errmsg == NULL) {
@@ -1913,7 +1913,7 @@ operand_different_than_broadcast: {
}
}
- tmp = PyUString_FromString(" [remapped to ");
+ tmp = PyUnicode_FromString(" [remapped to ");
if (tmp == NULL) {
return 0;
}
@@ -1932,7 +1932,7 @@ operand_different_than_broadcast: {
}
}
- tmp = PyUString_FromString(" doesn't match the broadcast shape ");
+ tmp = PyUnicode_FromString(" doesn't match the broadcast shape ");
if (tmp == NULL) {
return 0;
}
diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c
index e271906c1..5b4836cc9 100644
--- a/numpy/core/src/multiarray/nditer_pywrap.c
+++ b/numpy/core/src/multiarray/nditer_pywrap.c
@@ -1142,7 +1142,7 @@ npyiter_dealloc(NewNpyArrayIterObject *self)
"results.", 1) < 0) {
PyObject *s;
- s = PyUString_FromString("npyiter_dealloc");
+ s = PyUnicode_FromString("npyiter_dealloc");
if (s) {
PyErr_WriteUnraisable(s);
Py_DECREF(s);
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 58b9e2c30..c1bff1e42 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -447,7 +447,7 @@ _void_to_hex(const char* argbuf, const Py_ssize_t arglen,
}
memcpy(&retbuf[j], echars, strlen(echars));
- retval = PyUString_FromStringAndSize(retbuf, slen);
+ retval = PyUnicode_FromStringAndSize(retbuf, slen);
PyMem_Free(retbuf);
return retval;
@@ -518,21 +518,21 @@ datetimetype_repr(PyObject *self)
*/
if ((scal->obmeta.num == 1 && scal->obmeta.base != NPY_FR_h) ||
scal->obmeta.base == NPY_FR_GENERIC) {
- ret = PyUString_FromString("numpy.datetime64('");
+ ret = PyUnicode_FromString("numpy.datetime64('");
PyUString_ConcatAndDel(&ret,
- PyUString_FromString(iso));
+ PyUnicode_FromString(iso));
PyUString_ConcatAndDel(&ret,
- PyUString_FromString("')"));
+ PyUnicode_FromString("')"));
}
else {
- ret = PyUString_FromString("numpy.datetime64('");
+ ret = PyUnicode_FromString("numpy.datetime64('");
PyUString_ConcatAndDel(&ret,
- PyUString_FromString(iso));
+ PyUnicode_FromString(iso));
PyUString_ConcatAndDel(&ret,
- PyUString_FromString("','"));
+ PyUnicode_FromString("','"));
ret = append_metastr_to_string(&scal->obmeta, 1, ret);
PyUString_ConcatAndDel(&ret,
- PyUString_FromString("')"));
+ PyUnicode_FromString("')"));
}
return ret;
@@ -554,31 +554,31 @@ timedeltatype_repr(PyObject *self)
/* The value */
if (scal->obval == NPY_DATETIME_NAT) {
- ret = PyUString_FromString("numpy.timedelta64('NaT'");
+ ret = PyUnicode_FromString("numpy.timedelta64('NaT'");
}
else {
/*
* Can't use "%lld" if HAVE_LONG_LONG is not defined
*/
#if defined(HAVE_LONG_LONG)
- ret = PyUString_FromFormat("numpy.timedelta64(%lld",
+ ret = PyUnicode_FromFormat("numpy.timedelta64(%lld",
(long long)scal->obval);
#else
- ret = PyUString_FromFormat("numpy.timedelta64(%ld",
+ ret = PyUnicode_FromFormat("numpy.timedelta64(%ld",
(long)scal->obval);
#endif
}
/* The metadata unit */
if (scal->obmeta.base == NPY_FR_GENERIC) {
PyUString_ConcatAndDel(&ret,
- PyUString_FromString(")"));
+ PyUnicode_FromString(")"));
}
else {
PyUString_ConcatAndDel(&ret,
- PyUString_FromString(",'"));
+ PyUnicode_FromString(",'"));
ret = append_metastr_to_string(&scal->obmeta, 1, ret);
PyUString_ConcatAndDel(&ret,
- PyUString_FromString("')"));
+ PyUnicode_FromString("')"));
}
return ret;
@@ -611,7 +611,7 @@ datetimetype_str(PyObject *self)
return NULL;
}
- return PyUString_FromString(iso);
+ return PyUnicode_FromString(iso);
}
static char *_datetime_verbose_strings[NPY_DATETIME_NUMUNITS] = {
@@ -657,21 +657,21 @@ timedeltatype_str(PyObject *self)
}
if (scal->obval == NPY_DATETIME_NAT) {
- ret = PyUString_FromString("NaT");
+ ret = PyUnicode_FromString("NaT");
}
else {
/*
* Can't use "%lld" if HAVE_LONG_LONG is not defined
*/
#if defined(HAVE_LONG_LONG)
- ret = PyUString_FromFormat("%lld ",
+ ret = PyUnicode_FromFormat("%lld ",
(long long)(scal->obval * scal->obmeta.num));
#else
- ret = PyUString_FromFormat("%ld ",
+ ret = PyUnicode_FromFormat("%ld ",
(long)(scal->obval * scal->obmeta.num));
#endif
PyUString_ConcatAndDel(&ret,
- PyUString_FromString(basestr));
+ PyUnicode_FromString(basestr));
}
return ret;
@@ -795,7 +795,7 @@ legacy_@name@_format@kind@(@type@ val)
PyOS_snprintf(buf, sizeof(buf), "(%s%sj)", re, im);
}
- return PyUString_FromString(buf);
+ return PyUnicode_FromString(buf);
}
#undef _FMT1
@@ -836,7 +836,7 @@ legacy_@name@_format@kind@(npy_@name@ val){
strcpy(&buf[cnt],".0");
}
- return PyUString_FromString(buf);
+ return PyUnicode_FromString(buf);
}
#undef _FMT1
@@ -904,7 +904,7 @@ c@name@type_@kind@(PyObject *self)
return NULL;
}
- PyUString_ConcatAndDel(&istr, PyUString_FromString("j"));
+ PyUString_ConcatAndDel(&istr, PyUnicode_FromString("j"));
return istr;
}
@@ -915,13 +915,13 @@ c@name@type_@kind@(PyObject *self)
}
}
else if (npy_isnan(val.real)) {
- rstr = PyUString_FromString("nan");
+ rstr = PyUnicode_FromString("nan");
}
else if (val.real > 0){
- rstr = PyUString_FromString("inf");
+ rstr = PyUnicode_FromString("inf");
}
else {
- rstr = PyUString_FromString("-inf");
+ rstr = PyUnicode_FromString("-inf");
}
if (npy_isfinite(val.imag)) {
@@ -931,19 +931,19 @@ c@name@type_@kind@(PyObject *self)
}
}
else if (npy_isnan(val.imag)) {
- istr = PyUString_FromString("+nan");
+ istr = PyUnicode_FromString("+nan");
}
else if (val.imag > 0){
- istr = PyUString_FromString("+inf");
+ istr = PyUnicode_FromString("+inf");
}
else {
- istr = PyUString_FromString("-inf");
+ istr = PyUnicode_FromString("-inf");
}
- ret = PyUString_FromString("(");
+ ret = PyUnicode_FromString("(");
PyUString_ConcatAndDel(&ret, rstr);
PyUString_ConcatAndDel(&ret, istr);
- PyUString_ConcatAndDel(&ret, PyUString_FromString("j)"));
+ PyUString_ConcatAndDel(&ret, PyUnicode_FromString("j)"));
return ret;
}
@@ -1147,12 +1147,16 @@ gentype_sizeof(PyObject *self)
NPY_NO_EXPORT void
gentype_struct_free(PyObject *ptr)
{
- PyArrayInterface *arrif;
- PyObject *context;
-
- arrif = (PyArrayInterface*)PyCapsule_GetPointer(ptr, NULL);
- context = (PyObject *)PyCapsule_GetContext(ptr);
- Py_DECREF(context);
+ PyArrayInterface *arrif = (PyArrayInterface*)PyCapsule_GetPointer(ptr, NULL);
+ if (arrif == NULL) {
+ PyErr_WriteUnraisable(ptr);
+ return;
+ }
+ PyObject *context = (PyObject *)PyCapsule_GetContext(ptr);
+ if (context == NULL && PyErr_Occurred()) {
+ PyErr_WriteUnraisable(ptr);
+ }
+ Py_XDECREF(context);
Py_XDECREF(arrif->descr);
PyArray_free(arrif->shape);
PyArray_free(arrif);
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index 73bb7933f..397d539c1 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -458,7 +458,7 @@ _attempt_nocopy_reshape(PyArrayObject *self, int newnd, const npy_intp *newdims,
static void
raise_reshape_size_mismatch(PyArray_Dims *newshape, PyArrayObject *arr)
{
- PyObject *msg = PyUString_FromFormat("cannot reshape array of size %zd "
+ PyObject *msg = PyUnicode_FromFormat("cannot reshape array of size %zd "
"into shape ", PyArray_SIZE(arr));
PyObject *tmp = convert_shape_to_string(newshape->len, newshape->ptr, "");
@@ -997,10 +997,10 @@ build_shape_string(npy_intp n, npy_intp const *vals)
}
if (i == n) {
- return PyUString_FromFormat("()");
+ return PyUnicode_FromFormat("()");
}
else {
- ret = PyUString_FromFormat("(%" NPY_INTP_FMT, vals[i++]);
+ ret = PyUnicode_FromFormat("(%" NPY_INTP_FMT, vals[i++]);
if (ret == NULL) {
return NULL;
}
@@ -1008,10 +1008,10 @@ build_shape_string(npy_intp n, npy_intp const *vals)
for (; i < n; ++i) {
if (vals[i] < 0) {
- tmp = PyUString_FromString(",newaxis");
+ tmp = PyUnicode_FromString(",newaxis");
}
else {
- tmp = PyUString_FromFormat(",%" NPY_INTP_FMT, vals[i]);
+ tmp = PyUnicode_FromFormat(",%" NPY_INTP_FMT, vals[i]);
}
if (tmp == NULL) {
Py_DECREF(ret);
@@ -1024,7 +1024,7 @@ build_shape_string(npy_intp n, npy_intp const *vals)
}
}
- tmp = PyUString_FromFormat(")");
+ tmp = PyUnicode_FromFormat(")");
PyUString_ConcatAndDel(&ret, tmp);
return ret;
}
diff --git a/numpy/core/src/multiarray/strfuncs.c b/numpy/core/src/multiarray/strfuncs.c
index 363cbdba2..d9d9b7c0a 100644
--- a/numpy/core/src/multiarray/strfuncs.c
+++ b/numpy/core/src/multiarray/strfuncs.c
@@ -3,14 +3,25 @@
#include <Python.h>
#include <numpy/arrayobject.h>
-
#include "npy_pycompat.h"
-
+#include "npy_import.h"
#include "strfuncs.h"
static PyObject *PyArray_StrFunction = NULL;
static PyObject *PyArray_ReprFunction = NULL;
+
+static void
+npy_PyErr_SetStringChained(PyObject *type, const char *message)
+{
+ PyObject *exc, *val, *tb;
+
+ PyErr_Fetch(&exc, &val, &tb);
+ PyErr_SetString(type, message);
+ npy_PyErr_ChainExceptionsCause(exc, val, tb);
+}
+
+
/*NUMPY_API
* Set the array print function to be a Python function.
*/
@@ -36,164 +47,52 @@ PyArray_SetStringFunction(PyObject *op, int repr)
}
-/*
- * Extend string. On failure, returns NULL and leaves *strp alone.
- * XXX we do this in multiple places; time for a string library?
- */
-static char *
-extend_str(char **strp, Py_ssize_t n, Py_ssize_t *maxp)
-{
- char *str = *strp;
- Py_ssize_t new_cap;
-
- if (n >= *maxp - 16) {
- new_cap = *maxp * 2;
-
- if (new_cap <= *maxp) { /* overflow */
- return NULL;
- }
- str = PyArray_realloc(*strp, new_cap);
- if (str != NULL) {
- *strp = str;
- *maxp = new_cap;
- }
- }
- return str;
-}
-
-
-static int
-dump_data(char **string, Py_ssize_t *n, Py_ssize_t *max_n, char *data, int nd,
- npy_intp const *dimensions, npy_intp const *strides, PyArrayObject* self)
-{
- PyObject *op = NULL, *sp = NULL;
- char *ostring;
- npy_intp i, N, ret = 0;
-
-#define CHECK_MEMORY do { \
- if (extend_str(string, *n, max_n) == NULL) { \
- ret = -1; \
- goto end; \
- } \
- } while (0)
-
- if (nd == 0) {
- if ((op = PyArray_GETITEM(self, data)) == NULL) {
- return -1;
- }
- sp = PyObject_Repr(op);
- if (sp == NULL) {
- ret = -1;
- goto end;
- }
- ostring = PyString_AsString(sp);
- N = PyString_Size(sp)*sizeof(char);
- *n += N;
- CHECK_MEMORY;
- memmove(*string + (*n - N), ostring, N);
- }
- else {
- CHECK_MEMORY;
- (*string)[*n] = '[';
- *n += 1;
- for (i = 0; i < dimensions[0]; i++) {
- if (dump_data(string, n, max_n,
- data + (*strides)*i,
- nd - 1, dimensions + 1,
- strides + 1, self) < 0) {
- return -1;
- }
- CHECK_MEMORY;
- if (i < dimensions[0] - 1) {
- (*string)[*n] = ',';
- (*string)[*n+1] = ' ';
- *n += 2;
- }
- }
- CHECK_MEMORY;
- (*string)[*n] = ']';
- *n += 1;
- }
-
-#undef CHECK_MEMORY
-
-end:
- Py_XDECREF(op);
- Py_XDECREF(sp);
- return ret;
-}
-
-
-static PyObject *
-array_repr_builtin(PyArrayObject *self, int repr)
-{
- PyObject *ret;
- char *string;
- /* max_n initial value is arbitrary, dump_data will extend it */
- Py_ssize_t n = 0, max_n = PyArray_NBYTES(self) * 4 + 7;
-
- if ((string = PyArray_malloc(max_n)) == NULL) {
- return PyErr_NoMemory();
- }
-
- if (dump_data(&string, &n, &max_n, PyArray_DATA(self),
- PyArray_NDIM(self), PyArray_DIMS(self),
- PyArray_STRIDES(self), self) < 0) {
- PyArray_free(string);
- return NULL;
- }
-
- if (repr) {
- if (PyArray_ISEXTENDED(self)) {
- ret = PyUString_FromFormat("array(%s, '%c%d')",
- string,
- PyArray_DESCR(self)->type,
- PyArray_DESCR(self)->elsize);
- }
- else {
- ret = PyUString_FromFormat("array(%s, '%c')",
- string,
- PyArray_DESCR(self)->type);
- }
- }
- else {
- ret = PyUString_FromStringAndSize(string, n);
- }
-
- PyArray_free(string);
- return ret;
-}
-
-
NPY_NO_EXPORT PyObject *
array_repr(PyArrayObject *self)
{
- PyObject *s;
+ static PyObject *repr = NULL;
- if (PyArray_ReprFunction == NULL) {
- s = array_repr_builtin(self, 1);
+ if (PyArray_ReprFunction != NULL) {
+ return PyObject_CallFunctionObjArgs(PyArray_ReprFunction, self, NULL);
}
- else {
- s = PyObject_CallFunctionObjArgs(PyArray_ReprFunction, self, NULL);
+
+ /*
+ * We need to do a delayed import here as initialization on module load
+ * leads to circular import problems.
+ */
+ npy_cache_import("numpy.core.arrayprint", "_default_array_repr", &repr);
+ if (repr == NULL) {
+ npy_PyErr_SetStringChained(PyExc_RuntimeError,
+ "Unable to configure default ndarray.__repr__");
+ return NULL;
}
- return s;
+ return PyObject_CallFunctionObjArgs(repr, self, NULL);
}
NPY_NO_EXPORT PyObject *
array_str(PyArrayObject *self)
{
- PyObject *s;
+ static PyObject *str = NULL;
- if (PyArray_StrFunction == NULL) {
- s = array_repr_builtin(self, 0);
+ if (PyArray_StrFunction != NULL) {
+ return PyObject_CallFunctionObjArgs(PyArray_StrFunction, self, NULL);
}
- else {
- s = PyObject_CallFunctionObjArgs(PyArray_StrFunction, self, NULL);
+
+ /*
+ * We need to do a delayed import here as initialization on module load leads
+ * to circular import problems.
+ */
+ npy_cache_import("numpy.core.arrayprint", "_default_array_str", &str);
+ if (str == NULL) {
+ npy_PyErr_SetStringChained(PyExc_RuntimeError,
+ "Unable to configure default ndarray.__str__");
+ return NULL;
}
- return s;
+ return PyObject_CallFunctionObjArgs(str, self, NULL);
}
+
NPY_NO_EXPORT PyObject *
array_format(PyArrayObject *self, PyObject *args)
{
@@ -221,4 +120,3 @@ array_format(PyArrayObject *self, PyObject *args)
);
}
}
-
diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c
index b97f0f8b8..6b6c6bd9d 100644
--- a/numpy/core/src/multiarray/usertypes.c
+++ b/numpy/core/src/multiarray/usertypes.c
@@ -272,7 +272,7 @@ PyArray_RegisterCastFunc(PyArray_Descr *descr, int totype,
if (PyErr_Occurred()) {
return -1;
}
- cobj = NpyCapsule_FromVoidPtr((void *)castfunc, NULL);
+ cobj = PyCapsule_New((void *)castfunc, NULL, NULL);
if (cobj == NULL) {
Py_DECREF(key);
return -1;
diff --git a/numpy/core/src/umath/_rational_tests.c.src b/numpy/core/src/umath/_rational_tests.c.src
index 13e33d0a5..cbb6d9d17 100644
--- a/numpy/core/src/umath/_rational_tests.c.src
+++ b/numpy/core/src/umath/_rational_tests.c.src
@@ -526,11 +526,11 @@ static PyObject*
pyrational_repr(PyObject* self) {
rational x = ((PyRational*)self)->r;
if (d(x)!=1) {
- return PyUString_FromFormat(
+ return PyUnicode_FromFormat(
"rational(%ld,%ld)",(long)x.n,(long)d(x));
}
else {
- return PyUString_FromFormat(
+ return PyUnicode_FromFormat(
"rational(%ld)",(long)x.n);
}
}
@@ -539,11 +539,11 @@ static PyObject*
pyrational_str(PyObject* self) {
rational x = ((PyRational*)self)->r;
if (d(x)!=1) {
- return PyUString_FromFormat(
+ return PyUnicode_FromFormat(
"%ld/%ld",(long)x.n,(long)d(x));
}
else {
- return PyUString_FromFormat(
+ return PyUnicode_FromFormat(
"%ld",(long)x.n);
}
}
@@ -1126,7 +1126,7 @@ PyMODINIT_FUNC PyInit__rational_tests(void) {
if (PyErr_Occurred()) {
goto fail;
}
- numpy_str = PyUString_FromString("numpy");
+ numpy_str = PyUnicode_FromString("numpy");
if (!numpy_str) {
goto fail;
}
diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src
index d08aabd64..932c3b5ab 100644
--- a/numpy/core/src/umath/_umath_tests.c.src
+++ b/numpy/core/src/umath/_umath_tests.c.src
@@ -671,7 +671,7 @@ PyMODINIT_FUNC PyInit__umath_tests(void) {
d = PyModule_GetDict(m);
- version = PyString_FromString("0.1");
+ version = PyUnicode_FromString("0.1");
PyDict_SetItemString(d, "__version__", version);
Py_DECREF(version);
diff --git a/numpy/core/src/umath/extobj.c b/numpy/core/src/umath/extobj.c
index 4a953410a..cd81f7734 100644
--- a/numpy/core/src/umath/extobj.c
+++ b/numpy/core/src/umath/extobj.c
@@ -109,7 +109,7 @@ _error_handler(int method, PyObject *errobj, char *errtype, int retstatus, int *
errtype, name);
goto fail;
}
- args = Py_BuildValue("NN", PyUString_FromString(errtype),
+ args = Py_BuildValue("NN", PyUnicode_FromString(errtype),
PyLong_FromLong((long) retstatus));
if (args == NULL) {
goto fail;
diff --git a/numpy/core/src/umath/override.c b/numpy/core/src/umath/override.c
index bf6e5a698..a0090e302 100644
--- a/numpy/core/src/umath/override.c
+++ b/numpy/core/src/umath/override.c
@@ -605,7 +605,7 @@ PyUFunc_CheckOverride(PyUFuncObject *ufunc, char *method,
goto fail;
}
- method_name = PyUString_FromString(method);
+ method_name = PyUnicode_FromString(method);
if (method_name == NULL) {
goto fail;
}
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 005556fb6..f693eb5c2 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -3318,7 +3318,6 @@ get_binary_op_function(PyUFuncObject *ufunc, int *otype,
void **out_innerloopdata)
{
int i;
- PyUFunc_Loop1d *funcdata;
NPY_UF_DBG_PRINT1("Getting binary op function for type number %d\n",
*otype);
@@ -3336,7 +3335,10 @@ get_binary_op_function(PyUFuncObject *ufunc, int *otype,
return -1;
}
else if (obj != NULL) {
- funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
+ PyUFunc_Loop1d *funcdata = PyCapsule_GetPointer(obj, NULL);
+ if (funcdata == NULL) {
+ return -1;
+ }
while (funcdata != NULL) {
int *types = funcdata->arg_types;
@@ -5190,9 +5192,12 @@ PyUFunc_RegisterLoopForDescr(PyUFuncObject *ufunc,
result = -1;
}
else {
- PyUFunc_Loop1d *current;
int cmp = 1;
- current = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(cobj);
+ PyUFunc_Loop1d *current = PyCapsule_GetPointer(cobj, NULL);
+ if (current == NULL) {
+ result = -1;
+ goto done;
+ }
while (current != NULL) {
cmp = cmp_arg_types(current->arg_types,
arg_typenums, ufunc->nargs);
@@ -5226,6 +5231,7 @@ PyUFunc_RegisterLoopForDescr(PyUFuncObject *ufunc,
}
}
+done:
PyArray_free(arg_typenums);
Py_DECREF(key);
@@ -5294,7 +5300,7 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc,
}
/* If it's not there, then make one and return. */
else if (cobj == NULL) {
- cobj = NpyCapsule_FromVoidPtr((void *)funcdata, _loop1d_list_free);
+ cobj = PyCapsule_New((void *)funcdata, NULL, _loop1d_list_free);
if (cobj == NULL) {
goto fail;
}
@@ -5312,7 +5318,10 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc,
* is exactly like this one, then just replace.
* Otherwise insert.
*/
- current = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(cobj);
+ current = PyCapsule_GetPointer(cobj, NULL);
+ if (current == NULL) {
+ goto fail;
+ }
while (current != NULL) {
cmp = cmp_arg_types(current->arg_types, newtypes, ufunc->nargs);
if (cmp >= 0) {
@@ -5383,7 +5392,7 @@ ufunc_dealloc(PyUFuncObject *ufunc)
static PyObject *
ufunc_repr(PyUFuncObject *ufunc)
{
- return PyUString_FromFormat("<ufunc '%s'>", ufunc->name);
+ return PyUnicode_FromFormat("<ufunc '%s'>", ufunc->name);
}
static int
@@ -5995,7 +6004,7 @@ ufunc_get_doc(PyUFuncObject *ufunc)
}
if (ufunc->doc != NULL) {
PyUString_ConcatAndDel(&doc,
- PyUString_FromFormat("\n\n%s", ufunc->doc));
+ PyUnicode_FromFormat("\n\n%s", ufunc->doc));
}
return doc;
}
@@ -6051,7 +6060,7 @@ ufunc_get_types(PyUFuncObject *ufunc)
t[ni + 2 + j] = _typecharfromnum(ufunc->types[n]);
n++;
}
- str = PyUString_FromStringAndSize(t, no + ni + 2);
+ str = PyUnicode_FromStringAndSize(t, no + ni + 2);
PyList_SET_ITEM(list, k, str);
}
PyArray_free(t);
@@ -6061,7 +6070,7 @@ ufunc_get_types(PyUFuncObject *ufunc)
static PyObject *
ufunc_get_name(PyUFuncObject *ufunc)
{
- return PyUString_FromString(ufunc->name);
+ return PyUnicode_FromString(ufunc->name);
}
static PyObject *
@@ -6077,7 +6086,7 @@ ufunc_get_signature(PyUFuncObject *ufunc)
if (!ufunc->core_enabled) {
Py_RETURN_NONE;
}
- return PyUString_FromString(ufunc->core_signature);
+ return PyUnicode_FromString(ufunc->core_signature);
}
#undef _typecharfromnum
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index fec3caef2..aa6f34d59 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -36,15 +36,15 @@ npy_casting_to_py_object(NPY_CASTING casting)
{
switch (casting) {
case NPY_NO_CASTING:
- return PyUString_FromString("no");
+ return PyUnicode_FromString("no");
case NPY_EQUIV_CASTING:
- return PyUString_FromString("equiv");
+ return PyUnicode_FromString("equiv");
case NPY_SAFE_CASTING:
- return PyUString_FromString("safe");
+ return PyUnicode_FromString("safe");
case NPY_SAME_KIND_CASTING:
- return PyUString_FromString("same_kind");
+ return PyUnicode_FromString("same_kind");
case NPY_UNSAFE_CASTING:
- return PyUString_FromString("unsafe");
+ return PyUnicode_FromString("unsafe");
default:
return PyLong_FromLong(casting);
}
@@ -1336,7 +1336,6 @@ find_userloop(PyUFuncObject *ufunc,
void **out_innerloopdata)
{
npy_intp i, nin = ufunc->nin, j, nargs = nin + ufunc->nout;
- PyUFunc_Loop1d *funcdata;
/* Use this to try to avoid repeating the same userdef loop search */
int last_userdef = -1;
@@ -1368,9 +1367,11 @@ find_userloop(PyUFuncObject *ufunc,
else if (obj == NULL) {
continue;
}
- for (funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
- funcdata != NULL;
- funcdata = funcdata->next) {
+ PyUFunc_Loop1d *funcdata = PyCapsule_GetPointer(obj, NULL);
+ if (funcdata == NULL) {
+ return -1;
+ }
+ for (; funcdata != NULL; funcdata = funcdata->next) {
int *types = funcdata->arg_types;
for (j = 0; j < nargs; ++j) {
@@ -1744,7 +1745,6 @@ linear_search_userloop_type_resolver(PyUFuncObject *self,
char *out_err_dst_typecode)
{
npy_intp i, nop = self->nin + self->nout;
- PyUFunc_Loop1d *funcdata;
/* Use this to try to avoid repeating the same userdef loop search */
int last_userdef = -1;
@@ -1776,9 +1776,11 @@ linear_search_userloop_type_resolver(PyUFuncObject *self,
else if (obj == NULL) {
continue;
}
- for (funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
- funcdata != NULL;
- funcdata = funcdata->next) {
+ PyUFunc_Loop1d *funcdata = PyCapsule_GetPointer(obj, NULL);
+ if (funcdata == NULL) {
+ return -1;
+ }
+ for (; funcdata != NULL; funcdata = funcdata->next) {
int *types = funcdata->arg_types;
switch (ufunc_loop_matches(self, op,
input_casting, output_casting,
@@ -1816,7 +1818,6 @@ type_tuple_userloop_type_resolver(PyUFuncObject *self,
PyArray_Descr **out_dtype)
{
int i, j, nin = self->nin, nop = nin + self->nout;
- PyUFunc_Loop1d *funcdata;
/* Use this to try to avoid repeating the same userdef loop search */
int last_userdef = -1;
@@ -1844,9 +1845,11 @@ type_tuple_userloop_type_resolver(PyUFuncObject *self,
continue;
}
- for (funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
- funcdata != NULL;
- funcdata = funcdata->next) {
+ PyUFunc_Loop1d *funcdata = PyCapsule_GetPointer(obj, NULL);
+ if (funcdata == NULL) {
+ return -1;
+ }
+ for (; funcdata != NULL; funcdata = funcdata->next) {
int *types = funcdata->arg_types;
int matched = 1;
diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c
index 708a27ad0..ba7ac1706 100644
--- a/numpy/core/src/umath/umathmodule.c
+++ b/numpy/core/src/umath/umathmodule.c
@@ -237,23 +237,23 @@ NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_pyvals_name = NULL;
static int
intern_strings(void)
{
- if (!(npy_um_str_out = PyUString_InternFromString("out"))) return -1;
- if (!(npy_um_str_where = PyUString_InternFromString("where"))) return -1;
- if (!(npy_um_str_axes = PyUString_InternFromString("axes"))) return -1;
- if (!(npy_um_str_axis = PyUString_InternFromString("axis"))) return -1;
- if (!(npy_um_str_keepdims = PyUString_InternFromString("keepdims"))) return -1;
- if (!(npy_um_str_casting = PyUString_InternFromString("casting"))) return -1;
- if (!(npy_um_str_order = PyUString_InternFromString("order"))) return -1;
- if (!(npy_um_str_dtype = PyUString_InternFromString("dtype"))) return -1;
- if (!(npy_um_str_subok = PyUString_InternFromString("subok"))) return -1;
- if (!(npy_um_str_signature = PyUString_InternFromString("signature"))) return -1;
- if (!(npy_um_str_sig = PyUString_InternFromString("sig"))) return -1;
- if (!(npy_um_str_extobj = PyUString_InternFromString("extobj"))) return -1;
- if (!(npy_um_str_array_prepare = PyUString_InternFromString("__array_prepare__"))) return -1;
- if (!(npy_um_str_array_wrap = PyUString_InternFromString("__array_wrap__"))) return -1;
- if (!(npy_um_str_array_finalize = PyUString_InternFromString("__array_finalize__"))) return -1;
- if (!(npy_um_str_ufunc = PyUString_InternFromString("__array_ufunc__"))) return -1;
- if (!(npy_um_str_pyvals_name = PyUString_InternFromString(UFUNC_PYVALS_NAME))) return -1;
+ if (!(npy_um_str_out = PyUnicode_InternFromString("out"))) return -1;
+ if (!(npy_um_str_where = PyUnicode_InternFromString("where"))) return -1;
+ if (!(npy_um_str_axes = PyUnicode_InternFromString("axes"))) return -1;
+ if (!(npy_um_str_axis = PyUnicode_InternFromString("axis"))) return -1;
+ if (!(npy_um_str_keepdims = PyUnicode_InternFromString("keepdims"))) return -1;
+ if (!(npy_um_str_casting = PyUnicode_InternFromString("casting"))) return -1;
+ if (!(npy_um_str_order = PyUnicode_InternFromString("order"))) return -1;
+ if (!(npy_um_str_dtype = PyUnicode_InternFromString("dtype"))) return -1;
+ if (!(npy_um_str_subok = PyUnicode_InternFromString("subok"))) return -1;
+ if (!(npy_um_str_signature = PyUnicode_InternFromString("signature"))) return -1;
+ if (!(npy_um_str_sig = PyUnicode_InternFromString("sig"))) return -1;
+ if (!(npy_um_str_extobj = PyUnicode_InternFromString("extobj"))) return -1;
+ if (!(npy_um_str_array_prepare = PyUnicode_InternFromString("__array_prepare__"))) return -1;
+ if (!(npy_um_str_array_wrap = PyUnicode_InternFromString("__array_wrap__"))) return -1;
+ if (!(npy_um_str_array_finalize = PyUnicode_InternFromString("__array_finalize__"))) return -1;
+ if (!(npy_um_str_ufunc = PyUnicode_InternFromString("__array_ufunc__"))) return -1;
+ if (!(npy_um_str_pyvals_name = PyUnicode_InternFromString(UFUNC_PYVALS_NAME))) return -1;
return 0;
}
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index 51cf7039f..a97198076 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -14,7 +14,7 @@ from numpy.testing import (
assert_raises_regex, assert_warns, suppress_warnings,
_assert_valid_refcount, HAS_REFCOUNT,
)
-from numpy.testing._private.utils import _no_tracing
+from numpy.testing._private.utils import _no_tracing, requires_memory
from numpy.compat import asbytes, asunicode, pickle
try:
@@ -2488,3 +2488,29 @@ class TestRegression:
assert arr.size * arr.itemsize > 2 ** 31
c_arr = np.ctypeslib.as_ctypes(arr)
assert_equal(c_arr._length_, arr.size)
+
+ def test_complex_conversion_error(self):
+ # gh-17068
+ with pytest.raises(TypeError, match=r"Unable to convert dtype.*"):
+ complex(np.array("now", np.datetime64))
+
+ def test__array_interface__descr(self):
+ # gh-17068
+ dt = np.dtype(dict(names=['a', 'b'],
+ offsets=[0, 0],
+ formats=[np.int64, np.int64]))
+ descr = np.array((1, 1), dtype=dt).__array_interface__['descr']
+ assert descr == [('', '|V8')] # instead of [(b'', '|V8')]
+
+ @pytest.mark.skipif(sys.maxsize < 2 ** 31 + 1, reason='overflows 32-bit python')
+ @requires_memory(free_bytes=9e9)
+ def test_dot_big_stride(self):
+ # gh-17111
+ # blas stride = stride//itemsize > int32 max
+ int32_max = np.iinfo(np.int32).max
+ n = int32_max + 3
+ a = np.empty([n], dtype=np.float32)
+ b = a[::n-1]
+ b[...] = 1
+ assert b.strides[0] > int32_max * b.dtype.itemsize
+ assert np.dot(b, b) == 2.0
diff --git a/numpy/ctypeslib.py b/numpy/ctypeslib.py
index 76ba838b7..e8f7750fe 100644
--- a/numpy/ctypeslib.py
+++ b/numpy/ctypeslib.py
@@ -49,12 +49,11 @@ Then, we're ready to call ``foo_func``:
>>> _lib.foo_func(out, len(out)) #doctest: +SKIP
"""
-__all__ = ['load_library', 'ndpointer', 'ctypes_load_library',
- 'c_intp', 'as_ctypes', 'as_array']
+__all__ = ['load_library', 'ndpointer', 'c_intp', 'as_ctypes', 'as_array']
import os
from numpy import (
- integer, ndarray, dtype as _dtype, deprecate, array, frombuffer
+ integer, ndarray, dtype as _dtype, array, frombuffer
)
from numpy.core.multiarray import _flagdict, flagsobj
@@ -75,7 +74,6 @@ if ctypes is None:
"""
raise ImportError("ctypes is not available.")
- ctypes_load_library = _dummy
load_library = _dummy
as_ctypes = _dummy
as_array = _dummy
@@ -154,8 +152,6 @@ else:
## if no successful return in the libname_ext loop:
raise OSError("no file with expected extension")
- ctypes_load_library = deprecate(load_library, 'ctypes_load_library',
- 'load_library')
def _num_fromflags(flaglist):
num = 0
diff --git a/numpy/f2py/cfuncs.py b/numpy/f2py/cfuncs.py
index ccbc9b0fb..9f5c73a45 100644
--- a/numpy/f2py/cfuncs.py
+++ b/numpy/f2py/cfuncs.py
@@ -320,10 +320,10 @@ cppmacros[
'pyobj_from_complex_float1'] = '#define pyobj_from_complex_float1(v) (PyComplex_FromDoubles(v.r,v.i))'
needs['pyobj_from_string1'] = ['string']
cppmacros[
- 'pyobj_from_string1'] = '#define pyobj_from_string1(v) (PyString_FromString((char *)v))'
+ 'pyobj_from_string1'] = '#define pyobj_from_string1(v) (PyUnicode_FromString((char *)v))'
needs['pyobj_from_string1size'] = ['string']
cppmacros[
- 'pyobj_from_string1size'] = '#define pyobj_from_string1size(v,len) (PyUString_FromStringAndSize((char *)v, len))'
+ 'pyobj_from_string1size'] = '#define pyobj_from_string1size(v,len) (PyUnicode_FromStringAndSize((char *)v, len))'
needs['TRYPYARRAYTEMPLATE'] = ['PRINTPYOBJERR']
cppmacros['TRYPYARRAYTEMPLATE'] = """\
/* New SciPy */
diff --git a/numpy/f2py/rules.py b/numpy/f2py/rules.py
index 56f2033ff..a14f60194 100755
--- a/numpy/f2py/rules.py
+++ b/numpy/f2py/rules.py
@@ -202,7 +202,7 @@ PyMODINIT_FUNC PyInit_#modulename#(void) {
\tif (PyErr_Occurred())
\t\t{PyErr_SetString(PyExc_ImportError, \"can't initialize module #modulename# (failed to import numpy)\"); return m;}
\td = PyModule_GetDict(m);
-\ts = PyString_FromString(\"$R""" + """evision: $\");
+\ts = PyUnicode_FromString(\"$R""" + """evision: $\");
\tPyDict_SetItemString(d, \"__version__\", s);
\tPy_DECREF(s);
\ts = PyUnicode_FromString(
diff --git a/numpy/f2py/src/test/foomodule.c b/numpy/f2py/src/test/foomodule.c
index caf3590d4..88ec62440 100644
--- a/numpy/f2py/src/test/foomodule.c
+++ b/numpy/f2py/src/test/foomodule.c
@@ -121,7 +121,7 @@ void initfoo() {
m = Py_InitModule("foo", foo_module_methods);
d = PyModule_GetDict(m);
- s = PyString_FromString("This module 'foo' demonstrates the usage of fortranobject.");
+ s = PyUnicode_FromString("This module 'foo' demonstrates the usage of fortranobject.");
PyDict_SetItemString(d, "__doc__", s);
/* Fortran objects: */
diff --git a/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c b/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c
index 8b089d334..0411b62e0 100644
--- a/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c
+++ b/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c
@@ -1,14 +1,9 @@
-/* File: wrapmodule.c
- * This file is auto-generated with f2py (version:2_1330).
- * Hand edited by Pearu.
- * f2py is a Fortran to Python Interface Generator (FPIG), Second Edition,
- * written by Pearu Peterson <pearu@cens.ioc.ee>.
- * See http://cens.ioc.ee/projects/f2py2e/
- * Generation date: Fri Oct 21 22:41:12 2005
- * $Revision:$
- * $Date:$
- * Do not edit this file directly unless you know what you are doing!!!
+/*
+ * This file was auto-generated with f2py (version:2_1330) and hand edited by
+ * Pearu for testing purposes. Do not edit this file unless you know what you
+ * are doing!!!
*/
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -149,9 +144,9 @@ PyMODINIT_FUNC PyInit_test_array_from_pyobj_ext(void) {
if (PyErr_Occurred())
Py_FatalError("can't initialize module wrap (failed to import numpy)");
d = PyModule_GetDict(m);
- s = PyString_FromString("This module 'wrap' is auto-generated with f2py (version:2_1330).\nFunctions:\n"
-" arr = call(type_num,dims,intent,obj)\n"
-".");
+ s = PyUnicode_FromString("This module 'wrap' is auto-generated with f2py (version:2_1330).\nFunctions:\n"
+ " arr = call(type_num,dims,intent,obj)\n"
+ ".");
PyDict_SetItemString(d, "__doc__", s);
wrap_error = PyErr_NewException ("wrap.error", NULL, NULL);
Py_DECREF(s);
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index df9a110c5..6a2ad004c 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -278,7 +278,7 @@ def unique(ar, return_index=False, return_inverse=False,
ar = np.moveaxis(ar, axis, 0)
except np.AxisError:
# this removes the "axis1" or "axis2" prefix from the error message
- raise np.AxisError(axis, ar.ndim)
+ raise np.AxisError(axis, ar.ndim) from None
# Must reshape to a contiguous 2D array for this to work...
orig_shape, orig_dtype = ar.shape, ar.dtype
@@ -300,10 +300,10 @@ def unique(ar, return_index=False, return_inverse=False,
# array with shape `(len(ar),)`. Because `dtype` in this case has
# itemsize 0, the total size of the result is still 0 bytes.
consolidated = np.empty(len(ar), dtype=dtype)
- except TypeError:
+ except TypeError as e:
# There's no good way to do this for object arrays, etc...
msg = 'The axis argument to unique is not supported for dtype {dt}'
- raise TypeError(msg.format(dt=ar.dtype))
+ raise TypeError(msg.format(dt=ar.dtype)) from e
def reshape_uniq(uniq):
n = len(uniq)
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 556227c0d..0db00a0f2 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1991,8 +1991,8 @@ class vectorize:
.. versionadded:: 1.7.0
cache : bool, optional
- If `True`, then cache the first function call that determines the number
- of outputs if `otypes` is not provided.
+ If `True`, then cache the first function call that determines the number
+ of outputs if `otypes` is not provided.
.. versionadded:: 1.7.0
diff --git a/numpy/lib/tests/test_financial_expired.py b/numpy/lib/tests/test_financial_expired.py
index e1d05da0c..66bb08026 100644
--- a/numpy/lib/tests/test_financial_expired.py
+++ b/numpy/lib/tests/test_financial_expired.py
@@ -3,10 +3,11 @@ import pytest
import numpy as np
+@pytest.mark.skipif(sys.version_info[:2] < (3, 7),
+ reason="requires python 3.7 or higher")
def test_financial_expired():
- if sys.version_info[:2] >= (3, 7):
- match = 'NEP 32'
- else:
- match = None
- with pytest.raises(AttributeError, match=match):
- np.fv
+ match = 'NEP 32'
+ with pytest.warns(RuntimeWarning, match=match):
+ func = np.fv
+ with pytest.raises(RuntimeError, match=match):
+ func(1, 2, 3)
diff --git a/numpy/linalg/umath_linalg.c.src b/numpy/linalg/umath_linalg.c.src
index 59647c67d..1807aadcf 100644
--- a/numpy/linalg/umath_linalg.c.src
+++ b/numpy/linalg/umath_linalg.c.src
@@ -3665,7 +3665,7 @@ PyObject *PyInit__umath_linalg(void)
return NULL;
}
- version = PyString_FromString(umath_linalg_version_string);
+ version = PyUnicode_FromString(umath_linalg_version_string);
if (version == NULL) {
return NULL;
}
diff --git a/numpy/ma/timer_comparison.py b/numpy/ma/timer_comparison.py
index 83bd7852e..f5855efcf 100644
--- a/numpy/ma/timer_comparison.py
+++ b/numpy/ma/timer_comparison.py
@@ -100,9 +100,9 @@ class ModuleTester:
header=header,
names=('x', 'y'))
assert cond, msg
- except ValueError:
+ except ValueError as e:
msg = build_err_msg([x, y], err_msg, header=header, names=('x', 'y'))
- raise ValueError(msg)
+ raise ValueError(msg) from e
def assert_array_equal(self, x, y, err_msg=''):
"""
diff --git a/numpy/tests/typing/fail/fromnumeric.py b/numpy/tests/typing/fail/fromnumeric.py
index 66f8a89d0..c9156895d 100644
--- a/numpy/tests/typing/fail/fromnumeric.py
+++ b/numpy/tests/typing/fail/fromnumeric.py
@@ -124,3 +124,31 @@ np.amin(a, keepdims=1.0) # E: No overload variant of "amin" matches argument ty
np.amin(a, out=1.0) # E: No overload variant of "amin" matches argument type
np.amin(a, initial=[1.0]) # E: No overload variant of "amin" matches argument type
np.amin(a, where=[1.0]) # E: List item 0 has incompatible type
+
+np.prod(a, axis=1.0) # E: No overload variant of "prod" matches argument type
+np.prod(a, out=False) # E: No overload variant of "prod" matches argument type
+np.prod(a, keepdims=1.0) # E: No overload variant of "prod" matches argument type
+np.prod(a, initial=int) # E: No overload variant of "prod" matches argument type
+np.prod(a, where=1.0) # E: No overload variant of "prod" matches argument type
+
+np.cumprod(a, axis=1.0) # E: Argument "axis" to "cumprod" has incompatible type
+np.cumprod(a, out=False) # E: Argument "out" to "cumprod" has incompatible type
+
+np.size(a, axis=1.0) # E: Argument "axis" to "size" has incompatible type
+
+np.around(a, decimals=1.0) # E: No overload variant of "around" matches argument type
+np.around(a, out=type) # E: No overload variant of "around" matches argument type
+
+np.mean(a, axis=1.0) # E: No overload variant of "mean" matches argument type
+np.mean(a, out=False) # E: No overload variant of "mean" matches argument type
+np.mean(a, keepdims=1.0) # E: No overload variant of "mean" matches argument type
+
+np.std(a, axis=1.0) # E: No overload variant of "std" matches argument type
+np.std(a, out=False) # E: No overload variant of "std" matches argument type
+np.std(a, ddof='test') # E: No overload variant of "std" matches argument type
+np.std(a, keepdims=1.0) # E: No overload variant of "std" matches argument type
+
+np.var(a, axis=1.0) # E: No overload variant of "var" matches argument type
+np.var(a, out=False) # E: No overload variant of "var" matches argument type
+np.var(a, ddof='test') # E: No overload variant of "var" matches argument type
+np.var(a, keepdims=1.0) # E: No overload variant of "var" matches argument type
diff --git a/numpy/tests/typing/pass/fromnumeric.py b/numpy/tests/typing/pass/fromnumeric.py
index d9dd45c54..9e936e684 100644
--- a/numpy/tests/typing/pass/fromnumeric.py
+++ b/numpy/tests/typing/pass/fromnumeric.py
@@ -10,6 +10,7 @@ B.setflags(write=False)
a = np.bool_(True)
b = np.float32(1.0)
c = 1.0
+d = np.array(1.0, dtype=np.float32) # writeable
np.take(a, 0)
np.take(b, 0)
@@ -183,3 +184,77 @@ np.amin(A, axis=0)
np.amin(B, axis=0)
np.amin(A, keepdims=True)
np.amin(B, keepdims=True)
+
+np.prod(a)
+np.prod(b)
+np.prod(c)
+np.prod(A)
+np.prod(B)
+np.prod(a, dtype=None)
+np.prod(A, dtype=None)
+np.prod(A, axis=0)
+np.prod(B, axis=0)
+np.prod(A, keepdims=True)
+np.prod(B, keepdims=True)
+np.prod(b, out=d)
+np.prod(B, out=d)
+
+np.cumprod(a)
+np.cumprod(b)
+np.cumprod(c)
+np.cumprod(A)
+np.cumprod(B)
+
+np.ndim(a)
+np.ndim(b)
+np.ndim(c)
+np.ndim(A)
+np.ndim(B)
+
+np.size(a)
+np.size(b)
+np.size(c)
+np.size(A)
+np.size(B)
+
+np.around(a)
+np.around(b)
+np.around(c)
+np.around(A)
+np.around(B)
+
+np.mean(a)
+np.mean(b)
+np.mean(c)
+np.mean(A)
+np.mean(B)
+np.mean(A, axis=0)
+np.mean(B, axis=0)
+np.mean(A, keepdims=True)
+np.mean(B, keepdims=True)
+np.mean(b, out=d)
+np.mean(B, out=d)
+
+np.std(a)
+np.std(b)
+np.std(c)
+np.std(A)
+np.std(B)
+np.std(A, axis=0)
+np.std(B, axis=0)
+np.std(A, keepdims=True)
+np.std(B, keepdims=True)
+np.std(b, out=d)
+np.std(B, out=d)
+
+np.var(a)
+np.var(b)
+np.var(c)
+np.var(A)
+np.var(B)
+np.var(A, axis=0)
+np.var(B, axis=0)
+np.var(A, keepdims=True)
+np.var(B, keepdims=True)
+np.var(b, out=d)
+np.var(B, out=d)
diff --git a/numpy/tests/typing/reveal/fromnumeric.py b/numpy/tests/typing/reveal/fromnumeric.py
index f5feb3f5f..06501f6e2 100644
--- a/numpy/tests/typing/reveal/fromnumeric.py
+++ b/numpy/tests/typing/reveal/fromnumeric.py
@@ -10,6 +10,7 @@ B.setflags(write=False)
a = np.bool_(True)
b = np.float32(1.0)
c = 1.0
+d = np.array(1.0, dtype=np.float32) # writeable
reveal_type(np.take(a, 0)) # E: numpy.bool_
reveal_type(np.take(b, 0)) # E: numpy.float32
@@ -203,3 +204,75 @@ reveal_type(np.amin(A, axis=0)) # E: Union[numpy.number, numpy.ndarray]
reveal_type(np.amin(B, axis=0)) # E: Union[numpy.number, numpy.ndarray]
reveal_type(np.amin(A, keepdims=True)) # E: Union[numpy.number, numpy.ndarray]
reveal_type(np.amin(B, keepdims=True)) # E: Union[numpy.number, numpy.ndarray]
+
+reveal_type(np.prod(a)) # E: numpy.number
+reveal_type(np.prod(b)) # E: numpy.float32
+reveal_type(np.prod(c)) # E: numpy.number
+reveal_type(np.prod(A)) # E: numpy.number
+reveal_type(np.prod(B)) # E: numpy.number
+reveal_type(np.prod(A, axis=0)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.prod(B, axis=0)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.prod(A, keepdims=True)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.prod(B, keepdims=True)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.prod(b, out=d)) # E: numpy.ndarray
+reveal_type(np.prod(B, out=d)) # E: numpy.ndarray
+
+reveal_type(np.cumprod(a)) # E: numpy.ndarray
+reveal_type(np.cumprod(b)) # E: numpy.ndarray
+reveal_type(np.cumprod(c)) # E: numpy.ndarray
+reveal_type(np.cumprod(A)) # E: numpy.ndarray
+reveal_type(np.cumprod(B)) # E: numpy.ndarray
+
+reveal_type(np.ndim(a)) # E: int
+reveal_type(np.ndim(b)) # E: int
+reveal_type(np.ndim(c)) # E: int
+reveal_type(np.ndim(A)) # E: int
+reveal_type(np.ndim(B)) # E: int
+
+reveal_type(np.size(a)) # E: int
+reveal_type(np.size(b)) # E: int
+reveal_type(np.size(c)) # E: int
+reveal_type(np.size(A)) # E: int
+reveal_type(np.size(B)) # E: int
+
+reveal_type(np.around(a)) # E: numpy.number
+reveal_type(np.around(b)) # E: numpy.float32
+reveal_type(np.around(c)) # E: numpy.number
+reveal_type(np.around(A)) # E: numpy.ndarray
+reveal_type(np.around(B)) # E: numpy.ndarray
+
+reveal_type(np.mean(a)) # E: numpy.number
+reveal_type(np.mean(b)) # E: numpy.number
+reveal_type(np.mean(c)) # E: numpy.number
+reveal_type(np.mean(A)) # E: numpy.number
+reveal_type(np.mean(B)) # E: numpy.number
+reveal_type(np.mean(A, axis=0)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.mean(B, axis=0)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.mean(A, keepdims=True)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.mean(B, keepdims=True)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.mean(b, out=d)) # E: numpy.ndarray
+reveal_type(np.mean(B, out=d)) # E: numpy.ndarray
+
+reveal_type(np.std(a)) # E: numpy.number
+reveal_type(np.std(b)) # E: numpy.number
+reveal_type(np.std(c)) # E: numpy.number
+reveal_type(np.std(A)) # E: numpy.number
+reveal_type(np.std(B)) # E: numpy.number
+reveal_type(np.std(A, axis=0)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.std(B, axis=0)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.std(A, keepdims=True)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.std(B, keepdims=True)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.std(b, out=d)) # E: numpy.ndarray
+reveal_type(np.std(B, out=d)) # E: numpy.ndarray
+
+reveal_type(np.var(a)) # E: numpy.number
+reveal_type(np.var(b)) # E: numpy.number
+reveal_type(np.var(c)) # E: numpy.number
+reveal_type(np.var(A)) # E: numpy.number
+reveal_type(np.var(B)) # E: numpy.number
+reveal_type(np.var(A, axis=0)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.var(B, axis=0)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.var(A, keepdims=True)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.var(B, keepdims=True)) # E: Union[numpy.number, numpy.ndarray]
+reveal_type(np.var(b, out=d)) # E: numpy.ndarray
+reveal_type(np.var(B, out=d)) # E: numpy.ndarray
diff --git a/runtests.py b/runtests.py
index 8aefab0db..ce351e3c4 100755
--- a/runtests.py
+++ b/runtests.py
@@ -108,6 +108,8 @@ def main(argv):
help="Start IPython shell with PYTHONPATH set")
parser.add_argument("--shell", action="store_true",
help="Start Unix shell with PYTHONPATH set")
+ parser.add_argument("--mypy", action="store_true",
+ help="Run mypy on files with NumPy on the MYPYPATH")
parser.add_argument("--debug", "-g", action="store_true",
help="Debug build")
parser.add_argument("--parallel", "-j", type=int, default=0,
@@ -131,7 +133,7 @@ def main(argv):
"COMMIT. Note that you need to commit your "
"changes first!"))
parser.add_argument("args", metavar="ARGS", default=[], nargs=REMAINDER,
- help="Arguments to pass to Nose, asv, Python or shell")
+ help="Arguments to pass to pytest, asv, mypy, Python or shell")
args = parser.parse_args(argv)
if args.durations < 0:
@@ -211,6 +213,35 @@ def main(argv):
subprocess.call([shell] + extra_argv)
sys.exit(0)
+ if args.mypy:
+ try:
+ import mypy.api
+ except ImportError:
+ raise RuntimeError(
+ "Mypy not found. Please install it by running "
+ "pip install -r test_requirements.txt from the repo root"
+ )
+
+ os.environ['MYPYPATH'] = site_dir
+ # By default mypy won't color the output since it isn't being
+ # invoked from a tty.
+ os.environ['MYPY_FORCE_COLOR'] = '1'
+
+ config = os.path.join(
+ site_dir,
+ "numpy",
+ "tests",
+ "typing",
+ "mypy.ini",
+ )
+
+ report, errors, status = mypy.api.run(
+ ['--config-file', config] + args.args
+ )
+ print(report, end='')
+ print(errors, end='', file=sys.stderr)
+ sys.exit(status)
+
if args.coverage:
dst_dir = os.path.join(ROOT_DIR, 'build', 'coverage')
fn = os.path.join(dst_dir, 'coverage_html.js')