diff options
-rw-r--r-- | benchmarks/benchmarks/bench_linalg.py | 36 |
-rw-r--r-- | doc/release/1.11.0-notes.rst | 7 |
2 files changed, 31 insertions, 12 deletions
diff --git a/benchmarks/benchmarks/bench_linalg.py b/benchmarks/benchmarks/bench_linalg.py index 3d26b800c..a65d510be 100644 --- a/benchmarks/benchmarks/bench_linalg.py +++ b/benchmarks/benchmarks/bench_linalg.py @@ -18,12 +18,12 @@ class Eindot(Benchmark): self.a3 = np.arange(480000.).reshape(60, 80, 100) self.b3 = np.arange(192000.).reshape(80, 60, 40) - def time_einsum_ij_jk_a_b(self): - np.einsum('ij,jk', self.a, self.b) - def time_dot_a_b(self): np.dot(self.a, self.b) + def time_dot_d_dot_b_c(self): + np.dot(self.d, np.dot(self.b, self.c)) + def time_dot_trans_a_at(self): np.dot(self.a, self.at) @@ -36,20 +36,38 @@ class Eindot(Benchmark): def time_dot_trans_atc_a(self): np.dot(self.atc, self.a) + def time_einsum_i_ij_j(self): + np.einsum('i,ij,j', self.d, self.b, self.c) + + def time_einsum_ij_jk_a_b(self): + np.einsum('ij,jk', self.a, self.b) + + def time_einsum_ijk_jil_kl(self): + np.einsum('ijk,jil->kl', self.a3, self.b3) + def time_inner_trans_a_a(self): np.inner(self.a, self.a) def time_inner_trans_a_ac(self): np.inner(self.a, self.ac) - def time_einsum_i_ij_j(self): - np.einsum('i,ij,j', self.d, self.b, self.c) + def time_matmul_a_b(self): + np.matmul(self.a, self.b) - def time_dot_d_dot_b_c(self): - np.dot(self.d, np.dot(self.b, self.c)) + def time_matmul_d_matmul_b_c(self): + np.matmul(self.d, np.matmul(self.b, self.c)) - def time_einsum_ijk_jil_kl(self): - np.einsum('ijk,jil->kl', self.a3, self.b3) + def time_matmul_trans_a_at(self): + np.matmul(self.a, self.at) + + def time_matmul_trans_a_atc(self): + np.matmul(self.a, self.atc) + + def time_matmul_trans_at_a(self): + np.matmul(self.at, self.a) + + def time_matmul_trans_atc_a(self): + np.matmul(self.atc, self.a) def time_tensordot_a_b_axes_1_0_0_1(self): np.tensordot(self.a3, self.b3, axes=([1, 0], [0, 1])) diff --git a/doc/release/1.11.0-notes.rst b/doc/release/1.11.0-notes.rst index 16af02440..c4ff89230 100644 --- a/doc/release/1.11.0-notes.rst +++ b/doc/release/1.11.0-notes.rst @@ -149,11 +149,12 
@@ useless computations when printing a masked array. The function now uses the fallocate system call to reserve sufficient diskspace on filesystems that support it. -``np.dot`` optimized for operations of the form ``A.T @ A`` and ``A @ A.T`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Optimizations for operations of the form ``A.T @ A`` and ``A @ A.T`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Previously, ``gemm`` BLAS operations were used for all matrix products. Now, if the matrix product is between a matrix and its transpose, it will use -``syrk`` BLAS operations for a performance boost. +``syrk`` BLAS operations for a performance boost. This optimization has been +extended to ``@``, ``numpy.dot``, ``numpy.inner``, and ``numpy.matmul``. **Note:** Requires the transposed and non-transposed matrices to share data. |