diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2016-01-15 12:44:13 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2016-01-15 12:44:13 -0800 |
commit | bb83a138162885366c6c5b974caa87ac03f33e2c (patch) | |
tree | a37e6b64c024507165f1fc4e45c18c336749b032 | |
parent | a806d37903f0db0081a49a440b1f3a82f5e65916 (diff) | |
download | glibc-hjl/avx512f-mem/master.tar.gz |
Always use prefetchnta with non-temporal stores [hjl/avx512f-mem/master]
-rw-r--r-- | sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S | 24 |
1 files changed, 12 insertions, 12 deletions
```diff
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
index 56e727bfc9..6492c4f6b7 100644
--- a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
@@ -340,15 +340,15 @@ L(preloop_large):
 	sub	%rdi, %r8
 	sub	%r8, %rsi
 	add	%r8, %rdx
-	prefetcht1 (%rsi)
-	prefetcht1 0x40(%rsi)
-	prefetcht1 0x80(%rsi)
-	prefetcht1 0xC0(%rsi)
+	prefetchnta (%rsi)
+	prefetchnta 0x40(%rsi)
+	prefetchnta 0x80(%rsi)
+	prefetchnta 0xC0(%rsi)
 L(gobble_256bytes_nt_loop):
-	prefetcht1 0x200(%rsi)
-	prefetcht1 0x240(%rsi)
-	prefetcht1 0x280(%rsi)
-	prefetcht1 0x2C0(%rsi)
+	prefetchnta 0x200(%rsi)
+	prefetchnta 0x240(%rsi)
+	prefetchnta 0x280(%rsi)
+	prefetchnta 0x2C0(%rsi)
 	prefetchnta 0x300(%rsi)
 	prefetchnta 0x340(%rsi)
 	prefetchnta 0x380(%rsi)
@@ -383,10 +383,10 @@ L(preloop_large_bkw):
 	sub	%r8, %rdx
 	add	%r9, %r8
 L(gobble_256bytes_nt_loop_bkw):
-	prefetcht1 -0x300(%rcx)
-	prefetcht1 -0x2C0(%rcx)
-	prefetcht1 -0x280(%rcx)
-	prefetcht1 -0x240(%rcx)
+	prefetchnta -0x300(%rcx)
+	prefetchnta -0x2C0(%rcx)
+	prefetchnta -0x280(%rcx)
+	prefetchnta -0x240(%rcx)
 	prefetchnta -0x400(%rcx)
 	prefetchnta -0x3C0(%rcx)
 	prefetchnta -0x380(%rcx)
```