diff options
author | Ondrej Bilka <neleai@seznam.cz> | 2013-05-20 08:20:00 +0200 |
---|---|---|
committer | Ondrej Bilka <neleai@seznam.cz> | 2013-05-20 08:24:41 +0200 |
commit | 2d48b41c8fa610067c4d664ac2339ae6ca43e78c (patch) | |
tree | 4d1ca07a1228ba16c12a67ddb08595770f397da1 /sysdeps/x86_64/multiarch/memcpy.S | |
parent | 3e694268750d51acc6a68b0ee7ded25a52902c20 (diff) | |
download | glibc-2d48b41c8fa610067c4d664ac2339ae6ca43e78c.tar.gz |
Faster memcpy on x64.
We add a new memcpy version that uses unaligned loads, which are fast
on modern processors. This allows a second improvement, which is avoiding
a computed jump, a relatively expensive operation.
Tests available here:
http://kam.mff.cuni.cz/~ondra/memcpy_profile_result27_04_13.tar.bz2
Diffstat (limited to 'sysdeps/x86_64/multiarch/memcpy.S')
-rw-r--r-- | sysdeps/x86_64/multiarch/memcpy.S | 15 |
1 file changed, 8 insertions, 7 deletions
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S index b452f5304b..a1e5031376 100644 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ b/sysdeps/x86_64/multiarch/memcpy.S @@ -33,13 +33,14 @@ ENTRY(__new_memcpy) jne 1f call __init_cpu_features 1: leaq __memcpy_sse2(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) - jz 2f - leaq __memcpy_ssse3(%rip), %rax - testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) - jz 2f - leaq __memcpy_ssse3_back(%rip), %rax -2: ret + testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip) + jnz 2f + leaq __memcpy_sse2_unaligned(%rip), %rax + ret +2: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + jz 3f + leaq __memcpy_ssse3(%rip), %rax +3: ret END(__new_memcpy) # undef ENTRY |