author     Ondrej Bilka <neleai@seznam.cz>    2013-05-20 08:20:00 +0200
committer  Ondrej Bilka <neleai@seznam.cz>    2013-05-20 08:24:41 +0200
commit     2d48b41c8fa610067c4d664ac2339ae6ca43e78c
tree       4d1ca07a1228ba16c12a67ddb08595770f397da1 /sysdeps/x86_64/multiarch/memcpy.S
parent     3e694268750d51acc6a68b0ee7ded25a52902c20
Faster memcpy on x64.
We add a new memcpy version that uses unaligned loads, which are fast on modern processors. This in turn allows a second improvement: avoiding a computed jump, which is a relatively expensive operation. Tests are available here: http://kam.mff.cuni.cz/~ondra/memcpy_profile_result27_04_13.tar.bz2
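The unaligned-load trick the message refers to can be pictured in C. The sketch below is illustrative only, not the committed assembly, and the helper name is made up: two possibly overlapping unaligned 8-byte accesses handle every length from 8 to 16 bytes with the same straight-line code, where a classic small-copy path would index a jump table (a computed jump) on the exact length.

    #include <stdint.h>
    #include <string.h>

    /* Illustrative sketch, not the committed code: copy any 8..16
       byte range with two unaligned 8-byte accesses that may
       overlap.  A fixed-size memcpy like this compiles to a single
       unaligned mov on x86-64, so no jump table is needed.  */
    static void
    copy_8_to_16 (unsigned char *dst, const unsigned char *src, size_t n)
    {
      uint64_t head, tail;
      memcpy (&head, src, 8);          /* unaligned load of the start   */
      memcpy (&tail, src + n - 8, 8);  /* overlaps head when n < 16     */
      memcpy (dst, &head, 8);
      memcpy (dst + n - 8, &tail, 8);
    }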
Diffstat (limited to 'sysdeps/x86_64/multiarch/memcpy.S')
-rw-r--r--   sysdeps/x86_64/multiarch/memcpy.S   15
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index b452f5304b..a1e5031376 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -33,13 +33,14 @@ ENTRY(__new_memcpy)
jne 1f
call __init_cpu_features
1: leaq __memcpy_sse2(%rip), %rax
- testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
- jz 2f
- leaq __memcpy_ssse3(%rip), %rax
- testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
- jz 2f
- leaq __memcpy_ssse3_back(%rip), %rax
-2: ret
+ testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
+ jnz 2f
+ leaq __memcpy_sse2_unaligned(%rip), %rax
+ ret
+2: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ jz 3f
+ leaq __memcpy_ssse3(%rip), %rax
+3: ret
END(__new_memcpy)
# undef ENTRY
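For readers less fluent in AT&T syntax, the new selector amounts to a three-way choice. A rough C rendering follows; the real resolver is the assembly in __new_memcpy above, and the two flag parameters are illustrative stand-ins for the __cpu_features bit tests.

    typedef void *(*memcpy_fn) (void *, const void *, size_t);

    extern void *__memcpy_sse2 (void *, const void *, size_t);
    extern void *__memcpy_sse2_unaligned (void *, const void *, size_t);
    extern void *__memcpy_ssse3 (void *, const void *, size_t);

    static memcpy_fn
    select_memcpy (int slow_bsf, int has_ssse3)
    {
      /* 1:  default answer, loaded before any feature test.  */
      memcpy_fn fn = __memcpy_sse2;

      /* testl $bit_Slow_BSF ... / jnz 2f: the unaligned variant wins
         unless the CPU is flagged Slow_BSF.  */
      if (!slow_bsf)
        return __memcpy_sse2_unaligned;

      /* 2:  testl $bit_SSSE3 ... / jz 3f  */
      if (has_ssse3)
        fn = __memcpy_ssse3;

      /* 3:  ret  */
      return fn;
    }

Note that the old __memcpy_ssse3_back branch is gone from the selector: on Slow_BSF machines the choice is now simply SSSE3 versus plain SSE2, and everything else takes the new unaligned path.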