summaryrefslogtreecommitdiff
path: root/CMakeLists.txt
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2022-11-14 21:34:57 +0200
committerLasse Collin <lasse.collin@tukaani.org>2022-11-14 23:05:46 +0200
commitf644473a211394447824ea00518d0a214ff3f7f2 (patch)
tree8fc19fa7f3811b090f3f73398ce8bd0708d10a53 /CMakeLists.txt
parent3b466bc79672bb2b06d1245a500588e6026e0ba0 (diff)
downloadxz-f644473a211394447824ea00518d0a214ff3f7f2.tar.gz
liblzma: Add fast CRC64 for 32/64-bit x86 using SSSE3 + SSE4.1 + CLMUL.
It also works on E2K as it supports these intrinsics. On x86-64 runtime detection is used so the code keeps working on older processors too. A CLMUL-only build can be done by using -msse4.1 -mpclmul in CFLAGS and this will reduce the library size since the generic implementation and its 8 KiB lookup table will be omitted. On 32-bit x86 this isn't used by default for now because by default on 32-bit x86 the separate assembly file crc64_x86.S is used. If --disable-assembler is used then this new CLMUL code is used the same way as on 64-bit x86. However, a CLMUL-only build (-msse4.1 -mpclmul) won't omit the 8 KiB lookup table on 32-bit x86 due to a currently-missing check for disabled assembler usage. The configure.ac check should be such that the code won't be built if something in the toolchain doesn't support it but --disable-clmul-crc option can be used to unconditionally disable this feature. CLMUL speeds up decompression of files that have compressed very well (assuming CRC64 is used as a check type). It is known that the CLMUL code is significantly slower than the generic code for tiny inputs (especially 1-8 bytes but up to 16 bytes). If that is a real-world problem then there is already a commented-out variant that uses the generic version for small inputs. Thanks to Ilya Kurdyukov for the original patch which was derived from a white paper from Intel [1] (published in 2009) and public domain code from [2] (released in 2016). [1] https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf [2] https://github.com/rawrunprotected/crc
Diffstat (limited to 'CMakeLists.txt')
-rw-r--r--CMakeLists.txt26
1 files changed, 24 insertions, 2 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 11f3a05..ec44bac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,8 +49,10 @@
cmake_minimum_required(VERSION 3.13...3.16 FATAL_ERROR)
include(CMakePushCheckState)
+include(CheckIncludeFile)
include(CheckSymbolExists)
include(CheckStructHasMember)
+include(CheckCSourceCompiles)
include(cmake/tuklib_integer.cmake)
include(cmake/tuklib_cpucores.cmake)
include(cmake/tuklib_physmem.cmake)
@@ -402,14 +404,16 @@ check_c_source_compiles("
cmake_pop_check_state()
tuklib_add_definition_if(liblzma HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR)
+# cpuid.h
+check_include_file(cpuid.h HAVE_CPUID_H)
+tuklib_add_definition_if(liblzma HAVE_CPUID_H)
+
# immintrin.h:
-include(CheckIncludeFile)
check_include_file(immintrin.h HAVE_IMMINTRIN_H)
if(HAVE_IMMINTRIN_H)
target_compile_definitions(liblzma PRIVATE HAVE_IMMINTRIN_H)
# SSE2 intrinsics:
- include(CheckCSourceCompiles)
check_c_source_compiles("
#include <immintrin.h>
int main(void)
@@ -421,6 +425,24 @@ if(HAVE_IMMINTRIN_H)
"
HAVE__MM_MOVEMASK_EPI8)
tuklib_add_definition_if(liblzma HAVE__MM_MOVEMASK_EPI8)
+
+ # CLMUL intrinsic:
+ check_c_source_compiles("
+ #include <immintrin.h>
+ #if defined(__e2k__) && __iset__ < 6
+ # error
+ #endif
+ #if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__)
+ __attribute__((__target__(\"ssse3,sse4.1,pclmul\")))
+ #endif
+ __m128i my_clmul(__m128i a, __m128i b)
+ {
+ return _mm_clmulepi64_si128(a, b, 0);
+ }
+ int main(void) { return 0; }
+ "
+ HAVE_USABLE_CLMUL)
+ tuklib_add_definition_if(liblzma HAVE_USABLE_CLMUL)
endif()
# Support -fvisiblity=hidden when building shared liblzma.