author     Marko Mäkelä <marko.makela@mariadb.com>  2021-04-13 15:48:46 +0300
committer  Marko Mäkelä <marko.makela@mariadb.com>  2021-04-13 15:48:46 +0300
commit     86ee48180f69e3b4a856b514872fcfe790c112b9 (patch)
tree       a329ae2d5d9acfbd69b7ebaf55e1e4548f8e74e2
parent     9636b7cf5500da561729af8b60dcae6560b1df74 (diff)
MDEV-24745 Generic CRC-32C computation wrongly uses SSE4.2 instructions
In commit d25f806d73d9984f0c9f2a346dcebb01a0eaa109 (MDEV-22749) the CRC-32C implementation of MariaDB was broken on some IA-32 and AMD64 builds, depending on the compiler version and build options. This was verified for IA-32 on GCC 10.2.1.

Even though we try to identify the SSE4.2 extensions and the availability of the PCLMULQDQ instruction by executing CPUID, the fall-back code could be generated with extended instructions, because the entire file mysys/crc32/crc32c.c was being compiled with -msse4.2 -mpclmul. This would cause SIGILL on a PINSRD instruction on affected IA-32 targets (such as some Intel Atom processors). This might also affect old AMD64 processors (predating the 2007 Intel Nehalem microarchitecture), if some compiler chose to emit the offending instructions.

While it is fine to pass a target-specific option to a target-specific compilation unit (such as -mpclmul to a PCLMUL-specific compilation unit), that is not safe for mixed-architecture compilation units. For mixed-architecture compilation units, the correct way is to set target attributes on the target-specific functions.

There does not seem to be a way to pass target attributes to a function template instantiation. Hence, we must replace the ExtendImpl template with the plain functions crc32c_sse42() and crc32c_slow(). We will also remove some inconsistency between my_crc32c_implementation() and mysys_namespace::crc32c::Choose_Extend().

The function crc32_pclmul_enabled() will be moved to mysys/crc32/crc32c.cc so that the detection code will be compiled without -msse4.2 -mpclmul.

The AMD64 PCLMUL-accelerated crc32c_3way() will be moved to a new file crc32c_amd64.cc. In this way, only the few functions in mysys/crc32/crc32c.cc that depend on SSE4.2 need to be declared with __attribute__((target("sse4.2"))), and most of the file can be compiled for the generic target.

Last, the file mysys/crc32ieee.cc will be omitted on 64-bit POWER, because it was dead code (no symbols were exported).
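To illustrate the approach described above, here is an editorial sketch (not part of the patch): a single mixed-architecture translation unit compiled without -msse4.2, where only the function carrying __attribute__((target("sse4.2"))) may use SSE4.2 intrinsics and a CPUID probe selects it at run time. It assumes GCC 5 or later, or clang, where <nmmintrin.h> can be combined with the target attribute; the function names below are illustrative only.

    /* Editorial sketch, not MariaDB code: per-function target attribute with
       run-time CPUID dispatch, in a file built for the generic target.
       Assumes GCC >= 5 or clang; older GCC needs the __builtin_ia32_crc32*
       builtins instead of <nmmintrin.h>, as the patch below shows. */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <cpuid.h>
    #include <nmmintrin.h>

    /* Only this function may be emitted with SSE4.2 instructions. */
    __attribute__((target("sse4.2")))
    static uint32_t crc32c_hw(uint32_t crc, const char *buf, size_t len)
    {
      crc^= 0xffffffffU;
      for (size_t i= 0; i < len; i++)
        crc= _mm_crc32_u8(crc, (unsigned char) buf[i]);
      return crc ^ 0xffffffffU;
    }

    /* Generic fallback: bit-by-bit CRC-32C (reflected polynomial 0x82f63b78). */
    static uint32_t crc32c_sw(uint32_t crc, const char *buf, size_t len)
    {
      crc^= 0xffffffffU;
      for (size_t i= 0; i < len; i++)
      {
        crc^= (unsigned char) buf[i];
        for (int bit= 0; bit < 8; bit++)
          crc= (crc >> 1) ^ (0x82f63b78U & -(crc & 1U));
      }
      return crc ^ 0xffffffffU;
    }

    /* CPUID.1:ECX bit 20 indicates SSE4.2; the probe itself stays generic. */
    static bool have_sse42()
    {
      uint32_t eax= 0, ebx= 0, ecx= 0, edx= 0;
      __cpuid(1, eax, ebx, ecx, edx);
      return (ecx >> 20) & 1U;
    }

    int main()
    {
      const char data[]= "123456789";
      uint32_t c= have_sse42() ? crc32c_hw(0, data, 9) : crc32c_sw(0, data, 9);
      printf("crc32c(\"123456789\") = %#x\n", (unsigned) c); /* 0xe3069283 */
      return 0;
    }

Built with plain g++ -O2 (no -msse4.2), this should print the standard CRC-32C check value 0xe3069283 on both SSE4.2-capable and older processors, which is the property the patch restores for the generic build.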
-rw-r--r--  mysys/CMakeLists.txt          32
-rw-r--r--  mysys/crc32/crc32_x86.c       28
-rw-r--r--  mysys/crc32/crc32c.cc        955
-rw-r--r--  mysys/crc32/crc32c_amd64.cc  711
-rw-r--r--  mysys/crc32ieee.cc            20
5 files changed, 879 insertions(+), 867 deletions(-)
diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt
index c6d476c9fa4..92269538c58 100644
--- a/mysys/CMakeLists.txt
+++ b/mysys/CMakeLists.txt
@@ -16,7 +16,7 @@
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR} ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/mysys)
-SET(MYSYS_SOURCES array.c charset-def.c charset.c crc32ieee.cc my_default.c
+SET(MYSYS_SOURCES array.c charset-def.c charset.c my_default.c
get_password.c
errors.c hash.c list.c
mf_cache.c mf_dirname.c mf_fn_ext.c
@@ -60,19 +60,29 @@ ENDIF()
IF(MSVC)
SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c)
+ IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc)
+ ENDIF()
ADD_DEFINITIONS(-DHAVE_SSE42 -DHAVE_PCLMUL)
IF(CLANG_CL)
- SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.cc crc32/crc32c.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
+ SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686")
- MY_CHECK_C_COMPILER_FLAG(-msse4.2)
- MY_CHECK_C_COMPILER_FLAG(-mpclmul)
+ MY_CHECK_CXX_COMPILER_FLAG(-msse4.2)
+ MY_CHECK_CXX_COMPILER_FLAG(-mpclmul)
CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H)
CHECK_INCLUDE_FILE(x86intrin.h HAVE_X86INTRIN_H)
- IF(have_C__msse4.2 AND have_C__mpclmul AND HAVE_CPUID_H AND HAVE_X86INTRIN_H)
- SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c)
- SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c crc32/crc32c.cc PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
- ADD_DEFINITIONS(-DHAVE_SSE42 -DHAVE_PCLMUL)
+ IF(have_CXX__msse4.2 AND HAVE_CPUID_H)
+ ADD_DEFINITIONS(-DHAVE_SSE42)
+ IF (have_CXX__mpclmul AND HAVE_X86INTRIN_H)
+ ADD_DEFINITIONS(-DHAVE_PCLMUL)
+ SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c)
+ SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
+ IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc)
+ SET_SOURCE_FILES_PROPERTIES(crc32/crc32c_amd64.cc PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
+ ENDIF()
+ ENDIF()
ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
IF(CMAKE_COMPILER_IS_GNUCC)
@@ -129,11 +139,15 @@ ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
COMPILE_FLAGS "-march=armv8-a+crc+crypto")
ENDIF()
ENDIF()
-ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64" OR CMAKE_SYSTEM_NAME MATCHES AIX)
+ENDIF()
+
+IF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64" OR CMAKE_SYSTEM_NAME MATCHES AIX)
SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_ppc64.c crc32/crc32c_ppc.c)
SET_SOURCE_FILES_PROPERTIES(crc32/crc32_ppc64.c crc32/crc32c_ppc.c PROPERTIES
COMPILE_FLAGS "${COMPILE_FLAGS} -maltivec -mvsx -mpower8-vector -mcrypto -mpower8-vector")
ADD_DEFINITIONS(-DHAVE_POWER8 -DHAS_ALTIVEC)
+ELSE()
+ SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32ieee.cc)
ENDIF()
IF(UNIX)
diff --git a/mysys/crc32/crc32_x86.c b/mysys/crc32/crc32_x86.c
index 1e5d2a0a089..f077399caca 100644
--- a/mysys/crc32/crc32_x86.c
+++ b/mysys/crc32/crc32_x86.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2020 MariaDB
+/* Copyright (c) 2020, 2021, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -55,38 +55,14 @@
#include <stdint.h>
#include <stddef.h>
-#if defined(__GNUC__)
+#ifdef __GNUC__
#include <x86intrin.h>
-#include <cpuid.h>
#elif defined(_MSC_VER)
#include <intrin.h>
#else
#error "unknown compiler"
#endif
-static int has_sse42_and_pclmul(uint32_t recx)
-{
- /* 1 << 20 is SSE42, 1 << 1 is PCLMULQDQ */
-#define bits_SSE42_AND_PCLMUL (1 << 20 | 1 << 1)
- return (recx & bits_SSE42_AND_PCLMUL) == bits_SSE42_AND_PCLMUL;
-}
-
-#ifdef __GNUC__
-int crc32_pclmul_enabled(void)
-{
- uint32_t reax= 0, rebx= 0, recx= 0, redx= 0;
- __cpuid(1, reax, rebx, recx, redx);
- return has_sse42_and_pclmul(recx);
-}
-#elif defined(_MSC_VER)
-int crc32_pclmul_enabled(void)
-{
- int regs[4];
- __cpuid(regs, 1);
- return has_sse42_and_pclmul(regs[2]);
-}
-#endif
-
/**
* @brief Shifts left 128 bit register by specified number of bytes
*
diff --git a/mysys/crc32/crc32c.cc b/mysys/crc32/crc32c.cc
index b6c80886ec1..082d467e7da 100644
--- a/mysys/crc32/crc32c.cc
+++ b/mysys/crc32/crc32c.cc
@@ -32,11 +32,20 @@ static inline uint32_t DecodeFixed32(const char *ptr)
#endif
#ifdef HAVE_SSE42
-#include <nmmintrin.h>
-#include <wmmintrin.h>
-#ifdef __GNUC__
-#include <cpuid.h>
-#endif
+# ifdef __GNUC__
+# include <cpuid.h>
+# if __GNUC__ < 5 && !defined __clang__
+/* the headers do not really work in GCC before version 5 */
+# define _mm_crc32_u8(crc,data) __builtin_ia32_crc32qi(crc,data)
+# define _mm_crc32_u32(crc,data) __builtin_ia32_crc32si(crc,data)
+# define _mm_crc32_u64(crc,data) __builtin_ia32_crc32di(crc,data)
+# else
+# include <nmmintrin.h>
+# endif
+# define USE_SSE42 __attribute__((target("sse4.2")))
+# else
+# define USE_SSE42 /* nothing */
+# endif
#endif
@@ -337,19 +346,8 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) {
return DecodeFixed32(reinterpret_cast<const char*>(p));
}
-#if defined(HAVE_SSE42) && (SIZEOF_SIZE_T == 8)
-
-static inline uint64_t DecodeFixed64(const char *ptr)
+static inline void Slow_CRC32(uint64_t* l, uint8_t const **p)
{
- return uint8korr(ptr);
-}
-
-static inline uint64_t LE_LOAD64(const uint8_t *p) {
- return DecodeFixed64(reinterpret_cast<const char*>(p));
-}
-#endif
-
-static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
*p += 4;
*l = table3_[c & 0xff] ^
@@ -365,27 +363,6 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
table0_[c >> 24];
}
-__attribute__((unused)) static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
-#ifndef HAVE_SSE42
- Slow_CRC32(l, p);
-#elif (SIZEOF_SIZE_T == 8)
- *l = _mm_crc32_u64(*l, LE_LOAD64(*p));
- *p += 8;
-#else
- *l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
- *p += 4;
- *l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
- *p += 4;
-#endif
-}
-
-template<void (*CRC32)(uint64_t*, uint8_t const**)>
-uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
-
- const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
- const uint8_t *e = p + size;
- uint64_t l = crc ^ 0xffffffffu;
-
#ifdef ALIGN
#undef ALIGN
#endif
@@ -398,70 +375,115 @@ uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
l = table0_[c] ^ (l >> 8); \
} while (0)
+static uint32_t crc32c_slow(uint32_t crc, const char* buf, size_t size)
+{
+ const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
+ const uint8_t *e = p + size;
+ uint64_t l = crc ^ 0xffffffffu;
// Point x at first 16-byte aligned byte in string. This might be
// just past the end of the string.
const uintptr_t pval = reinterpret_cast<uintptr_t>(p);
const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4));
- if (x <= e) {
+ if (x <= e)
// Process bytes until finished or p is 16-byte aligned
- while (p != x) {
+ while (p != x)
STEP1;
- }
- }
// Process bytes 16 at a time
- while ((e-p) >= 16) {
- CRC32(&l, &p);
- CRC32(&l, &p);
+ while ((e-p) >= 16)
+ {
+ Slow_CRC32(&l, &p);
+ Slow_CRC32(&l, &p);
}
// Process bytes 8 at a time
- while ((e-p) >= 8) {
- CRC32(&l, &p);
- }
+ while ((e-p) >= 8)
+ Slow_CRC32(&l, &p);
// Process the last few bytes
- while (p != e) {
+ while (p != e)
STEP1;
- }
-#undef STEP1
-#undef ALIGN
return static_cast<uint32_t>(l ^ 0xffffffffu);
}
-// Detect if ARM64 CRC or not.
-#ifndef HAVE_ARMV8_CRC
-// Detect if SS42 or not.
-#ifndef HAVE_POWER8
+#if defined HAVE_POWER8
+#elif defined HAVE_ARMV8_CRC
+#elif defined HAVE_SSE42
+constexpr uint32_t cpuid_ecx_SSE42= 1U << 20;
+constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U<<1;
-static bool isSSE42() {
-#ifndef HAVE_SSE42
- return false;
-#elif defined(__GNUC__)
+static uint32_t cpuid_ecx()
+{
+#ifdef __GNUC__
uint32_t reax= 0, rebx= 0, recx= 0, redx= 0;
__cpuid(1, reax, rebx, recx, redx);
- return (recx & ((int)1 << 20)) != 0;
-#elif defined(_MSC_VER)
- int info[4];
- __cpuid(info, 0x00000001);
- return (info[2] & ((int)1 << 20)) != 0;
+ return recx;
+#elif defined _MSC_VER
+ int regs[4];
+ __cpuid(regs, 1);
+ return regs[2];
#else
- return false;
+# error "unknown compiler"
#endif
}
-#ifdef HAVE_SSE42
-extern "C" int crc32_pclmul_enabled();
-#endif
+extern "C" int crc32_pclmul_enabled(void)
+{
+ return !(~cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL);
+}
-static bool isPCLMULQDQ() {
-#ifdef HAVE_SSE42
- return crc32_pclmul_enabled();
-#else
- return false;
+#if SIZEOF_SIZE_T == 8
+extern "C" uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len);
+
+USE_SSE42
+static inline uint64_t LE_LOAD64(const uint8_t *ptr)
+{
+ return uint8korr(reinterpret_cast<const char*>(ptr));
+}
#endif
+
+USE_SSE42
+static inline void Fast_CRC32(uint64_t* l, uint8_t const **p)
+{
+# if (SIZEOF_SIZE_T == 8)
+ *l = _mm_crc32_u64(*l, LE_LOAD64(*p));
+ *p += 8;
+# else
+ *l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
+ *p += 4;
+ *l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
+ *p += 4;
+# endif
}
-#endif // HAVE_POWER8
-#endif // HAVE_ARMV8_CRC
+USE_SSE42
+static uint32_t crc32c_sse42(uint32_t crc, const char* buf, size_t size)
+{
+ const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
+ const uint8_t *e = p + size;
+ uint64_t l = crc ^ 0xffffffffu;
+
+ // Point x at first 16-byte aligned byte in string. This might be
+ // just past the end of the string.
+ const uintptr_t pval = reinterpret_cast<uintptr_t>(p);
+ const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4));
+ if (x <= e)
+ // Process bytes until finished or p is 16-byte aligned
+ while (p != x)
+ STEP1;
+ // Process bytes 16 at a time
+ while ((e-p) >= 16)
+ {
+ Fast_CRC32(&l, &p);
+ Fast_CRC32(&l, &p);
+ }
+ // Process bytes 8 at a time
+ while ((e-p) >= 8)
+ Fast_CRC32(&l, &p);
+ // Process the last few bytes
+ while (p != e)
+ STEP1;
+ return static_cast<uint32_t>(l ^ 0xffffffffu);
+}
+#endif
typedef uint32_t (*Function)(uint32_t, const char*, size_t);
@@ -507,14 +529,6 @@ static int arch_ppc_probe(void) {
return arch_ppc_crc32;
}
#endif // __linux__
-
-static bool isAltiVec() {
- if (arch_ppc_probe()) {
- return true;
- } else {
- return false;
- }
-}
#endif
#if defined(HAVE_ARMV8_CRC)
@@ -526,760 +540,59 @@ static uint32_t ExtendARMImpl(uint32_t crc, const char *buf, size_t size) {
}
#endif
-extern "C" const char * my_crc32c_implementation()
+static inline Function Choose_Extend()
{
-#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
+#if defined HAVE_POWER8 && defined HAS_ALTIVEC
if (arch_ppc_probe())
- return "Using POWER8 crc32 instructions";
+ return ExtendPPCImpl;
#elif defined(HAVE_ARMV8_CRC)
- const char *ret = crc32c_aarch64_available();
- if (ret)
- return ret ;
+ if (crc32c_aarch64_available())
+ return ExtendARMImpl;
#elif HAVE_SSE42
- if (isSSE42())
- {
- if (SIZEOF_SIZE_T == 8 && isPCLMULQDQ())
- return "Using crc32 + pclmulqdq instructions";
- return "Using SSE4.2 crc32 instructions";
+# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8
+ switch (cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) {
+ case cpuid_ecx_SSE42_AND_PCLMUL:
+ return crc32c_3way;
+ case cpuid_ecx_SSE42:
+ return crc32c_sse42;
}
+# else
+ if (cpuid_ecx() & cpuid_ecx_SSE42)
+ return crc32c_sse42;
+# endif
#endif
- return "Using generic crc32 instructions";
-}
-
-
-/*
- * Copyright 2016 Ferry Toth, Exalon Delft BV, The Netherlands
- * This software is provided 'as-is', without any express or implied
- * warranty. In no event will the author be held liable for any damages
- * arising from the use of this software.
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- * 1. The origin of this software must not be misrepresented; you must not
- * claim that you wrote the original software. If you use this software
- * in a product, an acknowledgment in the product documentation would be
- * appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- * Ferry Toth
- * ftoth@exalondelft.nl
- *
- * https://github.com/htot/crc32c
- *
- * Modified by Facebook
- *
- * Original intel whitepaper:
- * "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction"
- * https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
- *
- * This version is from the folly library, created by Dave Watson <davejwatson@fb.com>
- *
-*/
-#if defined HAVE_SSE42 && defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8
-
-
-#define CRCtriplet(crc, buf, offset) \
- crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
- crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); \
- crc##2 = _mm_crc32_u64(crc##2, *(buf##2 + offset));
-
-#define CRCduplet(crc, buf, offset) \
- crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
- crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset));
-
-#define CRCsinglet(crc, buf, offset) \
- crc = _mm_crc32_u64(crc, *(uint64_t*)(buf + offset));
-
-
-// Numbers taken directly from intel whitepaper.
-// clang-format off
-static const uint64_t clmul_constants[] = {
- 0x14cd00bd6, 0x105ec76f0, 0x0ba4fc28e, 0x14cd00bd6,
- 0x1d82c63da, 0x0f20c0dfe, 0x09e4addf8, 0x0ba4fc28e,
- 0x039d3b296, 0x1384aa63a, 0x102f9b8a2, 0x1d82c63da,
- 0x14237f5e6, 0x01c291d04, 0x00d3b6092, 0x09e4addf8,
- 0x0c96cfdc0, 0x0740eef02, 0x18266e456, 0x039d3b296,
- 0x0daece73e, 0x0083a6eec, 0x0ab7aff2a, 0x102f9b8a2,
- 0x1248ea574, 0x1c1733996, 0x083348832, 0x14237f5e6,
- 0x12c743124, 0x02ad91c30, 0x0b9e02b86, 0x00d3b6092,
- 0x018b33a4e, 0x06992cea2, 0x1b331e26a, 0x0c96cfdc0,
- 0x17d35ba46, 0x07e908048, 0x1bf2e8b8a, 0x18266e456,
- 0x1a3e0968a, 0x11ed1f9d8, 0x0ce7f39f4, 0x0daece73e,
- 0x061d82e56, 0x0f1d0f55e, 0x0d270f1a2, 0x0ab7aff2a,
- 0x1c3f5f66c, 0x0a87ab8a8, 0x12ed0daac, 0x1248ea574,
- 0x065863b64, 0x08462d800, 0x11eef4f8e, 0x083348832,
- 0x1ee54f54c, 0x071d111a8, 0x0b3e32c28, 0x12c743124,
- 0x0064f7f26, 0x0ffd852c6, 0x0dd7e3b0c, 0x0b9e02b86,
- 0x0f285651c, 0x0dcb17aa4, 0x010746f3c, 0x018b33a4e,
- 0x1c24afea4, 0x0f37c5aee, 0x0271d9844, 0x1b331e26a,
- 0x08e766a0c, 0x06051d5a2, 0x093a5f730, 0x17d35ba46,
- 0x06cb08e5c, 0x11d5ca20e, 0x06b749fb2, 0x1bf2e8b8a,
- 0x1167f94f2, 0x021f3d99c, 0x0cec3662e, 0x1a3e0968a,
- 0x19329634a, 0x08f158014, 0x0e6fc4e6a, 0x0ce7f39f4,
- 0x08227bb8a, 0x1a5e82106, 0x0b0cd4768, 0x061d82e56,
- 0x13c2b89c4, 0x188815ab2, 0x0d7a4825c, 0x0d270f1a2,
- 0x10f5ff2ba, 0x105405f3e, 0x00167d312, 0x1c3f5f66c,
- 0x0f6076544, 0x0e9adf796, 0x026f6a60a, 0x12ed0daac,
- 0x1a2adb74e, 0x096638b34, 0x19d34af3a, 0x065863b64,
- 0x049c3cc9c, 0x1e50585a0, 0x068bce87a, 0x11eef4f8e,
- 0x1524fa6c6, 0x19f1c69dc, 0x16cba8aca, 0x1ee54f54c,
- 0x042d98888, 0x12913343e, 0x1329d9f7e, 0x0b3e32c28,
- 0x1b1c69528, 0x088f25a3a, 0x02178513a, 0x0064f7f26,
- 0x0e0ac139e, 0x04e36f0b0, 0x0170076fa, 0x0dd7e3b0c,
- 0x141a1a2e2, 0x0bd6f81f8, 0x16ad828b4, 0x0f285651c,
- 0x041d17b64, 0x19425cbba, 0x1fae1cc66, 0x010746f3c,
- 0x1a75b4b00, 0x18db37e8a, 0x0f872e54c, 0x1c24afea4,
- 0x01e41e9fc, 0x04c144932, 0x086d8e4d2, 0x0271d9844,
- 0x160f7af7a, 0x052148f02, 0x05bb8f1bc, 0x08e766a0c,
- 0x0a90fd27a, 0x0a3c6f37a, 0x0b3af077a, 0x093a5f730,
- 0x04984d782, 0x1d22c238e, 0x0ca6ef3ac, 0x06cb08e5c,
- 0x0234e0b26, 0x063ded06a, 0x1d88abd4a, 0x06b749fb2,
- 0x04597456a, 0x04d56973c, 0x0e9e28eb4, 0x1167f94f2,
- 0x07b3ff57a, 0x19385bf2e, 0x0c9c8b782, 0x0cec3662e,
- 0x13a9cba9e, 0x0e417f38a, 0x093e106a4, 0x19329634a,
- 0x167001a9c, 0x14e727980, 0x1ddffc5d4, 0x0e6fc4e6a,
- 0x00df04680, 0x0d104b8fc, 0x02342001e, 0x08227bb8a,
- 0x00a2a8d7e, 0x05b397730, 0x168763fa6, 0x0b0cd4768,
- 0x1ed5a407a, 0x0e78eb416, 0x0d2c3ed1a, 0x13c2b89c4,
- 0x0995a5724, 0x1641378f0, 0x19b1afbc4, 0x0d7a4825c,
- 0x109ffedc0, 0x08d96551c, 0x0f2271e60, 0x10f5ff2ba,
- 0x00b0bf8ca, 0x00bf80dd2, 0x123888b7a, 0x00167d312,
- 0x1e888f7dc, 0x18dcddd1c, 0x002ee03b2, 0x0f6076544,
- 0x183e8d8fe, 0x06a45d2b2, 0x133d7a042, 0x026f6a60a,
- 0x116b0f50c, 0x1dd3e10e8, 0x05fabe670, 0x1a2adb74e,
- 0x130004488, 0x0de87806c, 0x000bcf5f6, 0x19d34af3a,
- 0x18f0c7078, 0x014338754, 0x017f27698, 0x049c3cc9c,
- 0x058ca5f00, 0x15e3e77ee, 0x1af900c24, 0x068bce87a,
- 0x0b5cfca28, 0x0dd07448e, 0x0ded288f8, 0x1524fa6c6,
- 0x059f229bc, 0x1d8048348, 0x06d390dec, 0x16cba8aca,
- 0x037170390, 0x0a3e3e02c, 0x06353c1cc, 0x042d98888,
- 0x0c4584f5c, 0x0d73c7bea, 0x1f16a3418, 0x1329d9f7e,
- 0x0531377e2, 0x185137662, 0x1d8d9ca7c, 0x1b1c69528,
- 0x0b25b29f2, 0x18a08b5bc, 0x19fb2a8b0, 0x02178513a,
- 0x1a08fe6ac, 0x1da758ae0, 0x045cddf4e, 0x0e0ac139e,
- 0x1a91647f2, 0x169cf9eb0, 0x1a0f717c4, 0x0170076fa,
-};
-
-// Compute the crc32c value for buffer smaller than 8
-static inline void align_to_8(
- size_t len,
- uint64_t& crc0, // crc so far, updated on return
- const unsigned char*& next) { // next data pointer, updated on return
- uint32_t crc32bit = static_cast<uint32_t>(crc0);
- if (len & 0x04) {
- crc32bit = _mm_crc32_u32(crc32bit, *(uint32_t*)next);
- next += sizeof(uint32_t);
- }
- if (len & 0x02) {
- crc32bit = _mm_crc32_u16(crc32bit, *(uint16_t*)next);
- next += sizeof(uint16_t);
- }
- if (len & 0x01) {
- crc32bit = _mm_crc32_u8(crc32bit, *(next));
- next++;
- }
- crc0 = crc32bit;
-}
-
-//
-// CombineCRC performs pclmulqdq multiplication of 2 partial CRC's and a well
-// chosen constant and xor's these with the remaining CRC.
-//
-static inline uint64_t CombineCRC(
- size_t block_size,
- uint64_t crc0,
- uint64_t crc1,
- uint64_t crc2,
- const uint64_t* next2) {
- const auto multiplier =
- *(reinterpret_cast<const __m128i*>(clmul_constants) + block_size - 1);
- const auto crc0_xmm = _mm_set_epi64x(0, crc0);
- const auto res0 = _mm_clmulepi64_si128(crc0_xmm, multiplier, 0x00);
- const auto crc1_xmm = _mm_set_epi64x(0, crc1);
- const auto res1 = _mm_clmulepi64_si128(crc1_xmm, multiplier, 0x10);
- const auto res = _mm_xor_si128(res0, res1);
- crc0 = _mm_cvtsi128_si64(res);
- crc0 = crc0 ^ *((uint64_t*)next2 - 1);
- crc2 = _mm_crc32_u64(crc2, crc0);
- return crc2;
+ return crc32c_slow;
}
-// Compute CRC-32C using the Intel hardware instruction.
-static inline uint32_t crc32c_3way(uint32_t crc, const char* buf, size_t len) {
- const unsigned char* next = (const unsigned char*)buf;
- uint64_t count;
- uint64_t crc0, crc1, crc2;
- crc0 = crc ^ 0xffffffffu;
-
-
- if (len >= 8) {
- // if len > 216 then align and use triplets
- if (len > 216) {
- {
- // Work on the bytes (< 8) before the first 8-byte alignment addr starts
- auto align_bytes = (8 - (uintptr_t)next) & 7;
- len -= align_bytes;
- align_to_8(align_bytes, crc0, next);
- }
-
- // Now work on the remaining blocks
- count = len / 24; // number of triplets
- len %= 24; // bytes remaining
- uint64_t n = count >> 7; // #blocks = first block + full blocks
- uint64_t block_size = count & 127;
- if (block_size == 0) {
- block_size = 128;
- } else {
- n++;
- }
- // points to the first byte of the next block
- const uint64_t* next0 = (uint64_t*)next + block_size;
- const uint64_t* next1 = next0 + block_size;
- const uint64_t* next2 = next1 + block_size;
+static const Function ChosenExtend= Choose_Extend();
- crc1 = crc2 = 0;
- // Use Duff's device, a for() loop inside a switch()
- // statement. This needs to execute at least once, round len
- // down to nearest triplet multiple
- switch (block_size) {
- case 128:
- do {
- // jumps here for a full block of len 128
- CRCtriplet(crc, next, -128);
- /* fallthrough */
- case 127:
- // jumps here or below for the first block smaller
- CRCtriplet(crc, next, -127);
- /* fallthrough */
- case 126:
- CRCtriplet(crc, next, -126); // than 128
- /* fallthrough */
- case 125:
- CRCtriplet(crc, next, -125);
- /* fallthrough */
- case 124:
- CRCtriplet(crc, next, -124);
- /* fallthrough */
- case 123:
- CRCtriplet(crc, next, -123);
- /* fallthrough */
- case 122:
- CRCtriplet(crc, next, -122);
- /* fallthrough */
- case 121:
- CRCtriplet(crc, next, -121);
- /* fallthrough */
- case 120:
- CRCtriplet(crc, next, -120);
- /* fallthrough */
- case 119:
- CRCtriplet(crc, next, -119);
- /* fallthrough */
- case 118:
- CRCtriplet(crc, next, -118);
- /* fallthrough */
- case 117:
- CRCtriplet(crc, next, -117);
- /* fallthrough */
- case 116:
- CRCtriplet(crc, next, -116);
- /* fallthrough */
- case 115:
- CRCtriplet(crc, next, -115);
- /* fallthrough */
- case 114:
- CRCtriplet(crc, next, -114);
- /* fallthrough */
- case 113:
- CRCtriplet(crc, next, -113);
- /* fallthrough */
- case 112:
- CRCtriplet(crc, next, -112);
- /* fallthrough */
- case 111:
- CRCtriplet(crc, next, -111);
- /* fallthrough */
- case 110:
- CRCtriplet(crc, next, -110);
- /* fallthrough */
- case 109:
- CRCtriplet(crc, next, -109);
- /* fallthrough */
- case 108:
- CRCtriplet(crc, next, -108);
- /* fallthrough */
- case 107:
- CRCtriplet(crc, next, -107);
- /* fallthrough */
- case 106:
- CRCtriplet(crc, next, -106);
- /* fallthrough */
- case 105:
- CRCtriplet(crc, next, -105);
- /* fallthrough */
- case 104:
- CRCtriplet(crc, next, -104);
- /* fallthrough */
- case 103:
- CRCtriplet(crc, next, -103);
- /* fallthrough */
- case 102:
- CRCtriplet(crc, next, -102);
- /* fallthrough */
- case 101:
- CRCtriplet(crc, next, -101);
- /* fallthrough */
- case 100:
- CRCtriplet(crc, next, -100);
- /* fallthrough */
- case 99:
- CRCtriplet(crc, next, -99);
- /* fallthrough */
- case 98:
- CRCtriplet(crc, next, -98);
- /* fallthrough */
- case 97:
- CRCtriplet(crc, next, -97);
- /* fallthrough */
- case 96:
- CRCtriplet(crc, next, -96);
- /* fallthrough */
- case 95:
- CRCtriplet(crc, next, -95);
- /* fallthrough */
- case 94:
- CRCtriplet(crc, next, -94);
- /* fallthrough */
- case 93:
- CRCtriplet(crc, next, -93);
- /* fallthrough */
- case 92:
- CRCtriplet(crc, next, -92);
- /* fallthrough */
- case 91:
- CRCtriplet(crc, next, -91);
- /* fallthrough */
- case 90:
- CRCtriplet(crc, next, -90);
- /* fallthrough */
- case 89:
- CRCtriplet(crc, next, -89);
- /* fallthrough */
- case 88:
- CRCtriplet(crc, next, -88);
- /* fallthrough */
- case 87:
- CRCtriplet(crc, next, -87);
- /* fallthrough */
- case 86:
- CRCtriplet(crc, next, -86);
- /* fallthrough */
- case 85:
- CRCtriplet(crc, next, -85);
- /* fallthrough */
- case 84:
- CRCtriplet(crc, next, -84);
- /* fallthrough */
- case 83:
- CRCtriplet(crc, next, -83);
- /* fallthrough */
- case 82:
- CRCtriplet(crc, next, -82);
- /* fallthrough */
- case 81:
- CRCtriplet(crc, next, -81);
- /* fallthrough */
- case 80:
- CRCtriplet(crc, next, -80);
- /* fallthrough */
- case 79:
- CRCtriplet(crc, next, -79);
- /* fallthrough */
- case 78:
- CRCtriplet(crc, next, -78);
- /* fallthrough */
- case 77:
- CRCtriplet(crc, next, -77);
- /* fallthrough */
- case 76:
- CRCtriplet(crc, next, -76);
- /* fallthrough */
- case 75:
- CRCtriplet(crc, next, -75);
- /* fallthrough */
- case 74:
- CRCtriplet(crc, next, -74);
- /* fallthrough */
- case 73:
- CRCtriplet(crc, next, -73);
- /* fallthrough */
- case 72:
- CRCtriplet(crc, next, -72);
- /* fallthrough */
- case 71:
- CRCtriplet(crc, next, -71);
- /* fallthrough */
- case 70:
- CRCtriplet(crc, next, -70);
- /* fallthrough */
- case 69:
- CRCtriplet(crc, next, -69);
- /* fallthrough */
- case 68:
- CRCtriplet(crc, next, -68);
- /* fallthrough */
- case 67:
- CRCtriplet(crc, next, -67);
- /* fallthrough */
- case 66:
- CRCtriplet(crc, next, -66);
- /* fallthrough */
- case 65:
- CRCtriplet(crc, next, -65);
- /* fallthrough */
- case 64:
- CRCtriplet(crc, next, -64);
- /* fallthrough */
- case 63:
- CRCtriplet(crc, next, -63);
- /* fallthrough */
- case 62:
- CRCtriplet(crc, next, -62);
- /* fallthrough */
- case 61:
- CRCtriplet(crc, next, -61);
- /* fallthrough */
- case 60:
- CRCtriplet(crc, next, -60);
- /* fallthrough */
- case 59:
- CRCtriplet(crc, next, -59);
- /* fallthrough */
- case 58:
- CRCtriplet(crc, next, -58);
- /* fallthrough */
- case 57:
- CRCtriplet(crc, next, -57);
- /* fallthrough */
- case 56:
- CRCtriplet(crc, next, -56);
- /* fallthrough */
- case 55:
- CRCtriplet(crc, next, -55);
- /* fallthrough */
- case 54:
- CRCtriplet(crc, next, -54);
- /* fallthrough */
- case 53:
- CRCtriplet(crc, next, -53);
- /* fallthrough */
- case 52:
- CRCtriplet(crc, next, -52);
- /* fallthrough */
- case 51:
- CRCtriplet(crc, next, -51);
- /* fallthrough */
- case 50:
- CRCtriplet(crc, next, -50);
- /* fallthrough */
- case 49:
- CRCtriplet(crc, next, -49);
- /* fallthrough */
- case 48:
- CRCtriplet(crc, next, -48);
- /* fallthrough */
- case 47:
- CRCtriplet(crc, next, -47);
- /* fallthrough */
- case 46:
- CRCtriplet(crc, next, -46);
- /* fallthrough */
- case 45:
- CRCtriplet(crc, next, -45);
- /* fallthrough */
- case 44:
- CRCtriplet(crc, next, -44);
- /* fallthrough */
- case 43:
- CRCtriplet(crc, next, -43);
- /* fallthrough */
- case 42:
- CRCtriplet(crc, next, -42);
- /* fallthrough */
- case 41:
- CRCtriplet(crc, next, -41);
- /* fallthrough */
- case 40:
- CRCtriplet(crc, next, -40);
- /* fallthrough */
- case 39:
- CRCtriplet(crc, next, -39);
- /* fallthrough */
- case 38:
- CRCtriplet(crc, next, -38);
- /* fallthrough */
- case 37:
- CRCtriplet(crc, next, -37);
- /* fallthrough */
- case 36:
- CRCtriplet(crc, next, -36);
- /* fallthrough */
- case 35:
- CRCtriplet(crc, next, -35);
- /* fallthrough */
- case 34:
- CRCtriplet(crc, next, -34);
- /* fallthrough */
- case 33:
- CRCtriplet(crc, next, -33);
- /* fallthrough */
- case 32:
- CRCtriplet(crc, next, -32);
- /* fallthrough */
- case 31:
- CRCtriplet(crc, next, -31);
- /* fallthrough */
- case 30:
- CRCtriplet(crc, next, -30);
- /* fallthrough */
- case 29:
- CRCtriplet(crc, next, -29);
- /* fallthrough */
- case 28:
- CRCtriplet(crc, next, -28);
- /* fallthrough */
- case 27:
- CRCtriplet(crc, next, -27);
- /* fallthrough */
- case 26:
- CRCtriplet(crc, next, -26);
- /* fallthrough */
- case 25:
- CRCtriplet(crc, next, -25);
- /* fallthrough */
- case 24:
- CRCtriplet(crc, next, -24);
- /* fallthrough */
- case 23:
- CRCtriplet(crc, next, -23);
- /* fallthrough */
- case 22:
- CRCtriplet(crc, next, -22);
- /* fallthrough */
- case 21:
- CRCtriplet(crc, next, -21);
- /* fallthrough */
- case 20:
- CRCtriplet(crc, next, -20);
- /* fallthrough */
- case 19:
- CRCtriplet(crc, next, -19);
- /* fallthrough */
- case 18:
- CRCtriplet(crc, next, -18);
- /* fallthrough */
- case 17:
- CRCtriplet(crc, next, -17);
- /* fallthrough */
- case 16:
- CRCtriplet(crc, next, -16);
- /* fallthrough */
- case 15:
- CRCtriplet(crc, next, -15);
- /* fallthrough */
- case 14:
- CRCtriplet(crc, next, -14);
- /* fallthrough */
- case 13:
- CRCtriplet(crc, next, -13);
- /* fallthrough */
- case 12:
- CRCtriplet(crc, next, -12);
- /* fallthrough */
- case 11:
- CRCtriplet(crc, next, -11);
- /* fallthrough */
- case 10:
- CRCtriplet(crc, next, -10);
- /* fallthrough */
- case 9:
- CRCtriplet(crc, next, -9);
- /* fallthrough */
- case 8:
- CRCtriplet(crc, next, -8);
- /* fallthrough */
- case 7:
- CRCtriplet(crc, next, -7);
- /* fallthrough */
- case 6:
- CRCtriplet(crc, next, -6);
- /* fallthrough */
- case 5:
- CRCtriplet(crc, next, -5);
- /* fallthrough */
- case 4:
- CRCtriplet(crc, next, -4);
- /* fallthrough */
- case 3:
- CRCtriplet(crc, next, -3);
- /* fallthrough */
- case 2:
- CRCtriplet(crc, next, -2);
- /* fallthrough */
- case 1:
- CRCduplet(crc, next, -1); // the final triplet is actually only 2
- //{ CombineCRC(); }
- crc0 = CombineCRC(block_size, crc0, crc1, crc2, next2);
- if (--n > 0) {
- crc1 = crc2 = 0;
- block_size = 128;
- // points to the first byte of the next block
- next0 = next2 + 128;
- next1 = next0 + 128; // from here on all blocks are 128 long
- next2 = next1 + 128;
- }
- /* fallthrough */
- case 0:;
- } while (n > 0);
- }
- next = (const unsigned char*)next2;
- }
- uint64_t count2 = len >> 3; // 216 of less bytes is 27 or less singlets
- len = len & 7;
- next += (count2 * 8);
- switch (count2) {
- case 27:
- CRCsinglet(crc0, next, -27 * 8);
- /* fallthrough */
- case 26:
- CRCsinglet(crc0, next, -26 * 8);
- /* fallthrough */
- case 25:
- CRCsinglet(crc0, next, -25 * 8);
- /* fallthrough */
- case 24:
- CRCsinglet(crc0, next, -24 * 8);
- /* fallthrough */
- case 23:
- CRCsinglet(crc0, next, -23 * 8);
- /* fallthrough */
- case 22:
- CRCsinglet(crc0, next, -22 * 8);
- /* fallthrough */
- case 21:
- CRCsinglet(crc0, next, -21 * 8);
- /* fallthrough */
- case 20:
- CRCsinglet(crc0, next, -20 * 8);
- /* fallthrough */
- case 19:
- CRCsinglet(crc0, next, -19 * 8);
- /* fallthrough */
- case 18:
- CRCsinglet(crc0, next, -18 * 8);
- /* fallthrough */
- case 17:
- CRCsinglet(crc0, next, -17 * 8);
- /* fallthrough */
- case 16:
- CRCsinglet(crc0, next, -16 * 8);
- /* fallthrough */
- case 15:
- CRCsinglet(crc0, next, -15 * 8);
- /* fallthrough */
- case 14:
- CRCsinglet(crc0, next, -14 * 8);
- /* fallthrough */
- case 13:
- CRCsinglet(crc0, next, -13 * 8);
- /* fallthrough */
- case 12:
- CRCsinglet(crc0, next, -12 * 8);
- /* fallthrough */
- case 11:
- CRCsinglet(crc0, next, -11 * 8);
- /* fallthrough */
- case 10:
- CRCsinglet(crc0, next, -10 * 8);
- /* fallthrough */
- case 9:
- CRCsinglet(crc0, next, -9 * 8);
- /* fallthrough */
- case 8:
- CRCsinglet(crc0, next, -8 * 8);
- /* fallthrough */
- case 7:
- CRCsinglet(crc0, next, -7 * 8);
- /* fallthrough */
- case 6:
- CRCsinglet(crc0, next, -6 * 8);
- /* fallthrough */
- case 5:
- CRCsinglet(crc0, next, -5 * 8);
- /* fallthrough */
- case 4:
- CRCsinglet(crc0, next, -4 * 8);
- /* fallthrough */
- case 3:
- CRCsinglet(crc0, next, -3 * 8);
- /* fallthrough */
- case 2:
- CRCsinglet(crc0, next, -2 * 8);
- /* fallthrough */
- case 1:
- CRCsinglet(crc0, next, -1 * 8);
- /* fallthrough */
- case 0:;
- }
- }
- {
- align_to_8(len, crc0, next);
- return (uint32_t)crc0 ^ 0xffffffffu;
- }
+static inline uint32_t Extend(uint32_t crc, const char* buf, size_t size)
+{
+ return ChosenExtend(crc, buf, size);
}
-#else
-#define NO_THREEWAY_CRC32C
-#endif //HAVE_SSE42 && HAVE_PCLMUL
-
-static inline Function Choose_Extend() {
-#ifdef HAVE_POWER8
- return isAltiVec() ? ExtendPPCImpl : ExtendImpl<Slow_CRC32>;
+extern "C" const char *my_crc32c_implementation()
+{
+#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
+ if (ChosenExtend == ExtendPPCImpl)
+ return "Using POWER8 crc32 instructions";
#elif defined(HAVE_ARMV8_CRC)
- if(crc32c_aarch64_available()) {
- return ExtendARMImpl;
- } else {
- return ExtendImpl<Slow_CRC32>;
- }
-#else
- if (isSSE42()) {
- if (isPCLMULQDQ()) {
-#if defined HAVE_SSE42 && defined HAVE_PCLMUL && !defined NO_THREEWAY_CRC32C
- return crc32c_3way;
-#else
- return ExtendImpl<Fast_CRC32>; // Fast_CRC32 will check HAVE_SSE42 itself
-#endif
- }
- else { // no runtime PCLMULQDQ support but has SSE42 support
- return ExtendImpl<Fast_CRC32>;
- }
- } // end of isSSE42()
- else {
- return ExtendImpl<Slow_CRC32>;
- }
+ if (const char *ret= crc32c_aarch64_available())
+ return ret;
+#elif HAVE_SSE42
+# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8
+ if (ChosenExtend == crc32c_3way)
+ return "Using crc32 + pclmulqdq instructions";
+# endif
+ if (ChosenExtend == crc32c_sse42)
+ return "Using SSE4.2 crc32 instructions";
#endif
-}
-
-static const Function ChosenExtend = Choose_Extend();
-
-static inline uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
- return ChosenExtend(crc, buf, size);
+ return "Using generic crc32 instructions";
}
} // namespace crc32c
} // namespace mysys_namespace
-extern "C" unsigned int my_crc32c(unsigned int crc, const char *buf, size_t size)
+extern "C" unsigned my_crc32c(unsigned int crc, const char *buf, size_t size)
{
return mysys_namespace::crc32c::Extend(crc,buf, size);
}
diff --git a/mysys/crc32/crc32c_amd64.cc b/mysys/crc32/crc32c_amd64.cc
new file mode 100644
index 00000000000..22c492b457f
--- /dev/null
+++ b/mysys/crc32/crc32c_amd64.cc
@@ -0,0 +1,711 @@
+/* Copyright (c) 2020, 2021, MariaDB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+ * Copyright 2016 Ferry Toth, Exalon Delft BV, The Netherlands
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the author be held liable for any damages
+ * arising from the use of this software.
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ * Ferry Toth
+ * ftoth@exalondelft.nl
+ *
+ * https://github.com/htot/crc32c
+ *
+ * Modified by Facebook
+ *
+ * Original intel whitepaper:
+ * "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction"
+ * https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
+ *
+ * This version is from the folly library, created by Dave Watson <davejwatson@fb.com>
+ *
+*/
+
+#include <stdint.h>
+#include <nmmintrin.h>
+#include <wmmintrin.h>
+
+
+#define CRCtriplet(crc, buf, offset) \
+ crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
+ crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); \
+ crc##2 = _mm_crc32_u64(crc##2, *(buf##2 + offset));
+
+#define CRCduplet(crc, buf, offset) \
+ crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
+ crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset));
+
+#define CRCsinglet(crc, buf, offset) \
+ crc = _mm_crc32_u64(crc, *(uint64_t*)(buf + offset));
+
+
+// Numbers taken directly from intel whitepaper.
+// clang-format off
+static const uint64_t clmul_constants alignas(16) [] = {
+ 0x14cd00bd6, 0x105ec76f0, 0x0ba4fc28e, 0x14cd00bd6,
+ 0x1d82c63da, 0x0f20c0dfe, 0x09e4addf8, 0x0ba4fc28e,
+ 0x039d3b296, 0x1384aa63a, 0x102f9b8a2, 0x1d82c63da,
+ 0x14237f5e6, 0x01c291d04, 0x00d3b6092, 0x09e4addf8,
+ 0x0c96cfdc0, 0x0740eef02, 0x18266e456, 0x039d3b296,
+ 0x0daece73e, 0x0083a6eec, 0x0ab7aff2a, 0x102f9b8a2,
+ 0x1248ea574, 0x1c1733996, 0x083348832, 0x14237f5e6,
+ 0x12c743124, 0x02ad91c30, 0x0b9e02b86, 0x00d3b6092,
+ 0x018b33a4e, 0x06992cea2, 0x1b331e26a, 0x0c96cfdc0,
+ 0x17d35ba46, 0x07e908048, 0x1bf2e8b8a, 0x18266e456,
+ 0x1a3e0968a, 0x11ed1f9d8, 0x0ce7f39f4, 0x0daece73e,
+ 0x061d82e56, 0x0f1d0f55e, 0x0d270f1a2, 0x0ab7aff2a,
+ 0x1c3f5f66c, 0x0a87ab8a8, 0x12ed0daac, 0x1248ea574,
+ 0x065863b64, 0x08462d800, 0x11eef4f8e, 0x083348832,
+ 0x1ee54f54c, 0x071d111a8, 0x0b3e32c28, 0x12c743124,
+ 0x0064f7f26, 0x0ffd852c6, 0x0dd7e3b0c, 0x0b9e02b86,
+ 0x0f285651c, 0x0dcb17aa4, 0x010746f3c, 0x018b33a4e,
+ 0x1c24afea4, 0x0f37c5aee, 0x0271d9844, 0x1b331e26a,
+ 0x08e766a0c, 0x06051d5a2, 0x093a5f730, 0x17d35ba46,
+ 0x06cb08e5c, 0x11d5ca20e, 0x06b749fb2, 0x1bf2e8b8a,
+ 0x1167f94f2, 0x021f3d99c, 0x0cec3662e, 0x1a3e0968a,
+ 0x19329634a, 0x08f158014, 0x0e6fc4e6a, 0x0ce7f39f4,
+ 0x08227bb8a, 0x1a5e82106, 0x0b0cd4768, 0x061d82e56,
+ 0x13c2b89c4, 0x188815ab2, 0x0d7a4825c, 0x0d270f1a2,
+ 0x10f5ff2ba, 0x105405f3e, 0x00167d312, 0x1c3f5f66c,
+ 0x0f6076544, 0x0e9adf796, 0x026f6a60a, 0x12ed0daac,
+ 0x1a2adb74e, 0x096638b34, 0x19d34af3a, 0x065863b64,
+ 0x049c3cc9c, 0x1e50585a0, 0x068bce87a, 0x11eef4f8e,
+ 0x1524fa6c6, 0x19f1c69dc, 0x16cba8aca, 0x1ee54f54c,
+ 0x042d98888, 0x12913343e, 0x1329d9f7e, 0x0b3e32c28,
+ 0x1b1c69528, 0x088f25a3a, 0x02178513a, 0x0064f7f26,
+ 0x0e0ac139e, 0x04e36f0b0, 0x0170076fa, 0x0dd7e3b0c,
+ 0x141a1a2e2, 0x0bd6f81f8, 0x16ad828b4, 0x0f285651c,
+ 0x041d17b64, 0x19425cbba, 0x1fae1cc66, 0x010746f3c,
+ 0x1a75b4b00, 0x18db37e8a, 0x0f872e54c, 0x1c24afea4,
+ 0x01e41e9fc, 0x04c144932, 0x086d8e4d2, 0x0271d9844,
+ 0x160f7af7a, 0x052148f02, 0x05bb8f1bc, 0x08e766a0c,
+ 0x0a90fd27a, 0x0a3c6f37a, 0x0b3af077a, 0x093a5f730,
+ 0x04984d782, 0x1d22c238e, 0x0ca6ef3ac, 0x06cb08e5c,
+ 0x0234e0b26, 0x063ded06a, 0x1d88abd4a, 0x06b749fb2,
+ 0x04597456a, 0x04d56973c, 0x0e9e28eb4, 0x1167f94f2,
+ 0x07b3ff57a, 0x19385bf2e, 0x0c9c8b782, 0x0cec3662e,
+ 0x13a9cba9e, 0x0e417f38a, 0x093e106a4, 0x19329634a,
+ 0x167001a9c, 0x14e727980, 0x1ddffc5d4, 0x0e6fc4e6a,
+ 0x00df04680, 0x0d104b8fc, 0x02342001e, 0x08227bb8a,
+ 0x00a2a8d7e, 0x05b397730, 0x168763fa6, 0x0b0cd4768,
+ 0x1ed5a407a, 0x0e78eb416, 0x0d2c3ed1a, 0x13c2b89c4,
+ 0x0995a5724, 0x1641378f0, 0x19b1afbc4, 0x0d7a4825c,
+ 0x109ffedc0, 0x08d96551c, 0x0f2271e60, 0x10f5ff2ba,
+ 0x00b0bf8ca, 0x00bf80dd2, 0x123888b7a, 0x00167d312,
+ 0x1e888f7dc, 0x18dcddd1c, 0x002ee03b2, 0x0f6076544,
+ 0x183e8d8fe, 0x06a45d2b2, 0x133d7a042, 0x026f6a60a,
+ 0x116b0f50c, 0x1dd3e10e8, 0x05fabe670, 0x1a2adb74e,
+ 0x130004488, 0x0de87806c, 0x000bcf5f6, 0x19d34af3a,
+ 0x18f0c7078, 0x014338754, 0x017f27698, 0x049c3cc9c,
+ 0x058ca5f00, 0x15e3e77ee, 0x1af900c24, 0x068bce87a,
+ 0x0b5cfca28, 0x0dd07448e, 0x0ded288f8, 0x1524fa6c6,
+ 0x059f229bc, 0x1d8048348, 0x06d390dec, 0x16cba8aca,
+ 0x037170390, 0x0a3e3e02c, 0x06353c1cc, 0x042d98888,
+ 0x0c4584f5c, 0x0d73c7bea, 0x1f16a3418, 0x1329d9f7e,
+ 0x0531377e2, 0x185137662, 0x1d8d9ca7c, 0x1b1c69528,
+ 0x0b25b29f2, 0x18a08b5bc, 0x19fb2a8b0, 0x02178513a,
+ 0x1a08fe6ac, 0x1da758ae0, 0x045cddf4e, 0x0e0ac139e,
+ 0x1a91647f2, 0x169cf9eb0, 0x1a0f717c4, 0x0170076fa,
+};
+
+// Compute the crc32c value for buffer smaller than 8
+static inline void align_to_8(
+ size_t len,
+ uint64_t& crc0, // crc so far, updated on return
+ const unsigned char*& next) { // next data pointer, updated on return
+ uint32_t crc32bit = static_cast<uint32_t>(crc0);
+ if (len & 0x04) {
+ crc32bit = _mm_crc32_u32(crc32bit, *(uint32_t*)next);
+ next += sizeof(uint32_t);
+ }
+ if (len & 0x02) {
+ crc32bit = _mm_crc32_u16(crc32bit, *(uint16_t*)next);
+ next += sizeof(uint16_t);
+ }
+ if (len & 0x01) {
+ crc32bit = _mm_crc32_u8(crc32bit, *(next));
+ next++;
+ }
+ crc0 = crc32bit;
+}
+
+//
+// CombineCRC performs pclmulqdq multiplication of 2 partial CRC's and a well
+// chosen constant and xor's these with the remaining CRC.
+//
+static inline uint64_t CombineCRC(
+ size_t block_size,
+ uint64_t crc0,
+ uint64_t crc1,
+ uint64_t crc2,
+ const uint64_t* next2) {
+ const auto multiplier =
+ *(reinterpret_cast<const __m128i*>(clmul_constants) + block_size - 1);
+ const auto crc0_xmm = _mm_set_epi64x(0, crc0);
+ const auto res0 = _mm_clmulepi64_si128(crc0_xmm, multiplier, 0x00);
+ const auto crc1_xmm = _mm_set_epi64x(0, crc1);
+ const auto res1 = _mm_clmulepi64_si128(crc1_xmm, multiplier, 0x10);
+ const auto res = _mm_xor_si128(res0, res1);
+ crc0 = _mm_cvtsi128_si64(res);
+ crc0 = crc0 ^ *((uint64_t*)next2 - 1);
+ crc2 = _mm_crc32_u64(crc2, crc0);
+ return crc2;
+}
+
+// Compute CRC-32C using the Intel hardware instruction.
+extern "C"
+uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len)
+{
+ const unsigned char* next = (const unsigned char*)buf;
+ uint64_t count;
+ uint64_t crc0, crc1, crc2;
+ crc0 = crc ^ 0xffffffffu;
+
+
+ if (len >= 8) {
+ // if len > 216 then align and use triplets
+ if (len > 216) {
+ {
+ // Work on the bytes (< 8) before the first 8-byte alignment addr starts
+ auto align_bytes = (8 - (uintptr_t)next) & 7;
+ len -= align_bytes;
+ align_to_8(align_bytes, crc0, next);
+ }
+
+ // Now work on the remaining blocks
+ count = len / 24; // number of triplets
+ len %= 24; // bytes remaining
+ uint64_t n = count >> 7; // #blocks = first block + full blocks
+ uint64_t block_size = count & 127;
+ if (block_size == 0) {
+ block_size = 128;
+ } else {
+ n++;
+ }
+ // points to the first byte of the next block
+ const uint64_t* next0 = (uint64_t*)next + block_size;
+ const uint64_t* next1 = next0 + block_size;
+ const uint64_t* next2 = next1 + block_size;
+
+ crc1 = crc2 = 0;
+ // Use Duff's device, a for() loop inside a switch()
+ // statement. This needs to execute at least once, round len
+ // down to nearest triplet multiple
+ switch (block_size) {
+ case 128:
+ do {
+ // jumps here for a full block of len 128
+ CRCtriplet(crc, next, -128);
+ /* fallthrough */
+ case 127:
+ // jumps here or below for the first block smaller
+ CRCtriplet(crc, next, -127);
+ /* fallthrough */
+ case 126:
+ CRCtriplet(crc, next, -126); // than 128
+ /* fallthrough */
+ case 125:
+ CRCtriplet(crc, next, -125);
+ /* fallthrough */
+ case 124:
+ CRCtriplet(crc, next, -124);
+ /* fallthrough */
+ case 123:
+ CRCtriplet(crc, next, -123);
+ /* fallthrough */
+ case 122:
+ CRCtriplet(crc, next, -122);
+ /* fallthrough */
+ case 121:
+ CRCtriplet(crc, next, -121);
+ /* fallthrough */
+ case 120:
+ CRCtriplet(crc, next, -120);
+ /* fallthrough */
+ case 119:
+ CRCtriplet(crc, next, -119);
+ /* fallthrough */
+ case 118:
+ CRCtriplet(crc, next, -118);
+ /* fallthrough */
+ case 117:
+ CRCtriplet(crc, next, -117);
+ /* fallthrough */
+ case 116:
+ CRCtriplet(crc, next, -116);
+ /* fallthrough */
+ case 115:
+ CRCtriplet(crc, next, -115);
+ /* fallthrough */
+ case 114:
+ CRCtriplet(crc, next, -114);
+ /* fallthrough */
+ case 113:
+ CRCtriplet(crc, next, -113);
+ /* fallthrough */
+ case 112:
+ CRCtriplet(crc, next, -112);
+ /* fallthrough */
+ case 111:
+ CRCtriplet(crc, next, -111);
+ /* fallthrough */
+ case 110:
+ CRCtriplet(crc, next, -110);
+ /* fallthrough */
+ case 109:
+ CRCtriplet(crc, next, -109);
+ /* fallthrough */
+ case 108:
+ CRCtriplet(crc, next, -108);
+ /* fallthrough */
+ case 107:
+ CRCtriplet(crc, next, -107);
+ /* fallthrough */
+ case 106:
+ CRCtriplet(crc, next, -106);
+ /* fallthrough */
+ case 105:
+ CRCtriplet(crc, next, -105);
+ /* fallthrough */
+ case 104:
+ CRCtriplet(crc, next, -104);
+ /* fallthrough */
+ case 103:
+ CRCtriplet(crc, next, -103);
+ /* fallthrough */
+ case 102:
+ CRCtriplet(crc, next, -102);
+ /* fallthrough */
+ case 101:
+ CRCtriplet(crc, next, -101);
+ /* fallthrough */
+ case 100:
+ CRCtriplet(crc, next, -100);
+ /* fallthrough */
+ case 99:
+ CRCtriplet(crc, next, -99);
+ /* fallthrough */
+ case 98:
+ CRCtriplet(crc, next, -98);
+ /* fallthrough */
+ case 97:
+ CRCtriplet(crc, next, -97);
+ /* fallthrough */
+ case 96:
+ CRCtriplet(crc, next, -96);
+ /* fallthrough */
+ case 95:
+ CRCtriplet(crc, next, -95);
+ /* fallthrough */
+ case 94:
+ CRCtriplet(crc, next, -94);
+ /* fallthrough */
+ case 93:
+ CRCtriplet(crc, next, -93);
+ /* fallthrough */
+ case 92:
+ CRCtriplet(crc, next, -92);
+ /* fallthrough */
+ case 91:
+ CRCtriplet(crc, next, -91);
+ /* fallthrough */
+ case 90:
+ CRCtriplet(crc, next, -90);
+ /* fallthrough */
+ case 89:
+ CRCtriplet(crc, next, -89);
+ /* fallthrough */
+ case 88:
+ CRCtriplet(crc, next, -88);
+ /* fallthrough */
+ case 87:
+ CRCtriplet(crc, next, -87);
+ /* fallthrough */
+ case 86:
+ CRCtriplet(crc, next, -86);
+ /* fallthrough */
+ case 85:
+ CRCtriplet(crc, next, -85);
+ /* fallthrough */
+ case 84:
+ CRCtriplet(crc, next, -84);
+ /* fallthrough */
+ case 83:
+ CRCtriplet(crc, next, -83);
+ /* fallthrough */
+ case 82:
+ CRCtriplet(crc, next, -82);
+ /* fallthrough */
+ case 81:
+ CRCtriplet(crc, next, -81);
+ /* fallthrough */
+ case 80:
+ CRCtriplet(crc, next, -80);
+ /* fallthrough */
+ case 79:
+ CRCtriplet(crc, next, -79);
+ /* fallthrough */
+ case 78:
+ CRCtriplet(crc, next, -78);
+ /* fallthrough */
+ case 77:
+ CRCtriplet(crc, next, -77);
+ /* fallthrough */
+ case 76:
+ CRCtriplet(crc, next, -76);
+ /* fallthrough */
+ case 75:
+ CRCtriplet(crc, next, -75);
+ /* fallthrough */
+ case 74:
+ CRCtriplet(crc, next, -74);
+ /* fallthrough */
+ case 73:
+ CRCtriplet(crc, next, -73);
+ /* fallthrough */
+ case 72:
+ CRCtriplet(crc, next, -72);
+ /* fallthrough */
+ case 71:
+ CRCtriplet(crc, next, -71);
+ /* fallthrough */
+ case 70:
+ CRCtriplet(crc, next, -70);
+ /* fallthrough */
+ case 69:
+ CRCtriplet(crc, next, -69);
+ /* fallthrough */
+ case 68:
+ CRCtriplet(crc, next, -68);
+ /* fallthrough */
+ case 67:
+ CRCtriplet(crc, next, -67);
+ /* fallthrough */
+ case 66:
+ CRCtriplet(crc, next, -66);
+ /* fallthrough */
+ case 65:
+ CRCtriplet(crc, next, -65);
+ /* fallthrough */
+ case 64:
+ CRCtriplet(crc, next, -64);
+ /* fallthrough */
+ case 63:
+ CRCtriplet(crc, next, -63);
+ /* fallthrough */
+ case 62:
+ CRCtriplet(crc, next, -62);
+ /* fallthrough */
+ case 61:
+ CRCtriplet(crc, next, -61);
+ /* fallthrough */
+ case 60:
+ CRCtriplet(crc, next, -60);
+ /* fallthrough */
+ case 59:
+ CRCtriplet(crc, next, -59);
+ /* fallthrough */
+ case 58:
+ CRCtriplet(crc, next, -58);
+ /* fallthrough */
+ case 57:
+ CRCtriplet(crc, next, -57);
+ /* fallthrough */
+ case 56:
+ CRCtriplet(crc, next, -56);
+ /* fallthrough */
+ case 55:
+ CRCtriplet(crc, next, -55);
+ /* fallthrough */
+ case 54:
+ CRCtriplet(crc, next, -54);
+ /* fallthrough */
+ case 53:
+ CRCtriplet(crc, next, -53);
+ /* fallthrough */
+ case 52:
+ CRCtriplet(crc, next, -52);
+ /* fallthrough */
+ case 51:
+ CRCtriplet(crc, next, -51);
+ /* fallthrough */
+ case 50:
+ CRCtriplet(crc, next, -50);
+ /* fallthrough */
+ case 49:
+ CRCtriplet(crc, next, -49);
+ /* fallthrough */
+ case 48:
+ CRCtriplet(crc, next, -48);
+ /* fallthrough */
+ case 47:
+ CRCtriplet(crc, next, -47);
+ /* fallthrough */
+ case 46:
+ CRCtriplet(crc, next, -46);
+ /* fallthrough */
+ case 45:
+ CRCtriplet(crc, next, -45);
+ /* fallthrough */
+ case 44:
+ CRCtriplet(crc, next, -44);
+ /* fallthrough */
+ case 43:
+ CRCtriplet(crc, next, -43);
+ /* fallthrough */
+ case 42:
+ CRCtriplet(crc, next, -42);
+ /* fallthrough */
+ case 41:
+ CRCtriplet(crc, next, -41);
+ /* fallthrough */
+ case 40:
+ CRCtriplet(crc, next, -40);
+ /* fallthrough */
+ case 39:
+ CRCtriplet(crc, next, -39);
+ /* fallthrough */
+ case 38:
+ CRCtriplet(crc, next, -38);
+ /* fallthrough */
+ case 37:
+ CRCtriplet(crc, next, -37);
+ /* fallthrough */
+ case 36:
+ CRCtriplet(crc, next, -36);
+ /* fallthrough */
+ case 35:
+ CRCtriplet(crc, next, -35);
+ /* fallthrough */
+ case 34:
+ CRCtriplet(crc, next, -34);
+ /* fallthrough */
+ case 33:
+ CRCtriplet(crc, next, -33);
+ /* fallthrough */
+ case 32:
+ CRCtriplet(crc, next, -32);
+ /* fallthrough */
+ case 31:
+ CRCtriplet(crc, next, -31);
+ /* fallthrough */
+ case 30:
+ CRCtriplet(crc, next, -30);
+ /* fallthrough */
+ case 29:
+ CRCtriplet(crc, next, -29);
+ /* fallthrough */
+ case 28:
+ CRCtriplet(crc, next, -28);
+ /* fallthrough */
+ case 27:
+ CRCtriplet(crc, next, -27);
+ /* fallthrough */
+ case 26:
+ CRCtriplet(crc, next, -26);
+ /* fallthrough */
+ case 25:
+ CRCtriplet(crc, next, -25);
+ /* fallthrough */
+ case 24:
+ CRCtriplet(crc, next, -24);
+ /* fallthrough */
+ case 23:
+ CRCtriplet(crc, next, -23);
+ /* fallthrough */
+ case 22:
+ CRCtriplet(crc, next, -22);
+ /* fallthrough */
+ case 21:
+ CRCtriplet(crc, next, -21);
+ /* fallthrough */
+ case 20:
+ CRCtriplet(crc, next, -20);
+ /* fallthrough */
+ case 19:
+ CRCtriplet(crc, next, -19);
+ /* fallthrough */
+ case 18:
+ CRCtriplet(crc, next, -18);
+ /* fallthrough */
+ case 17:
+ CRCtriplet(crc, next, -17);
+ /* fallthrough */
+ case 16:
+ CRCtriplet(crc, next, -16);
+ /* fallthrough */
+ case 15:
+ CRCtriplet(crc, next, -15);
+ /* fallthrough */
+ case 14:
+ CRCtriplet(crc, next, -14);
+ /* fallthrough */
+ case 13:
+ CRCtriplet(crc, next, -13);
+ /* fallthrough */
+ case 12:
+ CRCtriplet(crc, next, -12);
+ /* fallthrough */
+ case 11:
+ CRCtriplet(crc, next, -11);
+ /* fallthrough */
+ case 10:
+ CRCtriplet(crc, next, -10);
+ /* fallthrough */
+ case 9:
+ CRCtriplet(crc, next, -9);
+ /* fallthrough */
+ case 8:
+ CRCtriplet(crc, next, -8);
+ /* fallthrough */
+ case 7:
+ CRCtriplet(crc, next, -7);
+ /* fallthrough */
+ case 6:
+ CRCtriplet(crc, next, -6);
+ /* fallthrough */
+ case 5:
+ CRCtriplet(crc, next, -5);
+ /* fallthrough */
+ case 4:
+ CRCtriplet(crc, next, -4);
+ /* fallthrough */
+ case 3:
+ CRCtriplet(crc, next, -3);
+ /* fallthrough */
+ case 2:
+ CRCtriplet(crc, next, -2);
+ /* fallthrough */
+ case 1:
+ CRCduplet(crc, next, -1); // the final triplet is actually only 2
+ //{ CombineCRC(); }
+ crc0 = CombineCRC(block_size, crc0, crc1, crc2, next2);
+ if (--n > 0) {
+ crc1 = crc2 = 0;
+ block_size = 128;
+ // points to the first byte of the next block
+ next0 = next2 + 128;
+ next1 = next0 + 128; // from here on all blocks are 128 long
+ next2 = next1 + 128;
+ }
+ /* fallthrough */
+ case 0:;
+ } while (n > 0);
+ }
+ next = (const unsigned char*)next2;
+ }
+ uint64_t count2 = len >> 3; // 216 of less bytes is 27 or less singlets
+ len = len & 7;
+ next += (count2 * 8);
+ switch (count2) {
+ case 27:
+ CRCsinglet(crc0, next, -27 * 8);
+ /* fallthrough */
+ case 26:
+ CRCsinglet(crc0, next, -26 * 8);
+ /* fallthrough */
+ case 25:
+ CRCsinglet(crc0, next, -25 * 8);
+ /* fallthrough */
+ case 24:
+ CRCsinglet(crc0, next, -24 * 8);
+ /* fallthrough */
+ case 23:
+ CRCsinglet(crc0, next, -23 * 8);
+ /* fallthrough */
+ case 22:
+ CRCsinglet(crc0, next, -22 * 8);
+ /* fallthrough */
+ case 21:
+ CRCsinglet(crc0, next, -21 * 8);
+ /* fallthrough */
+ case 20:
+ CRCsinglet(crc0, next, -20 * 8);
+ /* fallthrough */
+ case 19:
+ CRCsinglet(crc0, next, -19 * 8);
+ /* fallthrough */
+ case 18:
+ CRCsinglet(crc0, next, -18 * 8);
+ /* fallthrough */
+ case 17:
+ CRCsinglet(crc0, next, -17 * 8);
+ /* fallthrough */
+ case 16:
+ CRCsinglet(crc0, next, -16 * 8);
+ /* fallthrough */
+ case 15:
+ CRCsinglet(crc0, next, -15 * 8);
+ /* fallthrough */
+ case 14:
+ CRCsinglet(crc0, next, -14 * 8);
+ /* fallthrough */
+ case 13:
+ CRCsinglet(crc0, next, -13 * 8);
+ /* fallthrough */
+ case 12:
+ CRCsinglet(crc0, next, -12 * 8);
+ /* fallthrough */
+ case 11:
+ CRCsinglet(crc0, next, -11 * 8);
+ /* fallthrough */
+ case 10:
+ CRCsinglet(crc0, next, -10 * 8);
+ /* fallthrough */
+ case 9:
+ CRCsinglet(crc0, next, -9 * 8);
+ /* fallthrough */
+ case 8:
+ CRCsinglet(crc0, next, -8 * 8);
+ /* fallthrough */
+ case 7:
+ CRCsinglet(crc0, next, -7 * 8);
+ /* fallthrough */
+ case 6:
+ CRCsinglet(crc0, next, -6 * 8);
+ /* fallthrough */
+ case 5:
+ CRCsinglet(crc0, next, -5 * 8);
+ /* fallthrough */
+ case 4:
+ CRCsinglet(crc0, next, -4 * 8);
+ /* fallthrough */
+ case 3:
+ CRCsinglet(crc0, next, -3 * 8);
+ /* fallthrough */
+ case 2:
+ CRCsinglet(crc0, next, -2 * 8);
+ /* fallthrough */
+ case 1:
+ CRCsinglet(crc0, next, -1 * 8);
+ /* fallthrough */
+ case 0:;
+ }
+ }
+ {
+ align_to_8(len, crc0, next);
+ return (uint32_t)crc0 ^ 0xffffffffu;
+ }
+}
diff --git a/mysys/crc32ieee.cc b/mysys/crc32ieee.cc
index 5f8344b4f9d..bbafa1230f8 100644
--- a/mysys/crc32ieee.cc
+++ b/mysys/crc32ieee.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2020, 2021, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -39,25 +39,23 @@ typedef unsigned int (*my_crc32_t)(unsigned int, const void *, size_t);
static my_crc32_t init_crc32()
{
- my_crc32_t func= my_crc32_zlib;
#ifdef HAVE_PCLMUL
if (crc32_pclmul_enabled())
- func = crc32_pclmul;
+ return crc32_pclmul;
#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
if (crc32_aarch64_available())
- func= crc32_aarch64;
+ return crc32_aarch64;
#endif
- return func;
+ return my_crc32_zlib;
}
static const my_crc32_t my_checksum_func= init_crc32();
-#ifndef __powerpc64__
-/* For powerpc, my_checksum is defined elsewhere.*/
-extern "C" unsigned int my_checksum(unsigned int crc, const void *data, size_t len)
+#ifdef __powerpc64__
+# error "my_checksum() is defined in mysys/crc32/crc32_ppc64.c"
+#endif
+extern "C"
+unsigned int my_checksum(unsigned int crc, const void *data, size_t len)
{
return my_checksum_func(crc, data, len);
}
-#endif
-
-