summaryrefslogtreecommitdiff
path: root/mysys
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2021-04-14 12:32:27 +0300
committerMarko Mäkelä <marko.makela@mariadb.com>2021-04-14 12:32:27 +0300
commitd2e2d32933823623fa3598c8e2b8a5a322e435bb (patch)
tree4a0094ff26be1e985281ef008433ce1493b58ae7 /mysys
parent72e0601d11ac40a27ce071cba8626612bc625e3c (diff)
parent6c3e860cbf36831c118f6ea183acbbeb3c889bed (diff)
downloadmariadb-git-d2e2d32933823623fa3598c8e2b8a5a322e435bb.tar.gz
Merge 10.5 into 10.6
Diffstat (limited to 'mysys')
-rw-r--r--mysys/CMakeLists.txt32
-rw-r--r--mysys/crc32/crc32_x86.c28
-rw-r--r--mysys/crc32/crc32c.cc955
-rw-r--r--mysys/crc32/crc32c_amd64.cc711
-rw-r--r--mysys/crc32ieee.cc20
5 files changed, 879 insertions, 867 deletions
diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt
index d2d740aba17..5a4eeeba603 100644
--- a/mysys/CMakeLists.txt
+++ b/mysys/CMakeLists.txt
@@ -16,7 +16,7 @@
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR} ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/mysys)
-SET(MYSYS_SOURCES array.c charset-def.c charset.c crc32ieee.cc my_default.c
+SET(MYSYS_SOURCES array.c charset-def.c charset.c my_default.c
get_password.c
errors.c hash.c list.c
mf_cache.c mf_dirname.c mf_fn_ext.c
@@ -60,19 +60,29 @@ ENDIF()
IF(MSVC)
SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c)
+ IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc)
+ ENDIF()
ADD_DEFINITIONS(-DHAVE_SSE42 -DHAVE_PCLMUL)
IF(CLANG_CL)
- SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.cc crc32/crc32c.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
+ SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686")
- MY_CHECK_C_COMPILER_FLAG(-msse4.2)
- MY_CHECK_C_COMPILER_FLAG(-mpclmul)
+ MY_CHECK_CXX_COMPILER_FLAG(-msse4.2)
+ MY_CHECK_CXX_COMPILER_FLAG(-mpclmul)
CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H)
CHECK_INCLUDE_FILE(x86intrin.h HAVE_X86INTRIN_H)
- IF(have_C__msse4.2 AND have_C__mpclmul AND HAVE_CPUID_H AND HAVE_X86INTRIN_H)
- SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c)
- SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c crc32/crc32c.cc PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
- ADD_DEFINITIONS(-DHAVE_SSE42 -DHAVE_PCLMUL)
+ IF(have_CXX__msse4.2 AND HAVE_CPUID_H)
+ ADD_DEFINITIONS(-DHAVE_SSE42)
+ IF (have_CXX__mpclmul AND HAVE_X86INTRIN_H)
+ ADD_DEFINITIONS(-DHAVE_PCLMUL)
+ SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c)
+ SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
+ IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32c_amd64.cc)
+ SET_SOURCE_FILES_PROPERTIES(crc32/crc32c_amd64.cc PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
+ ENDIF()
+ ENDIF()
ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
IF(CMAKE_COMPILER_IS_GNUCC)
@@ -129,11 +139,15 @@ ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
COMPILE_FLAGS "-march=armv8-a+crc+crypto")
ENDIF()
ENDIF()
-ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64" OR CMAKE_SYSTEM_NAME MATCHES AIX)
+ENDIF()
+
+IF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64" OR CMAKE_SYSTEM_NAME MATCHES AIX)
SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_ppc64.c crc32/crc32c_ppc.c)
SET_SOURCE_FILES_PROPERTIES(crc32/crc32_ppc64.c crc32/crc32c_ppc.c PROPERTIES
COMPILE_FLAGS "${COMPILE_FLAGS} -maltivec -mvsx -mpower8-vector -mcrypto -mpower8-vector")
ADD_DEFINITIONS(-DHAVE_POWER8 -DHAS_ALTIVEC)
+ELSE()
+ SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32ieee.cc)
ENDIF()
IF(UNIX)
diff --git a/mysys/crc32/crc32_x86.c b/mysys/crc32/crc32_x86.c
index 1e5d2a0a089..f077399caca 100644
--- a/mysys/crc32/crc32_x86.c
+++ b/mysys/crc32/crc32_x86.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2020 MariaDB
+/* Copyright (c) 2020, 2021, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -55,38 +55,14 @@
#include <stdint.h>
#include <stddef.h>
-#if defined(__GNUC__)
+#ifdef __GNUC__
#include <x86intrin.h>
-#include <cpuid.h>
#elif defined(_MSC_VER)
#include <intrin.h>
#else
#error "unknown compiler"
#endif
-static int has_sse42_and_pclmul(uint32_t recx)
-{
- /* 1 << 20 is SSE42, 1 << 1 is PCLMULQDQ */
-#define bits_SSE42_AND_PCLMUL (1 << 20 | 1 << 1)
- return (recx & bits_SSE42_AND_PCLMUL) == bits_SSE42_AND_PCLMUL;
-}
-
-#ifdef __GNUC__
-int crc32_pclmul_enabled(void)
-{
- uint32_t reax= 0, rebx= 0, recx= 0, redx= 0;
- __cpuid(1, reax, rebx, recx, redx);
- return has_sse42_and_pclmul(recx);
-}
-#elif defined(_MSC_VER)
-int crc32_pclmul_enabled(void)
-{
- int regs[4];
- __cpuid(regs, 1);
- return has_sse42_and_pclmul(regs[2]);
-}
-#endif
-
/**
* @brief Shifts left 128 bit register by specified number of bytes
*
diff --git a/mysys/crc32/crc32c.cc b/mysys/crc32/crc32c.cc
index b6c80886ec1..082d467e7da 100644
--- a/mysys/crc32/crc32c.cc
+++ b/mysys/crc32/crc32c.cc
@@ -32,11 +32,20 @@ static inline uint32_t DecodeFixed32(const char *ptr)
#endif
#ifdef HAVE_SSE42
-#include <nmmintrin.h>
-#include <wmmintrin.h>
-#ifdef __GNUC__
-#include <cpuid.h>
-#endif
+# ifdef __GNUC__
+# include <cpuid.h>
+# if __GNUC__ < 5 && !defined __clang__
+/* the headers do not really work in GCC before version 5 */
+# define _mm_crc32_u8(crc,data) __builtin_ia32_crc32qi(crc,data)
+# define _mm_crc32_u32(crc,data) __builtin_ia32_crc32si(crc,data)
+# define _mm_crc32_u64(crc,data) __builtin_ia32_crc32di(crc,data)
+# else
+# include <nmmintrin.h>
+# endif
+# define USE_SSE42 __attribute__((target("sse4.2")))
+# else
+# define USE_SSE42 /* nothing */
+# endif
#endif
@@ -337,19 +346,8 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) {
return DecodeFixed32(reinterpret_cast<const char*>(p));
}
-#if defined(HAVE_SSE42) && (SIZEOF_SIZE_T == 8)
-
-static inline uint64_t DecodeFixed64(const char *ptr)
+static inline void Slow_CRC32(uint64_t* l, uint8_t const **p)
{
- return uint8korr(ptr);
-}
-
-static inline uint64_t LE_LOAD64(const uint8_t *p) {
- return DecodeFixed64(reinterpret_cast<const char*>(p));
-}
-#endif
-
-static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
*p += 4;
*l = table3_[c & 0xff] ^
@@ -365,27 +363,6 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
table0_[c >> 24];
}
-__attribute__((unused)) static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
-#ifndef HAVE_SSE42
- Slow_CRC32(l, p);
-#elif (SIZEOF_SIZE_T == 8)
- *l = _mm_crc32_u64(*l, LE_LOAD64(*p));
- *p += 8;
-#else
- *l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
- *p += 4;
- *l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
- *p += 4;
-#endif
-}
-
-template<void (*CRC32)(uint64_t*, uint8_t const**)>
-uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
-
- const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
- const uint8_t *e = p + size;
- uint64_t l = crc ^ 0xffffffffu;
-
#ifdef ALIGN
#undef ALIGN
#endif
@@ -398,70 +375,115 @@ uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
l = table0_[c] ^ (l >> 8); \
} while (0)
+static uint32_t crc32c_slow(uint32_t crc, const char* buf, size_t size)
+{
+ const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
+ const uint8_t *e = p + size;
+ uint64_t l = crc ^ 0xffffffffu;
// Point x at first 16-byte aligned byte in string. This might be
// just past the end of the string.
const uintptr_t pval = reinterpret_cast<uintptr_t>(p);
const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4));
- if (x <= e) {
+ if (x <= e)
// Process bytes until finished or p is 16-byte aligned
- while (p != x) {
+ while (p != x)
STEP1;
- }
- }
// Process bytes 16 at a time
- while ((e-p) >= 16) {
- CRC32(&l, &p);
- CRC32(&l, &p);
+ while ((e-p) >= 16)
+ {
+ Slow_CRC32(&l, &p);
+ Slow_CRC32(&l, &p);
}
// Process bytes 8 at a time
- while ((e-p) >= 8) {
- CRC32(&l, &p);
- }
+ while ((e-p) >= 8)
+ Slow_CRC32(&l, &p);
// Process the last few bytes
- while (p != e) {
+ while (p != e)
STEP1;
- }
-#undef STEP1
-#undef ALIGN
return static_cast<uint32_t>(l ^ 0xffffffffu);
}
-// Detect if ARM64 CRC or not.
-#ifndef HAVE_ARMV8_CRC
-// Detect if SS42 or not.
-#ifndef HAVE_POWER8
+#if defined HAVE_POWER8
+#elif defined HAVE_ARMV8_CRC
+#elif defined HAVE_SSE42
+constexpr uint32_t cpuid_ecx_SSE42= 1U << 20;
+constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U<<1;
-static bool isSSE42() {
-#ifndef HAVE_SSE42
- return false;
-#elif defined(__GNUC__)
+static uint32_t cpuid_ecx()
+{
+#ifdef __GNUC__
uint32_t reax= 0, rebx= 0, recx= 0, redx= 0;
__cpuid(1, reax, rebx, recx, redx);
- return (recx & ((int)1 << 20)) != 0;
-#elif defined(_MSC_VER)
- int info[4];
- __cpuid(info, 0x00000001);
- return (info[2] & ((int)1 << 20)) != 0;
+ return recx;
+#elif defined _MSC_VER
+ int regs[4];
+ __cpuid(regs, 1);
+ return regs[2];
#else
- return false;
+# error "unknown compiler"
#endif
}
-#ifdef HAVE_SSE42
-extern "C" int crc32_pclmul_enabled();
-#endif
+extern "C" int crc32_pclmul_enabled(void)
+{
+ return !(~cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL);
+}
-static bool isPCLMULQDQ() {
-#ifdef HAVE_SSE42
- return crc32_pclmul_enabled();
-#else
- return false;
+#if SIZEOF_SIZE_T == 8
+extern "C" uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len);
+
+USE_SSE42
+static inline uint64_t LE_LOAD64(const uint8_t *ptr)
+{
+ return uint8korr(reinterpret_cast<const char*>(ptr));
+}
#endif
+
+USE_SSE42
+static inline void Fast_CRC32(uint64_t* l, uint8_t const **p)
+{
+# if (SIZEOF_SIZE_T == 8)
+ *l = _mm_crc32_u64(*l, LE_LOAD64(*p));
+ *p += 8;
+# else
+ *l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
+ *p += 4;
+ *l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
+ *p += 4;
+# endif
}
-#endif // HAVE_POWER8
-#endif // HAVE_ARMV8_CRC
+USE_SSE42
+static uint32_t crc32c_sse42(uint32_t crc, const char* buf, size_t size)
+{
+ const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
+ const uint8_t *e = p + size;
+ uint64_t l = crc ^ 0xffffffffu;
+
+ // Point x at first 16-byte aligned byte in string. This might be
+ // just past the end of the string.
+ const uintptr_t pval = reinterpret_cast<uintptr_t>(p);
+ const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4));
+ if (x <= e)
+ // Process bytes until finished or p is 16-byte aligned
+ while (p != x)
+ STEP1;
+ // Process bytes 16 at a time
+ while ((e-p) >= 16)
+ {
+ Fast_CRC32(&l, &p);
+ Fast_CRC32(&l, &p);
+ }
+ // Process bytes 8 at a time
+ while ((e-p) >= 8)
+ Fast_CRC32(&l, &p);
+ // Process the last few bytes
+ while (p != e)
+ STEP1;
+ return static_cast<uint32_t>(l ^ 0xffffffffu);
+}
+#endif
typedef uint32_t (*Function)(uint32_t, const char*, size_t);
@@ -507,14 +529,6 @@ static int arch_ppc_probe(void) {
return arch_ppc_crc32;
}
#endif // __linux__
-
-static bool isAltiVec() {
- if (arch_ppc_probe()) {
- return true;
- } else {
- return false;
- }
-}
#endif
#if defined(HAVE_ARMV8_CRC)
@@ -526,760 +540,59 @@ static uint32_t ExtendARMImpl(uint32_t crc, const char *buf, size_t size) {
}
#endif
-extern "C" const char * my_crc32c_implementation()
+static inline Function Choose_Extend()
{
-#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
+#if defined HAVE_POWER8 && defined HAS_ALTIVEC
if (arch_ppc_probe())
- return "Using POWER8 crc32 instructions";
+ return ExtendPPCImpl;
#elif defined(HAVE_ARMV8_CRC)
- const char *ret = crc32c_aarch64_available();
- if (ret)
- return ret ;
+ if (crc32c_aarch64_available())
+ return ExtendARMImpl;
#elif HAVE_SSE42
- if (isSSE42())
- {
- if (SIZEOF_SIZE_T == 8 && isPCLMULQDQ())
- return "Using crc32 + pclmulqdq instructions";
- return "Using SSE4.2 crc32 instructions";
+# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8
+ switch (cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) {
+ case cpuid_ecx_SSE42_AND_PCLMUL:
+ return crc32c_3way;
+ case cpuid_ecx_SSE42:
+ return crc32c_sse42;
}
+# else
+ if (cpuid_ecx() & cpuid_ecx_SSE42)
+ return crc32c_sse42;
+# endif
#endif
- return "Using generic crc32 instructions";
-}
-
-
-/*
- * Copyright 2016 Ferry Toth, Exalon Delft BV, The Netherlands
- * This software is provided 'as-is', without any express or implied
- * warranty. In no event will the author be held liable for any damages
- * arising from the use of this software.
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- * 1. The origin of this software must not be misrepresented; you must not
- * claim that you wrote the original software. If you use this software
- * in a product, an acknowledgment in the product documentation would be
- * appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- * Ferry Toth
- * ftoth@exalondelft.nl
- *
- * https://github.com/htot/crc32c
- *
- * Modified by Facebook
- *
- * Original intel whitepaper:
- * "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction"
- * https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
- *
- * This version is from the folly library, created by Dave Watson <davejwatson@fb.com>
- *
-*/
-#if defined HAVE_SSE42 && defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8
-
-
-#define CRCtriplet(crc, buf, offset) \
- crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
- crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); \
- crc##2 = _mm_crc32_u64(crc##2, *(buf##2 + offset));
-
-#define CRCduplet(crc, buf, offset) \
- crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
- crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset));
-
-#define CRCsinglet(crc, buf, offset) \
- crc = _mm_crc32_u64(crc, *(uint64_t*)(buf + offset));
-
-
-// Numbers taken directly from intel whitepaper.
-// clang-format off
-static const uint64_t clmul_constants[] = {
- 0x14cd00bd6, 0x105ec76f0, 0x0ba4fc28e, 0x14cd00bd6,
- 0x1d82c63da, 0x0f20c0dfe, 0x09e4addf8, 0x0ba4fc28e,
- 0x039d3b296, 0x1384aa63a, 0x102f9b8a2, 0x1d82c63da,
- 0x14237f5e6, 0x01c291d04, 0x00d3b6092, 0x09e4addf8,
- 0x0c96cfdc0, 0x0740eef02, 0x18266e456, 0x039d3b296,
- 0x0daece73e, 0x0083a6eec, 0x0ab7aff2a, 0x102f9b8a2,
- 0x1248ea574, 0x1c1733996, 0x083348832, 0x14237f5e6,
- 0x12c743124, 0x02ad91c30, 0x0b9e02b86, 0x00d3b6092,
- 0x018b33a4e, 0x06992cea2, 0x1b331e26a, 0x0c96cfdc0,
- 0x17d35ba46, 0x07e908048, 0x1bf2e8b8a, 0x18266e456,
- 0x1a3e0968a, 0x11ed1f9d8, 0x0ce7f39f4, 0x0daece73e,
- 0x061d82e56, 0x0f1d0f55e, 0x0d270f1a2, 0x0ab7aff2a,
- 0x1c3f5f66c, 0x0a87ab8a8, 0x12ed0daac, 0x1248ea574,
- 0x065863b64, 0x08462d800, 0x11eef4f8e, 0x083348832,
- 0x1ee54f54c, 0x071d111a8, 0x0b3e32c28, 0x12c743124,
- 0x0064f7f26, 0x0ffd852c6, 0x0dd7e3b0c, 0x0b9e02b86,
- 0x0f285651c, 0x0dcb17aa4, 0x010746f3c, 0x018b33a4e,
- 0x1c24afea4, 0x0f37c5aee, 0x0271d9844, 0x1b331e26a,
- 0x08e766a0c, 0x06051d5a2, 0x093a5f730, 0x17d35ba46,
- 0x06cb08e5c, 0x11d5ca20e, 0x06b749fb2, 0x1bf2e8b8a,
- 0x1167f94f2, 0x021f3d99c, 0x0cec3662e, 0x1a3e0968a,
- 0x19329634a, 0x08f158014, 0x0e6fc4e6a, 0x0ce7f39f4,
- 0x08227bb8a, 0x1a5e82106, 0x0b0cd4768, 0x061d82e56,
- 0x13c2b89c4, 0x188815ab2, 0x0d7a4825c, 0x0d270f1a2,
- 0x10f5ff2ba, 0x105405f3e, 0x00167d312, 0x1c3f5f66c,
- 0x0f6076544, 0x0e9adf796, 0x026f6a60a, 0x12ed0daac,
- 0x1a2adb74e, 0x096638b34, 0x19d34af3a, 0x065863b64,
- 0x049c3cc9c, 0x1e50585a0, 0x068bce87a, 0x11eef4f8e,
- 0x1524fa6c6, 0x19f1c69dc, 0x16cba8aca, 0x1ee54f54c,
- 0x042d98888, 0x12913343e, 0x1329d9f7e, 0x0b3e32c28,
- 0x1b1c69528, 0x088f25a3a, 0x02178513a, 0x0064f7f26,
- 0x0e0ac139e, 0x04e36f0b0, 0x0170076fa, 0x0dd7e3b0c,
- 0x141a1a2e2, 0x0bd6f81f8, 0x16ad828b4, 0x0f285651c,
- 0x041d17b64, 0x19425cbba, 0x1fae1cc66, 0x010746f3c,
- 0x1a75b4b00, 0x18db37e8a, 0x0f872e54c, 0x1c24afea4,
- 0x01e41e9fc, 0x04c144932, 0x086d8e4d2, 0x0271d9844,
- 0x160f7af7a, 0x052148f02, 0x05bb8f1bc, 0x08e766a0c,
- 0x0a90fd27a, 0x0a3c6f37a, 0x0b3af077a, 0x093a5f730,
- 0x04984d782, 0x1d22c238e, 0x0ca6ef3ac, 0x06cb08e5c,
- 0x0234e0b26, 0x063ded06a, 0x1d88abd4a, 0x06b749fb2,
- 0x04597456a, 0x04d56973c, 0x0e9e28eb4, 0x1167f94f2,
- 0x07b3ff57a, 0x19385bf2e, 0x0c9c8b782, 0x0cec3662e,
- 0x13a9cba9e, 0x0e417f38a, 0x093e106a4, 0x19329634a,
- 0x167001a9c, 0x14e727980, 0x1ddffc5d4, 0x0e6fc4e6a,
- 0x00df04680, 0x0d104b8fc, 0x02342001e, 0x08227bb8a,
- 0x00a2a8d7e, 0x05b397730, 0x168763fa6, 0x0b0cd4768,
- 0x1ed5a407a, 0x0e78eb416, 0x0d2c3ed1a, 0x13c2b89c4,
- 0x0995a5724, 0x1641378f0, 0x19b1afbc4, 0x0d7a4825c,
- 0x109ffedc0, 0x08d96551c, 0x0f2271e60, 0x10f5ff2ba,
- 0x00b0bf8ca, 0x00bf80dd2, 0x123888b7a, 0x00167d312,
- 0x1e888f7dc, 0x18dcddd1c, 0x002ee03b2, 0x0f6076544,
- 0x183e8d8fe, 0x06a45d2b2, 0x133d7a042, 0x026f6a60a,
- 0x116b0f50c, 0x1dd3e10e8, 0x05fabe670, 0x1a2adb74e,
- 0x130004488, 0x0de87806c, 0x000bcf5f6, 0x19d34af3a,
- 0x18f0c7078, 0x014338754, 0x017f27698, 0x049c3cc9c,
- 0x058ca5f00, 0x15e3e77ee, 0x1af900c24, 0x068bce87a,
- 0x0b5cfca28, 0x0dd07448e, 0x0ded288f8, 0x1524fa6c6,
- 0x059f229bc, 0x1d8048348, 0x06d390dec, 0x16cba8aca,
- 0x037170390, 0x0a3e3e02c, 0x06353c1cc, 0x042d98888,
- 0x0c4584f5c, 0x0d73c7bea, 0x1f16a3418, 0x1329d9f7e,
- 0x0531377e2, 0x185137662, 0x1d8d9ca7c, 0x1b1c69528,
- 0x0b25b29f2, 0x18a08b5bc, 0x19fb2a8b0, 0x02178513a,
- 0x1a08fe6ac, 0x1da758ae0, 0x045cddf4e, 0x0e0ac139e,
- 0x1a91647f2, 0x169cf9eb0, 0x1a0f717c4, 0x0170076fa,
-};
-
-// Compute the crc32c value for buffer smaller than 8
-static inline void align_to_8(
- size_t len,
- uint64_t& crc0, // crc so far, updated on return
- const unsigned char*& next) { // next data pointer, updated on return
- uint32_t crc32bit = static_cast<uint32_t>(crc0);
- if (len & 0x04) {
- crc32bit = _mm_crc32_u32(crc32bit, *(uint32_t*)next);
- next += sizeof(uint32_t);
- }
- if (len & 0x02) {
- crc32bit = _mm_crc32_u16(crc32bit, *(uint16_t*)next);
- next += sizeof(uint16_t);
- }
- if (len & 0x01) {
- crc32bit = _mm_crc32_u8(crc32bit, *(next));
- next++;
- }
- crc0 = crc32bit;
-}
-
-//
-// CombineCRC performs pclmulqdq multiplication of 2 partial CRC's and a well
-// chosen constant and xor's these with the remaining CRC.
-//
-static inline uint64_t CombineCRC(
- size_t block_size,
- uint64_t crc0,
- uint64_t crc1,
- uint64_t crc2,
- const uint64_t* next2) {
- const auto multiplier =
- *(reinterpret_cast<const __m128i*>(clmul_constants) + block_size - 1);
- const auto crc0_xmm = _mm_set_epi64x(0, crc0);
- const auto res0 = _mm_clmulepi64_si128(crc0_xmm, multiplier, 0x00);
- const auto crc1_xmm = _mm_set_epi64x(0, crc1);
- const auto res1 = _mm_clmulepi64_si128(crc1_xmm, multiplier, 0x10);
- const auto res = _mm_xor_si128(res0, res1);
- crc0 = _mm_cvtsi128_si64(res);
- crc0 = crc0 ^ *((uint64_t*)next2 - 1);
- crc2 = _mm_crc32_u64(crc2, crc0);
- return crc2;
+ return crc32c_slow;
}
-// Compute CRC-32C using the Intel hardware instruction.
-static inline uint32_t crc32c_3way(uint32_t crc, const char* buf, size_t len) {
- const unsigned char* next = (const unsigned char*)buf;
- uint64_t count;
- uint64_t crc0, crc1, crc2;
- crc0 = crc ^ 0xffffffffu;
-
-
- if (len >= 8) {
- // if len > 216 then align and use triplets
- if (len > 216) {
- {
- // Work on the bytes (< 8) before the first 8-byte alignment addr starts
- auto align_bytes = (8 - (uintptr_t)next) & 7;
- len -= align_bytes;
- align_to_8(align_bytes, crc0, next);
- }
-
- // Now work on the remaining blocks
- count = len / 24; // number of triplets
- len %= 24; // bytes remaining
- uint64_t n = count >> 7; // #blocks = first block + full blocks
- uint64_t block_size = count & 127;
- if (block_size == 0) {
- block_size = 128;
- } else {
- n++;
- }
- // points to the first byte of the next block
- const uint64_t* next0 = (uint64_t*)next + block_size;
- const uint64_t* next1 = next0 + block_size;
- const uint64_t* next2 = next1 + block_size;
+static const Function ChosenExtend= Choose_Extend();
- crc1 = crc2 = 0;
- // Use Duff's device, a for() loop inside a switch()
- // statement. This needs to execute at least once, round len
- // down to nearest triplet multiple
- switch (block_size) {
- case 128:
- do {
- // jumps here for a full block of len 128
- CRCtriplet(crc, next, -128);
- /* fallthrough */
- case 127:
- // jumps here or below for the first block smaller
- CRCtriplet(crc, next, -127);
- /* fallthrough */
- case 126:
- CRCtriplet(crc, next, -126); // than 128
- /* fallthrough */
- case 125:
- CRCtriplet(crc, next, -125);
- /* fallthrough */
- case 124:
- CRCtriplet(crc, next, -124);
- /* fallthrough */
- case 123:
- CRCtriplet(crc, next, -123);
- /* fallthrough */
- case 122:
- CRCtriplet(crc, next, -122);
- /* fallthrough */
- case 121:
- CRCtriplet(crc, next, -121);
- /* fallthrough */
- case 120:
- CRCtriplet(crc, next, -120);
- /* fallthrough */
- case 119:
- CRCtriplet(crc, next, -119);
- /* fallthrough */
- case 118:
- CRCtriplet(crc, next, -118);
- /* fallthrough */
- case 117:
- CRCtriplet(crc, next, -117);
- /* fallthrough */
- case 116:
- CRCtriplet(crc, next, -116);
- /* fallthrough */
- case 115:
- CRCtriplet(crc, next, -115);
- /* fallthrough */
- case 114:
- CRCtriplet(crc, next, -114);
- /* fallthrough */
- case 113:
- CRCtriplet(crc, next, -113);
- /* fallthrough */
- case 112:
- CRCtriplet(crc, next, -112);
- /* fallthrough */
- case 111:
- CRCtriplet(crc, next, -111);
- /* fallthrough */
- case 110:
- CRCtriplet(crc, next, -110);
- /* fallthrough */
- case 109:
- CRCtriplet(crc, next, -109);
- /* fallthrough */
- case 108:
- CRCtriplet(crc, next, -108);
- /* fallthrough */
- case 107:
- CRCtriplet(crc, next, -107);
- /* fallthrough */
- case 106:
- CRCtriplet(crc, next, -106);
- /* fallthrough */
- case 105:
- CRCtriplet(crc, next, -105);
- /* fallthrough */
- case 104:
- CRCtriplet(crc, next, -104);
- /* fallthrough */
- case 103:
- CRCtriplet(crc, next, -103);
- /* fallthrough */
- case 102:
- CRCtriplet(crc, next, -102);
- /* fallthrough */
- case 101:
- CRCtriplet(crc, next, -101);
- /* fallthrough */
- case 100:
- CRCtriplet(crc, next, -100);
- /* fallthrough */
- case 99:
- CRCtriplet(crc, next, -99);
- /* fallthrough */
- case 98:
- CRCtriplet(crc, next, -98);
- /* fallthrough */
- case 97:
- CRCtriplet(crc, next, -97);
- /* fallthrough */
- case 96:
- CRCtriplet(crc, next, -96);
- /* fallthrough */
- case 95:
- CRCtriplet(crc, next, -95);
- /* fallthrough */
- case 94:
- CRCtriplet(crc, next, -94);
- /* fallthrough */
- case 93:
- CRCtriplet(crc, next, -93);
- /* fallthrough */
- case 92:
- CRCtriplet(crc, next, -92);
- /* fallthrough */
- case 91:
- CRCtriplet(crc, next, -91);
- /* fallthrough */
- case 90:
- CRCtriplet(crc, next, -90);
- /* fallthrough */
- case 89:
- CRCtriplet(crc, next, -89);
- /* fallthrough */
- case 88:
- CRCtriplet(crc, next, -88);
- /* fallthrough */
- case 87:
- CRCtriplet(crc, next, -87);
- /* fallthrough */
- case 86:
- CRCtriplet(crc, next, -86);
- /* fallthrough */
- case 85:
- CRCtriplet(crc, next, -85);
- /* fallthrough */
- case 84:
- CRCtriplet(crc, next, -84);
- /* fallthrough */
- case 83:
- CRCtriplet(crc, next, -83);
- /* fallthrough */
- case 82:
- CRCtriplet(crc, next, -82);
- /* fallthrough */
- case 81:
- CRCtriplet(crc, next, -81);
- /* fallthrough */
- case 80:
- CRCtriplet(crc, next, -80);
- /* fallthrough */
- case 79:
- CRCtriplet(crc, next, -79);
- /* fallthrough */
- case 78:
- CRCtriplet(crc, next, -78);
- /* fallthrough */
- case 77:
- CRCtriplet(crc, next, -77);
- /* fallthrough */
- case 76:
- CRCtriplet(crc, next, -76);
- /* fallthrough */
- case 75:
- CRCtriplet(crc, next, -75);
- /* fallthrough */
- case 74:
- CRCtriplet(crc, next, -74);
- /* fallthrough */
- case 73:
- CRCtriplet(crc, next, -73);
- /* fallthrough */
- case 72:
- CRCtriplet(crc, next, -72);
- /* fallthrough */
- case 71:
- CRCtriplet(crc, next, -71);
- /* fallthrough */
- case 70:
- CRCtriplet(crc, next, -70);
- /* fallthrough */
- case 69:
- CRCtriplet(crc, next, -69);
- /* fallthrough */
- case 68:
- CRCtriplet(crc, next, -68);
- /* fallthrough */
- case 67:
- CRCtriplet(crc, next, -67);
- /* fallthrough */
- case 66:
- CRCtriplet(crc, next, -66);
- /* fallthrough */
- case 65:
- CRCtriplet(crc, next, -65);
- /* fallthrough */
- case 64:
- CRCtriplet(crc, next, -64);
- /* fallthrough */
- case 63:
- CRCtriplet(crc, next, -63);
- /* fallthrough */
- case 62:
- CRCtriplet(crc, next, -62);
- /* fallthrough */
- case 61:
- CRCtriplet(crc, next, -61);
- /* fallthrough */
- case 60:
- CRCtriplet(crc, next, -60);
- /* fallthrough */
- case 59:
- CRCtriplet(crc, next, -59);
- /* fallthrough */
- case 58:
- CRCtriplet(crc, next, -58);
- /* fallthrough */
- case 57:
- CRCtriplet(crc, next, -57);
- /* fallthrough */
- case 56:
- CRCtriplet(crc, next, -56);
- /* fallthrough */
- case 55:
- CRCtriplet(crc, next, -55);
- /* fallthrough */
- case 54:
- CRCtriplet(crc, next, -54);
- /* fallthrough */
- case 53:
- CRCtriplet(crc, next, -53);
- /* fallthrough */
- case 52:
- CRCtriplet(crc, next, -52);
- /* fallthrough */
- case 51:
- CRCtriplet(crc, next, -51);
- /* fallthrough */
- case 50:
- CRCtriplet(crc, next, -50);
- /* fallthrough */
- case 49:
- CRCtriplet(crc, next, -49);
- /* fallthrough */
- case 48:
- CRCtriplet(crc, next, -48);
- /* fallthrough */
- case 47:
- CRCtriplet(crc, next, -47);
- /* fallthrough */
- case 46:
- CRCtriplet(crc, next, -46);
- /* fallthrough */
- case 45:
- CRCtriplet(crc, next, -45);
- /* fallthrough */
- case 44:
- CRCtriplet(crc, next, -44);
- /* fallthrough */
- case 43:
- CRCtriplet(crc, next, -43);
- /* fallthrough */
- case 42:
- CRCtriplet(crc, next, -42);
- /* fallthrough */
- case 41:
- CRCtriplet(crc, next, -41);
- /* fallthrough */
- case 40:
- CRCtriplet(crc, next, -40);
- /* fallthrough */
- case 39:
- CRCtriplet(crc, next, -39);
- /* fallthrough */
- case 38:
- CRCtriplet(crc, next, -38);
- /* fallthrough */
- case 37:
- CRCtriplet(crc, next, -37);
- /* fallthrough */
- case 36:
- CRCtriplet(crc, next, -36);
- /* fallthrough */
- case 35:
- CRCtriplet(crc, next, -35);
- /* fallthrough */
- case 34:
- CRCtriplet(crc, next, -34);
- /* fallthrough */
- case 33:
- CRCtriplet(crc, next, -33);
- /* fallthrough */
- case 32:
- CRCtriplet(crc, next, -32);
- /* fallthrough */
- case 31:
- CRCtriplet(crc, next, -31);
- /* fallthrough */
- case 30:
- CRCtriplet(crc, next, -30);
- /* fallthrough */
- case 29:
- CRCtriplet(crc, next, -29);
- /* fallthrough */
- case 28:
- CRCtriplet(crc, next, -28);
- /* fallthrough */
- case 27:
- CRCtriplet(crc, next, -27);
- /* fallthrough */
- case 26:
- CRCtriplet(crc, next, -26);
- /* fallthrough */
- case 25:
- CRCtriplet(crc, next, -25);
- /* fallthrough */
- case 24:
- CRCtriplet(crc, next, -24);
- /* fallthrough */
- case 23:
- CRCtriplet(crc, next, -23);
- /* fallthrough */
- case 22:
- CRCtriplet(crc, next, -22);
- /* fallthrough */
- case 21:
- CRCtriplet(crc, next, -21);
- /* fallthrough */
- case 20:
- CRCtriplet(crc, next, -20);
- /* fallthrough */
- case 19:
- CRCtriplet(crc, next, -19);
- /* fallthrough */
- case 18:
- CRCtriplet(crc, next, -18);
- /* fallthrough */
- case 17:
- CRCtriplet(crc, next, -17);
- /* fallthrough */
- case 16:
- CRCtriplet(crc, next, -16);
- /* fallthrough */
- case 15:
- CRCtriplet(crc, next, -15);
- /* fallthrough */
- case 14:
- CRCtriplet(crc, next, -14);
- /* fallthrough */
- case 13:
- CRCtriplet(crc, next, -13);
- /* fallthrough */
- case 12:
- CRCtriplet(crc, next, -12);
- /* fallthrough */
- case 11:
- CRCtriplet(crc, next, -11);
- /* fallthrough */
- case 10:
- CRCtriplet(crc, next, -10);
- /* fallthrough */
- case 9:
- CRCtriplet(crc, next, -9);
- /* fallthrough */
- case 8:
- CRCtriplet(crc, next, -8);
- /* fallthrough */
- case 7:
- CRCtriplet(crc, next, -7);
- /* fallthrough */
- case 6:
- CRCtriplet(crc, next, -6);
- /* fallthrough */
- case 5:
- CRCtriplet(crc, next, -5);
- /* fallthrough */
- case 4:
- CRCtriplet(crc, next, -4);
- /* fallthrough */
- case 3:
- CRCtriplet(crc, next, -3);
- /* fallthrough */
- case 2:
- CRCtriplet(crc, next, -2);
- /* fallthrough */
- case 1:
- CRCduplet(crc, next, -1); // the final triplet is actually only 2
- //{ CombineCRC(); }
- crc0 = CombineCRC(block_size, crc0, crc1, crc2, next2);
- if (--n > 0) {
- crc1 = crc2 = 0;
- block_size = 128;
- // points to the first byte of the next block
- next0 = next2 + 128;
- next1 = next0 + 128; // from here on all blocks are 128 long
- next2 = next1 + 128;
- }
- /* fallthrough */
- case 0:;
- } while (n > 0);
- }
- next = (const unsigned char*)next2;
- }
- uint64_t count2 = len >> 3; // 216 of less bytes is 27 or less singlets
- len = len & 7;
- next += (count2 * 8);
- switch (count2) {
- case 27:
- CRCsinglet(crc0, next, -27 * 8);
- /* fallthrough */
- case 26:
- CRCsinglet(crc0, next, -26 * 8);
- /* fallthrough */
- case 25:
- CRCsinglet(crc0, next, -25 * 8);
- /* fallthrough */
- case 24:
- CRCsinglet(crc0, next, -24 * 8);
- /* fallthrough */
- case 23:
- CRCsinglet(crc0, next, -23 * 8);
- /* fallthrough */
- case 22:
- CRCsinglet(crc0, next, -22 * 8);
- /* fallthrough */
- case 21:
- CRCsinglet(crc0, next, -21 * 8);
- /* fallthrough */
- case 20:
- CRCsinglet(crc0, next, -20 * 8);
- /* fallthrough */
- case 19:
- CRCsinglet(crc0, next, -19 * 8);
- /* fallthrough */
- case 18:
- CRCsinglet(crc0, next, -18 * 8);
- /* fallthrough */
- case 17:
- CRCsinglet(crc0, next, -17 * 8);
- /* fallthrough */
- case 16:
- CRCsinglet(crc0, next, -16 * 8);
- /* fallthrough */
- case 15:
- CRCsinglet(crc0, next, -15 * 8);
- /* fallthrough */
- case 14:
- CRCsinglet(crc0, next, -14 * 8);
- /* fallthrough */
- case 13:
- CRCsinglet(crc0, next, -13 * 8);
- /* fallthrough */
- case 12:
- CRCsinglet(crc0, next, -12 * 8);
- /* fallthrough */
- case 11:
- CRCsinglet(crc0, next, -11 * 8);
- /* fallthrough */
- case 10:
- CRCsinglet(crc0, next, -10 * 8);
- /* fallthrough */
- case 9:
- CRCsinglet(crc0, next, -9 * 8);
- /* fallthrough */
- case 8:
- CRCsinglet(crc0, next, -8 * 8);
- /* fallthrough */
- case 7:
- CRCsinglet(crc0, next, -7 * 8);
- /* fallthrough */
- case 6:
- CRCsinglet(crc0, next, -6 * 8);
- /* fallthrough */
- case 5:
- CRCsinglet(crc0, next, -5 * 8);
- /* fallthrough */
- case 4:
- CRCsinglet(crc0, next, -4 * 8);
- /* fallthrough */
- case 3:
- CRCsinglet(crc0, next, -3 * 8);
- /* fallthrough */
- case 2:
- CRCsinglet(crc0, next, -2 * 8);
- /* fallthrough */
- case 1:
- CRCsinglet(crc0, next, -1 * 8);
- /* fallthrough */
- case 0:;
- }
- }
- {
- align_to_8(len, crc0, next);
- return (uint32_t)crc0 ^ 0xffffffffu;
- }
+static inline uint32_t Extend(uint32_t crc, const char* buf, size_t size)
+{
+ return ChosenExtend(crc, buf, size);
}
-#else
-#define NO_THREEWAY_CRC32C
-#endif //HAVE_SSE42 && HAVE_PCLMUL
-
-static inline Function Choose_Extend() {
-#ifdef HAVE_POWER8
- return isAltiVec() ? ExtendPPCImpl : ExtendImpl<Slow_CRC32>;
+extern "C" const char *my_crc32c_implementation()
+{ // Returns a static string describing which CRC-32C routine ChosenExtend refers to.
+#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
+ if (ChosenExtend == ExtendPPCImpl)
+ return "Using POWER8 crc32 instructions";
#elif defined(HAVE_ARMV8_CRC)
- if(crc32c_aarch64_available()) {
- return ExtendARMImpl;
- } else {
- return ExtendImpl<Slow_CRC32>;
- }
-#else
- if (isSSE42()) {
- if (isPCLMULQDQ()) {
-#if defined HAVE_SSE42 && defined HAVE_PCLMUL && !defined NO_THREEWAY_CRC32C
- return crc32c_3way;
-#else
- return ExtendImpl<Fast_CRC32>; // Fast_CRC32 will check HAVE_SSE42 itself
-#endif
- }
- else { // no runtime PCLMULQDQ support but has SSE42 support
- return ExtendImpl<Fast_CRC32>;
- }
- } // end of isSSE42()
- else {
- return ExtendImpl<Slow_CRC32>;
- }
+ if (const char *ret= crc32c_aarch64_available()) // a non-null result is used as the description
+ return ret;
+#elif HAVE_SSE42
+# if defined HAVE_PCLMUL && SIZEOF_SIZE_T == 8
+ if (ChosenExtend == crc32c_3way)
+ return "Using crc32 + pclmulqdq instructions";
+# endif
+ if (ChosenExtend == crc32c_sse42)
+ return "Using SSE4.2 crc32 instructions";
#endif
-}
-
-static const Function ChosenExtend = Choose_Extend();
-
-static inline uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
- return ChosenExtend(crc, buf, size);
+ return "Using generic crc32 instructions"; // reached when no branch above returned
}
} // namespace crc32c
} // namespace mysys_namespace
-extern "C" unsigned int my_crc32c(unsigned int crc, const char *buf, size_t size)
+extern "C" unsigned my_crc32c(unsigned int crc, const char *buf, size_t size) // C-linkage entry point; forwards to crc32c::Extend
{
return mysys_namespace::crc32c::Extend(crc,buf, size);
}
diff --git a/mysys/crc32/crc32c_amd64.cc b/mysys/crc32/crc32c_amd64.cc
new file mode 100644
index 00000000000..22c492b457f
--- /dev/null
+++ b/mysys/crc32/crc32c_amd64.cc
@@ -0,0 +1,711 @@
+/* Copyright (c) 2020, 2021, MariaDB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+ * Copyright 2016 Ferry Toth, Exalon Delft BV, The Netherlands
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the author be held liable for any damages
+ * arising from the use of this software.
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ * Ferry Toth
+ * ftoth@exalondelft.nl
+ *
+ * https://github.com/htot/crc32c
+ *
+ * Modified by Facebook
+ *
+ * Original intel whitepaper:
+ * "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction"
+ * https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
+ *
+ * This version is from the folly library, created by Dave Watson <davejwatson@fb.com>
+ *
+*/
+
+#include <stdint.h>
+#include <nmmintrin.h>
+#include <wmmintrin.h>
+
+// Advance <crc>0, <crc>1 and <crc>2 by the 64-bit word at <buf>0/<buf>1/<buf>2 + offset (names built by token pasting).
+#define CRCtriplet(crc, buf, offset) \
+ crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
+ crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); \
+ crc##2 = _mm_crc32_u64(crc##2, *(buf##2 + offset));
+// Same as CRCtriplet but only for <crc>0 and <crc>1; the third stream is left untouched.
+#define CRCduplet(crc, buf, offset) \
+ crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
+ crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset));
+// Advance crc by one 64-bit word; offset is added in units of buf's own pointer type before the cast to uint64_t*.
+#define CRCsinglet(crc, buf, offset) \
+ crc = _mm_crc32_u64(crc, *(uint64_t*)(buf + offset));
+
+// 256 64-bit values = 128 pairs; CombineCRC loads pair number (block_size - 1) as one __m128i.
+// Numbers taken directly from the Intel whitepaper.
+// clang-format off
+static const uint64_t clmul_constants alignas(16) [] = {
+ 0x14cd00bd6, 0x105ec76f0, 0x0ba4fc28e, 0x14cd00bd6,
+ 0x1d82c63da, 0x0f20c0dfe, 0x09e4addf8, 0x0ba4fc28e,
+ 0x039d3b296, 0x1384aa63a, 0x102f9b8a2, 0x1d82c63da,
+ 0x14237f5e6, 0x01c291d04, 0x00d3b6092, 0x09e4addf8,
+ 0x0c96cfdc0, 0x0740eef02, 0x18266e456, 0x039d3b296,
+ 0x0daece73e, 0x0083a6eec, 0x0ab7aff2a, 0x102f9b8a2,
+ 0x1248ea574, 0x1c1733996, 0x083348832, 0x14237f5e6,
+ 0x12c743124, 0x02ad91c30, 0x0b9e02b86, 0x00d3b6092,
+ 0x018b33a4e, 0x06992cea2, 0x1b331e26a, 0x0c96cfdc0,
+ 0x17d35ba46, 0x07e908048, 0x1bf2e8b8a, 0x18266e456,
+ 0x1a3e0968a, 0x11ed1f9d8, 0x0ce7f39f4, 0x0daece73e,
+ 0x061d82e56, 0x0f1d0f55e, 0x0d270f1a2, 0x0ab7aff2a,
+ 0x1c3f5f66c, 0x0a87ab8a8, 0x12ed0daac, 0x1248ea574,
+ 0x065863b64, 0x08462d800, 0x11eef4f8e, 0x083348832,
+ 0x1ee54f54c, 0x071d111a8, 0x0b3e32c28, 0x12c743124,
+ 0x0064f7f26, 0x0ffd852c6, 0x0dd7e3b0c, 0x0b9e02b86,
+ 0x0f285651c, 0x0dcb17aa4, 0x010746f3c, 0x018b33a4e,
+ 0x1c24afea4, 0x0f37c5aee, 0x0271d9844, 0x1b331e26a,
+ 0x08e766a0c, 0x06051d5a2, 0x093a5f730, 0x17d35ba46,
+ 0x06cb08e5c, 0x11d5ca20e, 0x06b749fb2, 0x1bf2e8b8a,
+ 0x1167f94f2, 0x021f3d99c, 0x0cec3662e, 0x1a3e0968a,
+ 0x19329634a, 0x08f158014, 0x0e6fc4e6a, 0x0ce7f39f4,
+ 0x08227bb8a, 0x1a5e82106, 0x0b0cd4768, 0x061d82e56,
+ 0x13c2b89c4, 0x188815ab2, 0x0d7a4825c, 0x0d270f1a2,
+ 0x10f5ff2ba, 0x105405f3e, 0x00167d312, 0x1c3f5f66c,
+ 0x0f6076544, 0x0e9adf796, 0x026f6a60a, 0x12ed0daac,
+ 0x1a2adb74e, 0x096638b34, 0x19d34af3a, 0x065863b64,
+ 0x049c3cc9c, 0x1e50585a0, 0x068bce87a, 0x11eef4f8e,
+ 0x1524fa6c6, 0x19f1c69dc, 0x16cba8aca, 0x1ee54f54c,
+ 0x042d98888, 0x12913343e, 0x1329d9f7e, 0x0b3e32c28,
+ 0x1b1c69528, 0x088f25a3a, 0x02178513a, 0x0064f7f26,
+ 0x0e0ac139e, 0x04e36f0b0, 0x0170076fa, 0x0dd7e3b0c,
+ 0x141a1a2e2, 0x0bd6f81f8, 0x16ad828b4, 0x0f285651c,
+ 0x041d17b64, 0x19425cbba, 0x1fae1cc66, 0x010746f3c,
+ 0x1a75b4b00, 0x18db37e8a, 0x0f872e54c, 0x1c24afea4,
+ 0x01e41e9fc, 0x04c144932, 0x086d8e4d2, 0x0271d9844,
+ 0x160f7af7a, 0x052148f02, 0x05bb8f1bc, 0x08e766a0c,
+ 0x0a90fd27a, 0x0a3c6f37a, 0x0b3af077a, 0x093a5f730,
+ 0x04984d782, 0x1d22c238e, 0x0ca6ef3ac, 0x06cb08e5c,
+ 0x0234e0b26, 0x063ded06a, 0x1d88abd4a, 0x06b749fb2,
+ 0x04597456a, 0x04d56973c, 0x0e9e28eb4, 0x1167f94f2,
+ 0x07b3ff57a, 0x19385bf2e, 0x0c9c8b782, 0x0cec3662e,
+ 0x13a9cba9e, 0x0e417f38a, 0x093e106a4, 0x19329634a,
+ 0x167001a9c, 0x14e727980, 0x1ddffc5d4, 0x0e6fc4e6a,
+ 0x00df04680, 0x0d104b8fc, 0x02342001e, 0x08227bb8a,
+ 0x00a2a8d7e, 0x05b397730, 0x168763fa6, 0x0b0cd4768,
+ 0x1ed5a407a, 0x0e78eb416, 0x0d2c3ed1a, 0x13c2b89c4,
+ 0x0995a5724, 0x1641378f0, 0x19b1afbc4, 0x0d7a4825c,
+ 0x109ffedc0, 0x08d96551c, 0x0f2271e60, 0x10f5ff2ba,
+ 0x00b0bf8ca, 0x00bf80dd2, 0x123888b7a, 0x00167d312,
+ 0x1e888f7dc, 0x18dcddd1c, 0x002ee03b2, 0x0f6076544,
+ 0x183e8d8fe, 0x06a45d2b2, 0x133d7a042, 0x026f6a60a,
+ 0x116b0f50c, 0x1dd3e10e8, 0x05fabe670, 0x1a2adb74e,
+ 0x130004488, 0x0de87806c, 0x000bcf5f6, 0x19d34af3a,
+ 0x18f0c7078, 0x014338754, 0x017f27698, 0x049c3cc9c,
+ 0x058ca5f00, 0x15e3e77ee, 0x1af900c24, 0x068bce87a,
+ 0x0b5cfca28, 0x0dd07448e, 0x0ded288f8, 0x1524fa6c6,
+ 0x059f229bc, 0x1d8048348, 0x06d390dec, 0x16cba8aca,
+ 0x037170390, 0x0a3e3e02c, 0x06353c1cc, 0x042d98888,
+ 0x0c4584f5c, 0x0d73c7bea, 0x1f16a3418, 0x1329d9f7e,
+ 0x0531377e2, 0x185137662, 0x1d8d9ca7c, 0x1b1c69528,
+ 0x0b25b29f2, 0x18a08b5bc, 0x19fb2a8b0, 0x02178513a,
+ 0x1a08fe6ac, 0x1da758ae0, 0x045cddf4e, 0x0e0ac139e,
+ 0x1a91647f2, 0x169cf9eb0, 0x1a0f717c4, 0x0170076fa,
+};
+// Fold the low (len & 7) bytes at next into the CRC: an optional 4-byte step,
+// then a 2-byte step, then a 1-byte step. Only bits 0..2 of len are examined.
+static inline void align_to_8(
+ size_t len,
+ uint64_t& crc0, // crc so far, updated on return
+ const unsigned char*& next) { // next data pointer, updated on return
+ uint32_t crc32bit = static_cast<uint32_t>(crc0); // only the low 32 bits of crc0 are carried forward
+ if (len & 0x04) {
+ crc32bit = _mm_crc32_u32(crc32bit, *(uint32_t*)next); // NOTE(review): type-punned load; next may be unaligned here
+ next += sizeof(uint32_t);
+ }
+ if (len & 0x02) {
+ crc32bit = _mm_crc32_u16(crc32bit, *(uint16_t*)next); // NOTE(review): same cast-based load as above
+ next += sizeof(uint16_t);
+ }
+ if (len & 0x01) {
+ crc32bit = _mm_crc32_u8(crc32bit, *(next));
+ next++;
+ }
+ crc0 = crc32bit; // written back zero-extended to 64 bits
+}
+
+// CombineCRC merges the three partial CRCs of one block into a single value.
+// It performs pclmulqdq multiplication of 2 partial CRCs and a well
+// chosen constant and xors these with the remaining CRC.
+// block_size selects the constant pair; next2 must point just past the third stream.
+static inline uint64_t CombineCRC(
+ size_t block_size,
+ uint64_t crc0,
+ uint64_t crc1,
+ uint64_t crc2,
+ const uint64_t* next2) {
+ const auto multiplier =
+ *(reinterpret_cast<const __m128i*>(clmul_constants) + block_size - 1);
+ const auto crc0_xmm = _mm_set_epi64x(0, crc0);
+ const auto res0 = _mm_clmulepi64_si128(crc0_xmm, multiplier, 0x00); // crc0 times the low 64-bit constant
+ const auto crc1_xmm = _mm_set_epi64x(0, crc1);
+ const auto res1 = _mm_clmulepi64_si128(crc1_xmm, multiplier, 0x10); // crc1 times the high 64-bit constant
+ const auto res = _mm_xor_si128(res0, res1);
+ crc0 = _mm_cvtsi128_si64(res); // keep the low 64 bits of the xor-ed products
+ crc0 = crc0 ^ *((uint64_t*)next2 - 1); // mix in the 64-bit word just before next2
+ crc2 = _mm_crc32_u64(crc2, crc0); // run the mixed word through crc32 on top of crc2
+ return crc2;
+}
+// Compute CRC-32C of the len bytes at buf, continuing from crc, using the
+// SSE4.2 crc32 instruction; inputs over 216 bytes use three interleaved streams.
+extern "C"
+uint32_t crc32c_3way(uint32_t crc, const char *buf, size_t len)
+{
+ const unsigned char* next = (const unsigned char*)buf;
+ uint64_t count;
+ uint64_t crc0, crc1, crc2;
+ crc0 = crc ^ 0xffffffffu; // work on the inverted value; inverted back on return
+
+
+ if (len >= 8) {
+ // if len > 216 then align and use triplets
+ if (len > 216) {
+ {
+ // Work on the bytes (< 8) before the first 8-byte alignment addr starts
+ auto align_bytes = (8 - (uintptr_t)next) & 7;
+ len -= align_bytes;
+ align_to_8(align_bytes, crc0, next);
+ }
+
+ // Now work on the remaining blocks
+ count = len / 24; // number of triplets
+ len %= 24; // bytes remaining
+ uint64_t n = count >> 7; // #blocks = first block + full blocks
+ uint64_t block_size = count & 127; // triplets in the first (possibly short) block
+ if (block_size == 0) {
+ block_size = 128;
+ } else {
+ n++;
+ }
+ // each nextK points one past the end of stream K; the macros index backwards from it
+ const uint64_t* next0 = (uint64_t*)next + block_size;
+ const uint64_t* next1 = next0 + block_size;
+ const uint64_t* next2 = next1 + block_size;
+
+ crc1 = crc2 = 0;
+ // Use Duff's device, a do/while loop inside a switch()
+ // statement. This needs to execute at least once, round len
+ // down to nearest triplet multiple
+ switch (block_size) {
+ case 128:
+ do {
+ // jumps here for a full block of len 128
+ CRCtriplet(crc, next, -128);
+ /* fallthrough */
+ case 127:
+ // jumps here or below for a first block smaller than 128
+ CRCtriplet(crc, next, -127);
+ /* fallthrough */
+ case 126:
+ CRCtriplet(crc, next, -126);
+ /* fallthrough */
+ case 125:
+ CRCtriplet(crc, next, -125);
+ /* fallthrough */
+ case 124:
+ CRCtriplet(crc, next, -124);
+ /* fallthrough */
+ case 123:
+ CRCtriplet(crc, next, -123);
+ /* fallthrough */
+ case 122:
+ CRCtriplet(crc, next, -122);
+ /* fallthrough */
+ case 121:
+ CRCtriplet(crc, next, -121);
+ /* fallthrough */
+ case 120:
+ CRCtriplet(crc, next, -120);
+ /* fallthrough */
+ case 119:
+ CRCtriplet(crc, next, -119);
+ /* fallthrough */
+ case 118:
+ CRCtriplet(crc, next, -118);
+ /* fallthrough */
+ case 117:
+ CRCtriplet(crc, next, -117);
+ /* fallthrough */
+ case 116:
+ CRCtriplet(crc, next, -116);
+ /* fallthrough */
+ case 115:
+ CRCtriplet(crc, next, -115);
+ /* fallthrough */
+ case 114:
+ CRCtriplet(crc, next, -114);
+ /* fallthrough */
+ case 113:
+ CRCtriplet(crc, next, -113);
+ /* fallthrough */
+ case 112:
+ CRCtriplet(crc, next, -112);
+ /* fallthrough */
+ case 111:
+ CRCtriplet(crc, next, -111);
+ /* fallthrough */
+ case 110:
+ CRCtriplet(crc, next, -110);
+ /* fallthrough */
+ case 109:
+ CRCtriplet(crc, next, -109);
+ /* fallthrough */
+ case 108:
+ CRCtriplet(crc, next, -108);
+ /* fallthrough */
+ case 107:
+ CRCtriplet(crc, next, -107);
+ /* fallthrough */
+ case 106:
+ CRCtriplet(crc, next, -106);
+ /* fallthrough */
+ case 105:
+ CRCtriplet(crc, next, -105);
+ /* fallthrough */
+ case 104:
+ CRCtriplet(crc, next, -104);
+ /* fallthrough */
+ case 103:
+ CRCtriplet(crc, next, -103);
+ /* fallthrough */
+ case 102:
+ CRCtriplet(crc, next, -102);
+ /* fallthrough */
+ case 101:
+ CRCtriplet(crc, next, -101);
+ /* fallthrough */
+ case 100:
+ CRCtriplet(crc, next, -100);
+ /* fallthrough */
+ case 99:
+ CRCtriplet(crc, next, -99);
+ /* fallthrough */
+ case 98:
+ CRCtriplet(crc, next, -98);
+ /* fallthrough */
+ case 97:
+ CRCtriplet(crc, next, -97);
+ /* fallthrough */
+ case 96:
+ CRCtriplet(crc, next, -96);
+ /* fallthrough */
+ case 95:
+ CRCtriplet(crc, next, -95);
+ /* fallthrough */
+ case 94:
+ CRCtriplet(crc, next, -94);
+ /* fallthrough */
+ case 93:
+ CRCtriplet(crc, next, -93);
+ /* fallthrough */
+ case 92:
+ CRCtriplet(crc, next, -92);
+ /* fallthrough */
+ case 91:
+ CRCtriplet(crc, next, -91);
+ /* fallthrough */
+ case 90:
+ CRCtriplet(crc, next, -90);
+ /* fallthrough */
+ case 89:
+ CRCtriplet(crc, next, -89);
+ /* fallthrough */
+ case 88:
+ CRCtriplet(crc, next, -88);
+ /* fallthrough */
+ case 87:
+ CRCtriplet(crc, next, -87);
+ /* fallthrough */
+ case 86:
+ CRCtriplet(crc, next, -86);
+ /* fallthrough */
+ case 85:
+ CRCtriplet(crc, next, -85);
+ /* fallthrough */
+ case 84:
+ CRCtriplet(crc, next, -84);
+ /* fallthrough */
+ case 83:
+ CRCtriplet(crc, next, -83);
+ /* fallthrough */
+ case 82:
+ CRCtriplet(crc, next, -82);
+ /* fallthrough */
+ case 81:
+ CRCtriplet(crc, next, -81);
+ /* fallthrough */
+ case 80:
+ CRCtriplet(crc, next, -80);
+ /* fallthrough */
+ case 79:
+ CRCtriplet(crc, next, -79);
+ /* fallthrough */
+ case 78:
+ CRCtriplet(crc, next, -78);
+ /* fallthrough */
+ case 77:
+ CRCtriplet(crc, next, -77);
+ /* fallthrough */
+ case 76:
+ CRCtriplet(crc, next, -76);
+ /* fallthrough */
+ case 75:
+ CRCtriplet(crc, next, -75);
+ /* fallthrough */
+ case 74:
+ CRCtriplet(crc, next, -74);
+ /* fallthrough */
+ case 73:
+ CRCtriplet(crc, next, -73);
+ /* fallthrough */
+ case 72:
+ CRCtriplet(crc, next, -72);
+ /* fallthrough */
+ case 71:
+ CRCtriplet(crc, next, -71);
+ /* fallthrough */
+ case 70:
+ CRCtriplet(crc, next, -70);
+ /* fallthrough */
+ case 69:
+ CRCtriplet(crc, next, -69);
+ /* fallthrough */
+ case 68:
+ CRCtriplet(crc, next, -68);
+ /* fallthrough */
+ case 67:
+ CRCtriplet(crc, next, -67);
+ /* fallthrough */
+ case 66:
+ CRCtriplet(crc, next, -66);
+ /* fallthrough */
+ case 65:
+ CRCtriplet(crc, next, -65);
+ /* fallthrough */
+ case 64:
+ CRCtriplet(crc, next, -64);
+ /* fallthrough */
+ case 63:
+ CRCtriplet(crc, next, -63);
+ /* fallthrough */
+ case 62:
+ CRCtriplet(crc, next, -62);
+ /* fallthrough */
+ case 61:
+ CRCtriplet(crc, next, -61);
+ /* fallthrough */
+ case 60:
+ CRCtriplet(crc, next, -60);
+ /* fallthrough */
+ case 59:
+ CRCtriplet(crc, next, -59);
+ /* fallthrough */
+ case 58:
+ CRCtriplet(crc, next, -58);
+ /* fallthrough */
+ case 57:
+ CRCtriplet(crc, next, -57);
+ /* fallthrough */
+ case 56:
+ CRCtriplet(crc, next, -56);
+ /* fallthrough */
+ case 55:
+ CRCtriplet(crc, next, -55);
+ /* fallthrough */
+ case 54:
+ CRCtriplet(crc, next, -54);
+ /* fallthrough */
+ case 53:
+ CRCtriplet(crc, next, -53);
+ /* fallthrough */
+ case 52:
+ CRCtriplet(crc, next, -52);
+ /* fallthrough */
+ case 51:
+ CRCtriplet(crc, next, -51);
+ /* fallthrough */
+ case 50:
+ CRCtriplet(crc, next, -50);
+ /* fallthrough */
+ case 49:
+ CRCtriplet(crc, next, -49);
+ /* fallthrough */
+ case 48:
+ CRCtriplet(crc, next, -48);
+ /* fallthrough */
+ case 47:
+ CRCtriplet(crc, next, -47);
+ /* fallthrough */
+ case 46:
+ CRCtriplet(crc, next, -46);
+ /* fallthrough */
+ case 45:
+ CRCtriplet(crc, next, -45);
+ /* fallthrough */
+ case 44:
+ CRCtriplet(crc, next, -44);
+ /* fallthrough */
+ case 43:
+ CRCtriplet(crc, next, -43);
+ /* fallthrough */
+ case 42:
+ CRCtriplet(crc, next, -42);
+ /* fallthrough */
+ case 41:
+ CRCtriplet(crc, next, -41);
+ /* fallthrough */
+ case 40:
+ CRCtriplet(crc, next, -40);
+ /* fallthrough */
+ case 39:
+ CRCtriplet(crc, next, -39);
+ /* fallthrough */
+ case 38:
+ CRCtriplet(crc, next, -38);
+ /* fallthrough */
+ case 37:
+ CRCtriplet(crc, next, -37);
+ /* fallthrough */
+ case 36:
+ CRCtriplet(crc, next, -36);
+ /* fallthrough */
+ case 35:
+ CRCtriplet(crc, next, -35);
+ /* fallthrough */
+ case 34:
+ CRCtriplet(crc, next, -34);
+ /* fallthrough */
+ case 33:
+ CRCtriplet(crc, next, -33);
+ /* fallthrough */
+ case 32:
+ CRCtriplet(crc, next, -32);
+ /* fallthrough */
+ case 31:
+ CRCtriplet(crc, next, -31);
+ /* fallthrough */
+ case 30:
+ CRCtriplet(crc, next, -30);
+ /* fallthrough */
+ case 29:
+ CRCtriplet(crc, next, -29);
+ /* fallthrough */
+ case 28:
+ CRCtriplet(crc, next, -28);
+ /* fallthrough */
+ case 27:
+ CRCtriplet(crc, next, -27);
+ /* fallthrough */
+ case 26:
+ CRCtriplet(crc, next, -26);
+ /* fallthrough */
+ case 25:
+ CRCtriplet(crc, next, -25);
+ /* fallthrough */
+ case 24:
+ CRCtriplet(crc, next, -24);
+ /* fallthrough */
+ case 23:
+ CRCtriplet(crc, next, -23);
+ /* fallthrough */
+ case 22:
+ CRCtriplet(crc, next, -22);
+ /* fallthrough */
+ case 21:
+ CRCtriplet(crc, next, -21);
+ /* fallthrough */
+ case 20:
+ CRCtriplet(crc, next, -20);
+ /* fallthrough */
+ case 19:
+ CRCtriplet(crc, next, -19);
+ /* fallthrough */
+ case 18:
+ CRCtriplet(crc, next, -18);
+ /* fallthrough */
+ case 17:
+ CRCtriplet(crc, next, -17);
+ /* fallthrough */
+ case 16:
+ CRCtriplet(crc, next, -16);
+ /* fallthrough */
+ case 15:
+ CRCtriplet(crc, next, -15);
+ /* fallthrough */
+ case 14:
+ CRCtriplet(crc, next, -14);
+ /* fallthrough */
+ case 13:
+ CRCtriplet(crc, next, -13);
+ /* fallthrough */
+ case 12:
+ CRCtriplet(crc, next, -12);
+ /* fallthrough */
+ case 11:
+ CRCtriplet(crc, next, -11);
+ /* fallthrough */
+ case 10:
+ CRCtriplet(crc, next, -10);
+ /* fallthrough */
+ case 9:
+ CRCtriplet(crc, next, -9);
+ /* fallthrough */
+ case 8:
+ CRCtriplet(crc, next, -8);
+ /* fallthrough */
+ case 7:
+ CRCtriplet(crc, next, -7);
+ /* fallthrough */
+ case 6:
+ CRCtriplet(crc, next, -6);
+ /* fallthrough */
+ case 5:
+ CRCtriplet(crc, next, -5);
+ /* fallthrough */
+ case 4:
+ CRCtriplet(crc, next, -4);
+ /* fallthrough */
+ case 3:
+ CRCtriplet(crc, next, -3);
+ /* fallthrough */
+ case 2:
+ CRCtriplet(crc, next, -2);
+ /* fallthrough */
+ case 1:
+ CRCduplet(crc, next, -1); // the final triplet is actually only 2
+ // the third stream's last word is xor-ed in and folded by CombineCRC
+ crc0 = CombineCRC(block_size, crc0, crc1, crc2, next2);
+ if (--n > 0) {
+ crc1 = crc2 = 0;
+ block_size = 128;
+ // points to the first byte of the next block
+ next0 = next2 + 128;
+ next1 = next0 + 128; // from here on all blocks are 128 long
+ next2 = next1 + 128;
+ }
+ /* fallthrough */
+ case 0:;
+ } while (n > 0);
+ }
+ next = (const unsigned char*)next2; // continue byte addressing from the end of the third stream
+ }
+ uint64_t count2 = len >> 3; // 216 or fewer bytes is 27 or fewer singlets
+ len = len & 7;
+ next += (count2 * 8);
+ switch (count2) {
+ case 27:
+ CRCsinglet(crc0, next, -27 * 8);
+ /* fallthrough */
+ case 26:
+ CRCsinglet(crc0, next, -26 * 8);
+ /* fallthrough */
+ case 25:
+ CRCsinglet(crc0, next, -25 * 8);
+ /* fallthrough */
+ case 24:
+ CRCsinglet(crc0, next, -24 * 8);
+ /* fallthrough */
+ case 23:
+ CRCsinglet(crc0, next, -23 * 8);
+ /* fallthrough */
+ case 22:
+ CRCsinglet(crc0, next, -22 * 8);
+ /* fallthrough */
+ case 21:
+ CRCsinglet(crc0, next, -21 * 8);
+ /* fallthrough */
+ case 20:
+ CRCsinglet(crc0, next, -20 * 8);
+ /* fallthrough */
+ case 19:
+ CRCsinglet(crc0, next, -19 * 8);
+ /* fallthrough */
+ case 18:
+ CRCsinglet(crc0, next, -18 * 8);
+ /* fallthrough */
+ case 17:
+ CRCsinglet(crc0, next, -17 * 8);
+ /* fallthrough */
+ case 16:
+ CRCsinglet(crc0, next, -16 * 8);
+ /* fallthrough */
+ case 15:
+ CRCsinglet(crc0, next, -15 * 8);
+ /* fallthrough */
+ case 14:
+ CRCsinglet(crc0, next, -14 * 8);
+ /* fallthrough */
+ case 13:
+ CRCsinglet(crc0, next, -13 * 8);
+ /* fallthrough */
+ case 12:
+ CRCsinglet(crc0, next, -12 * 8);
+ /* fallthrough */
+ case 11:
+ CRCsinglet(crc0, next, -11 * 8);
+ /* fallthrough */
+ case 10:
+ CRCsinglet(crc0, next, -10 * 8);
+ /* fallthrough */
+ case 9:
+ CRCsinglet(crc0, next, -9 * 8);
+ /* fallthrough */
+ case 8:
+ CRCsinglet(crc0, next, -8 * 8);
+ /* fallthrough */
+ case 7:
+ CRCsinglet(crc0, next, -7 * 8);
+ /* fallthrough */
+ case 6:
+ CRCsinglet(crc0, next, -6 * 8);
+ /* fallthrough */
+ case 5:
+ CRCsinglet(crc0, next, -5 * 8);
+ /* fallthrough */
+ case 4:
+ CRCsinglet(crc0, next, -4 * 8);
+ /* fallthrough */
+ case 3:
+ CRCsinglet(crc0, next, -3 * 8);
+ /* fallthrough */
+ case 2:
+ CRCsinglet(crc0, next, -2 * 8);
+ /* fallthrough */
+ case 1:
+ CRCsinglet(crc0, next, -1 * 8);
+ /* fallthrough */
+ case 0:;
+ }
+ }
+ {
+ align_to_8(len, crc0, next); // the remaining 0..7 tail bytes
+ return (uint32_t)crc0 ^ 0xffffffffu;
+ }
+}
diff --git a/mysys/crc32ieee.cc b/mysys/crc32ieee.cc
index 5f8344b4f9d..bbafa1230f8 100644
--- a/mysys/crc32ieee.cc
+++ b/mysys/crc32ieee.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2020, 2021, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -39,25 +39,23 @@ typedef unsigned int (*my_crc32_t)(unsigned int, const void *, size_t);
static my_crc32_t init_crc32()
{
- my_crc32_t func= my_crc32_zlib;
#ifdef HAVE_PCLMUL
if (crc32_pclmul_enabled())
- func = crc32_pclmul;
+ return crc32_pclmul; // chosen when crc32_pclmul_enabled() reports true
#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
if (crc32_aarch64_available())
- func= crc32_aarch64;
+ return crc32_aarch64; // chosen when crc32_aarch64_available() reports true
#endif
- return func;
+ return my_crc32_zlib; // fallback when no branch above returned
}
static const my_crc32_t my_checksum_func= init_crc32();
-#ifndef __powerpc64__
-/* For powerpc, my_checksum is defined elsewhere.*/
-extern "C" unsigned int my_checksum(unsigned int crc, const void *data, size_t len)
+#ifdef __powerpc64__
+# error "my_checksum() is defined in mysys/crc32/crc32_ppc64.c"
+#endif
+extern "C" // C-linkage entry point; forwards to the routine stored in my_checksum_func
+unsigned int my_checksum(unsigned int crc, const void *data, size_t len)
{
return my_checksum_func(crc, data, len);
}
-#endif
-
-