summaryrefslogtreecommitdiff
path: root/storage/innobase/ut/ut0crc32.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/ut/ut0crc32.cc')
-rw-r--r--storage/innobase/ut/ut0crc32.cc736
1 files changed, 566 insertions, 170 deletions
diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc
index 4d2d311ff48..44b1c4b30b4 100644
--- a/storage/innobase/ut/ut0crc32.cc
+++ b/storage/innobase/ut/ut0crc32.cc
@@ -1,7 +1,8 @@
/*****************************************************************************
-Copyright (C) 2009, 2010 Facebook, Inc. All Rights Reserved.
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2009, 2010 Facebook, Inc. All Rights Reserved.
+Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -63,10 +64,9 @@ mysys/my_perf.c, contributed by Facebook under the following license.
/* The below CRC32 implementation is based on the implementation included with
* zlib with modifications to process 8 bytes at a time and using SSE 4.2
- * extentions when available. The polynomial constant has been changed to
+ * extensions when available. The polynomial constant has been changed to
* match the one used by SSE 4.2 and does not return the same value as the
- * version used by zlib. This implementation only supports 64-bit
- * little-endian processors. The original zlib copyright notice follows. */
+ * version used by zlib. The original zlib copyright notice follows. */
/* crc32.c -- compute the CRC-32 of a buf stream
* Copyright (C) 1995-2005 Mark Adler
@@ -79,27 +79,382 @@ mysys/my_perf.c, contributed by Facebook under the following license.
* factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
*/
-#include "univ.i"
+// First include (the generated) my_config.h, to get correct platform defines.
+#include "my_config.h"
+#include <string.h>
+
#include "ut0crc32.h"
-#if defined(__linux__) && defined(__powerpc__)
-/* Used to detect at runtime if we have vpmsum instructions (PowerISA 2.07) */
-#include <sys/auxv.h>
-#include <bits/hwcap.h>
-#endif /* defined(__linux__) && defined(__powerpc__) */
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+/** Pointer to CRC32 calculation function. */
+ut_crc32_func_t ut_crc32;
+
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Pointer to CRC32 calculation function, which uses big-endian byte order
+when converting byte strings to integers internally. */
+ut_crc32_func_t ut_crc32_legacy_big_endian;
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+
+/** Text description of CRC32 implementation */
+const char* ut_crc32_implementation;
+
+/** Swap the byte order of an 8 byte integer.
+@param[in] i 8-byte integer
+@return 8-byte integer */
+inline
+uint64_t
+ut_crc32_swap_byteorder(
+ uint64_t i)
+{
+ return(i << 56
+ | (i & 0x000000000000FF00ULL) << 40
+ | (i & 0x0000000000FF0000ULL) << 24
+ | (i & 0x00000000FF000000ULL) << 8
+ | (i & 0x000000FF00000000ULL) >> 8
+ | (i & 0x0000FF0000000000ULL) >> 24
+ | (i & 0x00FF000000000000ULL) >> 40
+ | i >> 56);
+}
-#include <string.h>
+/* CRC32 hardware implementation. */
+
+#ifdef HAVE_CRC32_VPMSUM
+extern "C" {
+unsigned int crc32c_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
+};
+UNIV_INLINE
+ib_uint32_t
+ut_crc32_power8(
+/*===========*/
+ const byte* buf, /*!< in: data over which to calculate CRC32 */
+ ulint len) /*!< in: data length */
+{
+ return crc32c_vpmsum(0, buf, len);
+}
+#endif
+
+#if (defined(__GNUC__) && defined(__x86_64__)) || defined(_MSC_VER)
+/********************************************************************//**
+Fetches CPU info */
+static
+void
+ut_cpuid(
+/*=====*/
+ uint32_t vend[3], /*!< out: CPU vendor */
+ uint32_t* model, /*!< out: CPU model */
+ uint32_t* family, /*!< out: CPU family */
+ uint32_t* stepping, /*!< out: CPU stepping */
+ uint32_t* features_ecx, /*!< out: CPU features ecx */
+ uint32_t* features_edx) /*!< out: CPU features edx */
+{
+ uint32_t sig;
+#ifdef _MSC_VER
+ int data[4];
+ __cpuid(data, 0);
+ /* ebx */
+ vend[0] = data[1];
+ /* edx */
+ vend[1] = data[3];
+ /* ecx */
+ vend[2] = data[2];
+
+ __cpuid(data, 1);
+ /* eax */
+ sig = data[0];
+ /* ecx */
+ *features_ecx = data[2];
+ /* edx */
+ *features_edx = data[3];
+#else
+ asm("cpuid" : "=b" (vend[0]), "=c" (vend[2]), "=d" (vend[1]) : "a" (0));
+ asm("cpuid" : "=a" (sig), "=c" (*features_ecx), "=d" (*features_edx)
+ : "a" (1)
+ : "ebx");
+#endif
+
+ *model = ((sig >> 4) & 0xF);
+ *family = ((sig >> 8) & 0xF);
+ *stepping = (sig & 0xF);
+
+ if (memcmp(vend, "GenuineIntel", 12) == 0
+ || (memcmp(vend, "AuthenticAMD", 12) == 0 && *family == 0xF)) {
+
+ *model += (((sig >> 16) & 0xF) << 4);
+ *family += ((sig >> 20) & 0xFF);
+ }
+}
+
+/** Calculate CRC32 over 8-bit data using a hardware/CPU instruction.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 1 byte
+@param[in,out] len remaining bytes, it will be decremented with 1 */
+inline
+void
+ut_crc32_8_hw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
+{
+#ifdef _MSC_VER
+ *crc = _mm_crc32_u8(*crc, (*data)[0]);
+#else
+ asm("crc32b %1, %0"
+ /* output operands */
+ : "+r" (*crc)
+ /* input operands */
+ : "rm" ((*data)[0]));
+#endif
+
+ (*data)++;
+ (*len)--;
+}
+
+/** Calculate CRC32 over a 64-bit integer using a hardware/CPU instruction.
+@param[in] crc crc32 checksum so far
+@param[in] data data to be checksummed
+@return resulting checksum of crc + crc(data) */
+inline
+uint32_t
+ut_crc32_64_low_hw(
+ uint32_t crc,
+ uint64_t data)
+{
+ uint64_t crc_64bit = crc;
+#ifdef _MSC_VER
+#ifdef _M_X64
+ crc_64bit = _mm_crc32_u64(crc_64bit, data);
+#elif defined(_M_IX86)
+ crc = _mm_crc32_u32(crc, static_cast<uint32_t>(data));
+ crc_64bit = _mm_crc32_u32(crc, static_cast<uint32_t>(data >> 32));
+#else
+#error Not Supported processors type.
+#endif
+#else
+ asm("crc32q %1, %0"
+ /* output operands */
+ : "+r" (crc_64bit)
+ /* input operands */
+ : "rm" (data));
+#endif
+
+ return(static_cast<uint32_t>(crc_64bit));
+}
+
+/** Calculate CRC32 over 64-bit byte string using a hardware/CPU instruction.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 8 bytes
+@param[in,out] len remaining bytes, it will be decremented with 8 */
+inline
+void
+ut_crc32_64_hw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
+{
+ uint64_t data_int = *reinterpret_cast<const uint64_t*>(*data);
+
+#ifdef WORDS_BIGENDIAN
+ /* Currently we only support x86_64 (little endian) CPUs. In case
+ some big endian CPU supports a CRC32 instruction, then maybe we will
+ need a byte order swap here. */
+#error Dont know how to handle big endian CPUs
+ /*
+ data_int = ut_crc32_swap_byteorder(data_int);
+ */
+#endif /* WORDS_BIGENDIAN */
+
+ *crc = ut_crc32_64_low_hw(*crc, data_int);
+
+ *data += 8;
+ *len -= 8;
+}
+
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculate CRC32 over 64-bit byte string using a hardware/CPU instruction.
+The byte string is converted to a 64-bit integer using big endian byte order.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 8 bytes
+@param[in,out] len remaining bytes, it will be decremented with 8 */
+inline
+void
+ut_crc32_64_legacy_big_endian_hw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
+{
+ uint64_t data_int = *reinterpret_cast<const uint64_t*>(*data);
+
+#ifndef WORDS_BIGENDIAN
+ data_int = ut_crc32_swap_byteorder(data_int);
+#else
+ /* Currently we only support x86_64 (little endian) CPUs. In case
+ some big endian CPU supports a CRC32 instruction, then maybe we will
+ NOT need a byte order swap here. */
+#error Dont know how to handle big endian CPUs
+#endif /* WORDS_BIGENDIAN */
+
+ *crc = ut_crc32_64_low_hw(*crc, data_int);
+
+ *data += 8;
+ *len -= 8;
+}
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+
+/** Calculates CRC32 using hardware/CPU instructions.
+@param[in] buf data over which to calculate CRC32
+@param[in] len data length
+@return CRC-32C (polynomial 0x11EDC6F41) */
+uint32_t
+ut_crc32_hw(
+ const byte* buf,
+ ulint len)
+{
+ uint32_t crc = 0xFFFFFFFFU;
+
+ /* Calculate byte-by-byte up to an 8-byte aligned address. After
+ this consume the input 8-bytes at a time. */
+ while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+ ut_crc32_8_hw(&crc, &buf, &len);
+ }
+
+ /* Perf testing
+ ./unittest/gunit/innodb/merge_innodb_tests-t --gtest_filter=ut0crc32.perf
+ on CPU "Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz"
+ with different N in "while (len >= N) {" shows:
+ N=16
+ 2.867254 sec
+ 2.866860 sec
+ 2.867973 sec
+
+ N=32
+ 2.715725 sec
+ 2.713008 sec
+ 2.712520 sec
+ (5.36% speedup over N=16)
+
+ N=64
+ 2.634140 sec
+ 2.636558 sec
+ 2.636488 sec
+ (2.88% speedup over N=32)
+
+ N=128
+ 2.599534 sec
+ 2.599919 sec
+ 2.598035 sec
+ (1.39% speedup over N=64)
+
+ N=256
+ 2.576993 sec
+ 2.576748 sec
+ 2.575700 sec
+ (0.87% speedup over N=128)
+
+ N=512
+ 2.693928 sec
+ 2.691663 sec
+ 2.692142 sec
+ (4.51% slowdown over N=256)
+ */
+ while (len >= 128) {
+ /* This call is repeated 16 times. 16 * 8 = 128. */
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ ut_crc32_64_hw(&crc, &buf, &len);
+ }
+
+ while (len >= 8) {
+ ut_crc32_64_hw(&crc, &buf, &len);
+ }
+
+ while (len > 0) {
+ ut_crc32_8_hw(&crc, &buf, &len);
+ }
+
+ return(~crc);
+}
+
+# ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculates CRC32 using hardware/CPU instructions.
+This function uses big endian byte ordering when converting byte sequence to
+integers.
+@param[in] buf data over which to calculate CRC32
+@param[in] len data length
+@return CRC-32C (polynomial 0x11EDC6F41) */
+uint32_t
+ut_crc32_legacy_big_endian_hw(
+ const byte* buf,
+ ulint len)
+{
+ uint32_t crc = 0xFFFFFFFFU;
-ib_ut_crc32_t ut_crc32;
+ /* Calculate byte-by-byte up to an 8-byte aligned address. After
+ this consume the input 8-bytes at a time. */
+ while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+ ut_crc32_8_hw(&crc, &buf, &len);
+ }
+
+ while (len >= 128) {
+ /* This call is repeated 16 times. 16 * 8 = 128. */
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ }
+
+ while (len >= 8) {
+ ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+ }
+
+ while (len > 0) {
+ ut_crc32_8_hw(&crc, &buf, &len);
+ }
+
+ return(~crc);
+}
+# endif /* INNODB_BUG_ENDIAN_CRC32 */
+#endif /* defined(__GNUC__) && defined(__x86_64__) || (_WIN64) */
+
+/* CRC32 software implementation. */
/* Precalculated table used to generate the CRC32 if the CPU does not
have support for it */
-static ib_uint32_t ut_crc32_slice8_table[8][256];
-static ibool ut_crc32_slice8_table_initialized = FALSE;
-
-/* Flag that tells whether the CPU supports CRC32 or not */
-UNIV_INTERN bool ut_crc32_sse2_enabled = false;
-UNIV_INTERN bool ut_crc32_power8_enabled = false;
+static uint32_t ut_crc32_slice8_table[8][256];
+static bool ut_crc32_slice8_table_initialized = false;
/********************************************************************//**
Initializes the table that is used to generate the CRC32 if the CPU does
@@ -110,10 +465,10 @@ ut_crc32_slice8_table_init()
/*========================*/
{
/* bit-reversed poly 0x1EDC6F41 (from SSE42 crc32 instruction) */
- static const ib_uint32_t poly = 0x82f63b78;
- ib_uint32_t n;
- ib_uint32_t k;
- ib_uint32_t c;
+ static const uint32_t poly = 0x82f63b78;
+ uint32_t n;
+ uint32_t k;
+ uint32_t c;
for (n = 0; n < 256; n++) {
c = n;
@@ -131,206 +486,247 @@ ut_crc32_slice8_table_init()
}
}
- ut_crc32_slice8_table_initialized = TRUE;
+ ut_crc32_slice8_table_initialized = true;
}
-#if defined(__GNUC__) && defined(__x86_64__)
-/********************************************************************//**
-Fetches CPU info */
-static
+/** Calculate CRC32 over 8-bit data using a software implementation.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 1 byte
+@param[in,out] len remaining bytes, it will be decremented with 1 */
+inline
void
-ut_cpuid(
-/*=====*/
- ib_uint32_t vend[3], /*!< out: CPU vendor */
- ib_uint32_t* model, /*!< out: CPU model */
- ib_uint32_t* family, /*!< out: CPU family */
- ib_uint32_t* stepping, /*!< out: CPU stepping */
- ib_uint32_t* features_ecx, /*!< out: CPU features ecx */
- ib_uint32_t* features_edx) /*!< out: CPU features edx */
+ut_crc32_8_sw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
{
- ib_uint32_t sig;
- asm("cpuid" : "=b" (vend[0]), "=c" (vend[2]), "=d" (vend[1]) : "a" (0));
- asm("cpuid" : "=a" (sig), "=c" (*features_ecx), "=d" (*features_edx)
- : "a" (1)
- : "ebx");
+ const uint8_t i = (*crc ^ (*data)[0]) & 0xFF;
- *model = ((sig >> 4) & 0xF);
- *family = ((sig >> 8) & 0xF);
- *stepping = (sig & 0xF);
+ *crc = (*crc >> 8) ^ ut_crc32_slice8_table[0][i];
- if (memcmp(vend, "GenuineIntel", 12) == 0
- || (memcmp(vend, "AuthenticAMD", 12) == 0 && *family == 0xF)) {
+ (*data)++;
+ (*len)--;
+}
- *model += (((sig >> 16) & 0xF) << 4);
- *family += ((sig >> 20) & 0xFF);
- }
+/** Calculate CRC32 over a 64-bit integer using a software implementation.
+@param[in] crc crc32 checksum so far
+@param[in] data data to be checksummed
+@return resulting checksum of crc + crc(data) */
+inline
+uint32_t
+ut_crc32_64_low_sw(
+ uint32_t crc,
+ uint64_t data)
+{
+ const uint64_t i = crc ^ data;
+
+ return(
+ ut_crc32_slice8_table[7][(i ) & 0xFF] ^
+ ut_crc32_slice8_table[6][(i >> 8) & 0xFF] ^
+ ut_crc32_slice8_table[5][(i >> 16) & 0xFF] ^
+ ut_crc32_slice8_table[4][(i >> 24) & 0xFF] ^
+ ut_crc32_slice8_table[3][(i >> 32) & 0xFF] ^
+ ut_crc32_slice8_table[2][(i >> 40) & 0xFF] ^
+ ut_crc32_slice8_table[1][(i >> 48) & 0xFF] ^
+ ut_crc32_slice8_table[0][(i >> 56)]
+ );
}
-/* opcodes taken from objdump of "crc32b (%%rdx), %%rcx"
-for RHEL4 support (GCC 3 doesn't support this instruction) */
-#define ut_crc32_sse42_byte \
- asm(".byte 0xf2, 0x48, 0x0f, 0x38, 0xf0, 0x0a" \
- : "=c"(crc) : "c"(crc), "d"(buf)); \
- len--, buf++
-
-/* opcodes taken from objdump of "crc32q (%%rdx), %%rcx"
-for RHEL4 support (GCC 3 doesn't support this instruction) */
-#define ut_crc32_sse42_quadword \
- asm(".byte 0xf2, 0x48, 0x0f, 0x38, 0xf1, 0x0a" \
- : "=c"(crc) : "c"(crc), "d"(buf)); \
- len -= 8, buf += 8
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
-
-#if defined(__powerpc__)
-extern "C" {
-unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
-};
-#endif /* __powerpc__ */
+/** Calculate CRC32 over 64-bit byte string using a software implementation.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 8 bytes
+@param[in,out] len remaining bytes, it will be decremented with 8 */
+inline
+void
+ut_crc32_64_sw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
+{
+ uint64_t data_int = *reinterpret_cast<const uint64_t*>(*data);
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_power8(
-/*===========*/
- const byte* buf, /*!< in: data over which to calculate CRC32 */
- ulint len) /*!< in: data length */
+#ifdef WORDS_BIGENDIAN
+ data_int = ut_crc32_swap_byteorder(data_int);
+#endif /* WORDS_BIGENDIAN */
+
+ *crc = ut_crc32_64_low_sw(*crc, data_int);
+
+ *data += 8;
+ *len -= 8;
+}
+
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculate CRC32 over 64-bit byte string using a software implementation.
+The byte string is converted to a 64-bit integer using big endian byte order.
+@param[in,out] crc crc32 checksum so far when this function is called,
+when the function ends it will contain the new checksum
+@param[in,out] data data to be checksummed, the pointer will be advanced
+with 8 bytes
+@param[in,out] len remaining bytes, it will be decremented with 8 */
+inline
+void
+ut_crc32_64_legacy_big_endian_sw(
+ uint32_t* crc,
+ const byte** data,
+ ulint* len)
{
-#if defined(__powerpc__) && !defined(WORDS_BIGENDIAN)
- return crc32_vpmsum(0, buf, len);
-#else
- ut_error;
- /* silence compiler warning about unused parameters */
- return((ib_uint32_t) buf[len]);
-#endif /* __powerpc__ */
+ uint64_t data_int = *reinterpret_cast<const uint64_t*>(*data);
+
+#ifndef WORDS_BIGENDIAN
+ data_int = ut_crc32_swap_byteorder(data_int);
+#endif /* WORDS_BIGENDIAN */
+
+ *crc = ut_crc32_64_low_sw(*crc, data_int);
+
+ *data += 8;
+ *len -= 8;
}
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
-/********************************************************************//**
-Calculates CRC32 using CPU instructions.
+/** Calculates CRC32 in software, without using CPU instructions.
+@param[in] buf data over which to calculate CRC32
+@param[in] len data length
@return CRC-32C (polynomial 0x11EDC6F41) */
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_sse42(
-/*===========*/
- const byte* buf, /*!< in: data over which to calculate CRC32 */
- ulint len) /*!< in: data length */
+uint32_t
+ut_crc32_sw(
+ const byte* buf,
+ ulint len)
{
-#if defined(__GNUC__) && defined(__x86_64__)
- ib_uint64_t crc = (ib_uint32_t) (-1);
+ uint32_t crc = 0xFFFFFFFFU;
- ut_a(ut_crc32_sse2_enabled);
+ ut_a(ut_crc32_slice8_table_initialized);
- while (len && ((ulint) buf & 7)) {
- ut_crc32_sse42_byte;
+ /* Calculate byte-by-byte up to an 8-byte aligned address. After
+ this consume the input 8-bytes at a time. */
+ while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+ ut_crc32_8_sw(&crc, &buf, &len);
}
- while (len >= 32) {
- ut_crc32_sse42_quadword;
- ut_crc32_sse42_quadword;
- ut_crc32_sse42_quadword;
- ut_crc32_sse42_quadword;
+ while (len >= 128) {
+ /* This call is repeated 16 times. 16 * 8 = 128. */
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
+ ut_crc32_64_sw(&crc, &buf, &len);
}
while (len >= 8) {
- ut_crc32_sse42_quadword;
+ ut_crc32_64_sw(&crc, &buf, &len);
}
- while (len) {
- ut_crc32_sse42_byte;
+ while (len > 0) {
+ ut_crc32_8_sw(&crc, &buf, &len);
}
- return((ib_uint32_t) ((~crc) & 0xFFFFFFFF));
-#else
- ut_error;
- /* silence compiler warning about unused parameters */
- return((ib_uint32_t) buf[len]);
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
+ return(~crc);
}
-#define ut_crc32_slice8_byte \
- crc = (crc >> 8) ^ ut_crc32_slice8_table[0][(crc ^ *buf++) & 0xFF]; \
- len--
-
-#define ut_crc32_slice8_quadword \
- crc ^= *(ib_uint64_t*) buf; \
- crc = ut_crc32_slice8_table[7][(crc ) & 0xFF] ^ \
- ut_crc32_slice8_table[6][(crc >> 8) & 0xFF] ^ \
- ut_crc32_slice8_table[5][(crc >> 16) & 0xFF] ^ \
- ut_crc32_slice8_table[4][(crc >> 24) & 0xFF] ^ \
- ut_crc32_slice8_table[3][(crc >> 32) & 0xFF] ^ \
- ut_crc32_slice8_table[2][(crc >> 40) & 0xFF] ^ \
- ut_crc32_slice8_table[1][(crc >> 48) & 0xFF] ^ \
- ut_crc32_slice8_table[0][(crc >> 56)]; \
- len -= 8, buf += 8
-
-/********************************************************************//**
-Calculates CRC32 manually.
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculates CRC32 in software, without using CPU instructions.
+This function uses big endian byte ordering when converting byte sequence to
+integers.
+@param[in] buf data over which to calculate CRC32
+@param[in] len data length
@return CRC-32C (polynomial 0x11EDC6F41) */
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_slice8(
-/*============*/
- const byte* buf, /*!< in: data over which to calculate CRC32 */
- ulint len) /*!< in: data length */
+uint32_t
+ut_crc32_legacy_big_endian_sw(
+ const byte* buf,
+ ulint len)
{
- ib_uint64_t crc = (ib_uint32_t) (-1);
+ uint32_t crc = 0xFFFFFFFFU;
ut_a(ut_crc32_slice8_table_initialized);
- while (len && ((ulint) buf & 7)) {
- ut_crc32_slice8_byte;
+ /* Calculate byte-by-byte up to an 8-byte aligned address. After
+ this consume the input 8-bytes at a time. */
+ while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+ ut_crc32_8_sw(&crc, &buf, &len);
}
- while (len >= 32) {
- ut_crc32_slice8_quadword;
- ut_crc32_slice8_quadword;
- ut_crc32_slice8_quadword;
- ut_crc32_slice8_quadword;
+ while (len >= 128) {
+ /* This call is repeated 16 times. 16 * 8 = 128. */
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
}
while (len >= 8) {
- ut_crc32_slice8_quadword;
+ ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
}
- while (len) {
- ut_crc32_slice8_byte;
+ while (len > 0) {
+ ut_crc32_8_sw(&crc, &buf, &len);
}
- return((ib_uint32_t) ((~crc) & 0xFFFFFFFF));
+ return(~crc);
}
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
/********************************************************************//**
-Initializes the data structures used by ut_crc32(). Does not do any
+Initializes the data structures used by ut_crc32*(). Does not do any
allocations, would not hurt if called twice, but would be pointless. */
-UNIV_INTERN
void
ut_crc32_init()
/*===========*/
{
-#if defined(__GNUC__) && defined(__x86_64__)
- ib_uint32_t vend[3];
- ib_uint32_t model;
- ib_uint32_t family;
- ib_uint32_t stepping;
- ib_uint32_t features_ecx;
- ib_uint32_t features_edx;
+ ut_crc32_slice8_table_init();
+ ut_crc32 = ut_crc32_sw;
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ ut_crc32_legacy_big_endian = ut_crc32_legacy_big_endian_sw;
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+ ut_crc32_implementation = "Using generic crc32 instructions";
+
+#if (defined(__GNUC__) && defined(__x86_64__)) || defined(_MSC_VER)
+ uint32_t vend[3];
+ uint32_t model;
+ uint32_t family;
+ uint32_t stepping;
+ uint32_t features_ecx;
+ uint32_t features_edx;
ut_cpuid(vend, &model, &family, &stepping,
&features_ecx, &features_edx);
- ut_crc32_sse2_enabled = (features_ecx >> 20) & 1;
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
-
-#if defined(__linux__) && defined(__powerpc__) && defined(AT_HWCAP2) \
- && !defined(WORDS_BIGENDIAN)
- if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
- ut_crc32_power8_enabled = true;
-#endif /* defined(__linux__) && defined(__powerpc__) */
-
- if (ut_crc32_sse2_enabled) {
- ut_crc32 = ut_crc32_sse42;
- } else if (ut_crc32_power8_enabled) {
- ut_crc32 = ut_crc32_power8;
- } else {
- ut_crc32_slice8_table_init();
- ut_crc32 = ut_crc32_slice8;
+ if (features_ecx & 1 << 20) {
+ ut_crc32 = ut_crc32_hw;
+#ifdef INNODB_BUG_ENDIAN_CRC32
+ ut_crc32_legacy_big_endian = ut_crc32_legacy_big_endian_hw;
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+ ut_crc32_implementation = "Using SSE2 crc32 instructions";
}
+
+#elif defined(HAVE_CRC32_VPMSUM)
+ ut_crc32 = ut_crc32_power8;
+ ut_crc32_implementation = "Using POWER8 crc32 instructions";
+#endif
+
}