1 files changed, 566 insertions, 170 deletions
diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc
index 4d2d311ff48..44b1c4b30b4 100644
--- a/storage/innobase/ut/ut0crc32.cc
+++ b/storage/innobase/ut/ut0crc32.cc
@@ -1,7 +1,8 @@
 /*****************************************************************************
 
-Copyright (C) 2009, 2010 Facebook, Inc. All Rights Reserved.
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2009, 2010 Facebook, Inc. All Rights Reserved.
+Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2018, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -63,10 +64,9 @@ mysys/my_perf.c, contributed by Facebook under the following license.
 
 /* The below CRC32 implementation is based on the implementation included with
  * zlib with modifications to process 8 bytes at a time and using SSE 4.2
- * extentions when available.  The polynomial constant has been changed to
+ * extensions when available.  The polynomial constant has been changed to
  * match the one used by SSE 4.2 and does not return the same value as the
- * version used by zlib.  This implementation only supports 64-bit
- * little-endian processors.  The original zlib copyright notice follows. */
+ * version used by zlib.  The original zlib copyright notice follows. */
 
 /* crc32.c -- compute the CRC-32 of a buf stream
  * Copyright (C) 1995-2005 Mark Adler
@@ -79,27 +79,382 @@ mysys/my_perf.c, contributed by Facebook under the following license.
  * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
  */
 
-#include "univ.i"
+// First include (the generated) my_config.h, to get correct platform defines.
+#include "my_config.h"
+#include <string.h>
+
 #include "ut0crc32.h"
 
-#if defined(__linux__) && defined(__powerpc__)
-/* Used to detect at runtime if we have vpmsum instructions (PowerISA 2.07) */
-#include <sys/auxv.h>
-#include <bits/hwcap.h>
-#endif /* defined(__linux__) && defined(__powerpc__) */
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+/** Pointer to CRC32 calculation function. */
+ut_crc32_func_t	ut_crc32;
+
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Pointer to CRC32 calculation function, which uses big-endian byte order
+when converting byte strings to integers internally. */
+ut_crc32_func_t	ut_crc32_legacy_big_endian;
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+
+/** Text description of CRC32 implementation */
+const char*	ut_crc32_implementation;
+
+/** Reverse the byte order of an 8-byte integer (endianness swap).
+@param[in]	i	8-byte integer
+@return i with its eight bytes in reversed order */
+inline
+uint64_t
+ut_crc32_swap_byteorder(
+	uint64_t	i)
+{
+	return(i << 56
+	       | (i & 0x000000000000FF00ULL) << 40
+	       | (i & 0x0000000000FF0000ULL) << 24
+	       | (i & 0x00000000FF000000ULL) << 8
+	       | (i & 0x000000FF00000000ULL) >> 8
+	       | (i & 0x0000FF0000000000ULL) >> 24
+	       | (i & 0x00FF000000000000ULL) >> 40
+	       | i >> 56);
+}
 
-#include <string.h>
+/* CRC32 hardware implementation. */
+
+#ifdef HAVE_CRC32_VPMSUM /* CRC32 through the external crc32c_vpmsum() routine */
+extern "C" {
+unsigned int crc32c_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
+};
+UNIV_INLINE
+ib_uint32_t
+ut_crc32_power8(
+/*===========*/
+		const byte*		buf,		/*!< in: data over which to calculate CRC32 */
+		ulint			len)		/*!< in: data length */
+{
+	return crc32c_vpmsum(0, buf, len); /* 0 is passed as the crc argument */
+}
+#endif /* HAVE_CRC32_VPMSUM */
+
+#if (defined(__GNUC__) && defined(__x86_64__)) || defined(_MSC_VER)
+/********************************************************************//**
+Fetches CPU vendor, model/family/stepping and the CPUID leaf 1 feature words */
+static
+void
+ut_cpuid(
+/*=====*/
+	uint32_t	vend[3],	/*!< out: CPU vendor (EBX, EDX, ECX of leaf 0) */
+	uint32_t*	model,		/*!< out: CPU model */
+	uint32_t*	family,		/*!< out: CPU family */
+	uint32_t*	stepping,	/*!< out: CPU stepping */
+	uint32_t*	features_ecx,	/*!< out: CPU features ecx (leaf 1) */
+	uint32_t*	features_edx)	/*!< out: CPU features edx (leaf 1) */
+{
+	uint32_t	sig;
+#ifdef _MSC_VER
+	int data[4];
+	__cpuid(data, 0);
+	/* ebx */
+	vend[0] = data[1];
+	/* edx */
+	vend[1] = data[3];
+	/* ecx */
+	vend[2] = data[2];
+
+	__cpuid(data, 1);
+	/* eax */
+	sig = data[0];
+	/* ecx */
+	*features_ecx = data[2];
+	/* edx */
+	*features_edx = data[3];
+#else /* inline asm: CPUID writes EAX too, so EAX must be declared as an output */
+	asm("cpuid" : "=a" (sig), "=b" (vend[0]), "=c" (vend[2]), "=d" (vend[1]) : "a" (0));
+	asm("cpuid" : "=a" (sig), "=c" (*features_ecx), "=d" (*features_edx)
+	    : "a" (1)
+	    : "ebx");
+#endif
+
+	*model = ((sig >> 4) & 0xF);	/* signature bits 4..7 */
+	*family = ((sig >> 8) & 0xF);	/* signature bits 8..11 */
+	*stepping = (sig & 0xF);	/* signature bits 0..3 */
+
+	if (memcmp(vend, "GenuineIntel", 12) == 0
+	    || (memcmp(vend, "AuthenticAMD", 12) == 0 && *family == 0xF)) {
+
+		*model += (((sig >> 16) & 0xF) << 4);	/* extended model bits */
+		*family += ((sig >> 20) & 0xFF);	/* extended family bits */
+	}
+}
+
+/** Fold one byte into a running CRC32 using the CPU's crc32 instruction.
+@param[in,out]	crc	checksum so far; updated in place with the new checksum
+@param[in,out]	data	data to be checksummed; the byte at (*data)[0] is
+consumed and the pointer is advanced by 1 byte
+@param[in,out]	len	remaining bytes; decremented by 1 without any check, so
+the caller must ensure that *len > 0 */
+inline
+void
+ut_crc32_8_hw(
+	uint32_t*	crc,
+	const byte**	data,
+	ulint*		len)
+{
+#ifdef _MSC_VER
+	*crc = _mm_crc32_u8(*crc, (*data)[0]);
+#else
+	asm("crc32b %1, %0"
+	    /* output operands: the checksum is both read and written */
+	    : "+r" (*crc)
+	    /* input operands: the data byte, from a register or memory */
+	    : "rm" ((*data)[0]));
+#endif
+
+	(*data)++;
+	(*len)--;
+}
+
+/** Fold a 64-bit integer into a CRC32 using a hardware/CPU instruction.
+@param[in]	crc	crc32 checksum so far
+@param[in]	data	8 bytes of data to be checksummed, as one integer
+@return crc updated with data; only the low 32 bits of the result are kept */
+inline
+uint32_t
+ut_crc32_64_low_hw(
+	uint32_t	crc,
+	uint64_t	data)
+{
+	uint64_t	crc_64bit = crc;
+#ifdef _MSC_VER
+#ifdef _M_X64
+	crc_64bit = _mm_crc32_u64(crc_64bit, data);
+#elif defined(_M_IX86) /* 32-bit build: feed the low half, then the high half */
+	crc = _mm_crc32_u32(crc, static_cast<uint32_t>(data));
+	crc_64bit = _mm_crc32_u32(crc, static_cast<uint32_t>(data >> 32));
+#else
+#error Not Supported processors type.
+#endif
+#else
+	asm("crc32q %1, %0"
+	    /* output operands: the checksum is both read and written */
+	    : "+r" (crc_64bit)
+	    /* input operands: the 64-bit data, from a register or memory */
+	    : "rm" (data));
+#endif
+
+	return(static_cast<uint32_t>(crc_64bit));
+}
+
+/** Fold the next 8 bytes of a byte string into a running CRC32 using the
+hardware instruction. The bytes are read in native byte order.
+@param[in,out]	crc	checksum so far; updated in place with the new checksum
+@param[in,out]	data	data to be checksummed; 8 bytes are read through a
+uint64_t pointer and the pointer is then advanced by 8 bytes
+@param[in,out]	len	remaining bytes; decremented by 8 without any check */
+inline
+void
+ut_crc32_64_hw(
+	uint32_t*	crc,
+	const byte**	data,
+	ulint*		len)
+{
+	uint64_t	data_int = *reinterpret_cast<const uint64_t*>(*data); /* direct 64-bit load; ut_crc32_hw() aligns buf first */
+
+#ifdef WORDS_BIGENDIAN
+	/* Currently we only support x86_64 (little endian) CPUs. In case
+	some big endian CPU supports a CRC32 instruction, then maybe we will
+	need a byte order swap here. Until then the build is stopped. */
+#error Dont know how to handle big endian CPUs
+	/*
+	data_int = ut_crc32_swap_byteorder(data_int);
+	*/
+#endif /* WORDS_BIGENDIAN */
+
+	*crc = ut_crc32_64_low_hw(*crc, data_int);
+
+	*data += 8;
+	*len -= 8;
+}
+
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Fold the next 8 bytes of a byte string into a running CRC32 using the
+hardware instruction, interpreting the 8 bytes as a big endian integer.
+@param[in,out]	crc	checksum so far; updated in place with the new checksum
+@param[in,out]	data	data to be checksummed; 8 bytes are read through a
+uint64_t pointer, byte-swapped on little endian builds, and the pointer is
+then advanced by 8 bytes
+@param[in,out]	len	remaining bytes; decremented by 8 without any check */
+inline
+void
+ut_crc32_64_legacy_big_endian_hw(
+	uint32_t*	crc,
+	const byte**	data,
+	ulint*		len)
+{
+	uint64_t	data_int = *reinterpret_cast<const uint64_t*>(*data);
+
+#ifndef WORDS_BIGENDIAN
+	data_int = ut_crc32_swap_byteorder(data_int);
+#else
+	/* Currently we only support x86_64 (little endian) CPUs. In case
+	some big endian CPU supports a CRC32 instruction, then maybe we will
+	NOT need a byte order swap here. Until then the build is stopped. */
+#error Dont know how to handle big endian CPUs
+#endif /* WORDS_BIGENDIAN */
+
+	*crc = ut_crc32_64_low_hw(*crc, data_int);
+
+	*data += 8;
+	*len -= 8;
+}
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+
+/** Calculates CRC32 using hardware/CPU instructions.
+@param[in]	buf	data over which to calculate CRC32; need not be aligned
+@param[in]	len	data length in bytes; with len == 0 no byte is read
+@return CRC-32C (polynomial 0x11EDC6F41) */
+uint32_t
+ut_crc32_hw(
+	const byte*	buf,
+	ulint		len)
+{
+	uint32_t	crc = 0xFFFFFFFFU;	/* start from all ones */
+
+	/* Calculate byte-by-byte up to an 8-byte aligned address. After
+	this consume the input 8-bytes at a time. */
+	while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+		ut_crc32_8_hw(&crc, &buf, &len);
+	}
+
+	/* Perf testing of the unrolled loop below (which uses N=128):
+	./unittest/gunit/innodb/merge_innodb_tests-t --gtest_filter=ut0crc32.perf
+	on CPU "Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz"
+	with different N in "while (len >= N) {" shows:
+	N=16
+	2.867254 sec
+	2.866860 sec
+	2.867973 sec
+
+	N=32
+	2.715725 sec
+	2.713008 sec
+	2.712520 sec
+	(5.36% speedup over N=16)
+
+	N=64
+	2.634140 sec
+	2.636558 sec
+	2.636488 sec
+	(2.88% speedup over N=32)
+
+	N=128
+	2.599534 sec
+	2.599919 sec
+	2.598035 sec
+	(1.39% speedup over N=64)
+
+	N=256
+	2.576993 sec
+	2.576748 sec
+	2.575700 sec
+	(0.87% speedup over N=128)
+
+	N=512
+	2.693928 sec
+	2.691663 sec
+	2.692142 sec
+	(4.51% slowdown over N=256)
+	*/
+	while (len >= 128) {
+		/* This call is repeated 16 times. 16 * 8 = 128. */
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+		ut_crc32_64_hw(&crc, &buf, &len);
+	}
+
+	while (len >= 8) {	/* remaining whole 8-byte groups */
+		ut_crc32_64_hw(&crc, &buf, &len);
+	}
+
+	while (len > 0) {	/* trailing bytes, fewer than 8 */
+		ut_crc32_8_hw(&crc, &buf, &len);
+	}
+
+	return(~crc);	/* the result is the bitwise complement of the state */
+}
+
+# ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculates CRC32 using hardware/CPU instructions.
+This function uses big endian byte ordering when converting each aligned
+8-byte group to an integer; leading and trailing bytes are fed one by one.
+@param[in]	buf	data over which to calculate CRC32; need not be aligned
+@param[in]	len	data length in bytes
+@return checksum based on CRC-32C (polynomial 0x11EDC6F41) */
+uint32_t
+ut_crc32_legacy_big_endian_hw(
+	const byte*	buf,
+	ulint		len)
+{
+	uint32_t	crc = 0xFFFFFFFFU;	/* start from all ones */
 
-ib_ut_crc32_t	ut_crc32;
+	/* Calculate byte-by-byte up to an 8-byte aligned address. After
+	this consume the input 8-bytes at a time. */
+	while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+		ut_crc32_8_hw(&crc, &buf, &len);
+	}
+
+	while (len >= 128) {
+		/* This call is repeated 16 times. 16 * 8 = 128. */
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+	}
+
+	while (len >= 8) {	/* remaining whole 8-byte groups */
+		ut_crc32_64_legacy_big_endian_hw(&crc, &buf, &len);
+	}
+
+	while (len > 0) {	/* trailing bytes, fewer than 8 */
+		ut_crc32_8_hw(&crc, &buf, &len);
+	}
+
+	return(~crc);	/* the result is the bitwise complement of the state */
+}
+# endif /* INNODB_BUG_ENDIAN_CRC32 */
+#endif /* (defined(__GNUC__) && defined(__x86_64__)) || defined(_MSC_VER) */
+
+/* CRC32 software implementation. */
 
 /* Precalculated table used to generate the CRC32 if the CPU does not
 have support for it */
-static ib_uint32_t	ut_crc32_slice8_table[8][256];
-static ibool		ut_crc32_slice8_table_initialized = FALSE;
-
-/* Flag that tells whether the CPU supports CRC32 or not */
-UNIV_INTERN bool	ut_crc32_sse2_enabled = false;
-UNIV_INTERN bool		 ut_crc32_power8_enabled = false;
+static uint32_t	ut_crc32_slice8_table[8][256];
+static bool	ut_crc32_slice8_table_initialized = false;
 
 /********************************************************************//**
 Initializes the table that is used to generate the CRC32 if the CPU does
@@ -110,10 +465,10 @@ ut_crc32_slice8_table_init()
 /*========================*/
 {
 	/* bit-reversed poly 0x1EDC6F41 (from SSE42 crc32 instruction) */
-	static const ib_uint32_t	poly = 0x82f63b78;
-	ib_uint32_t			n;
-	ib_uint32_t			k;
-	ib_uint32_t			c;
+	static const uint32_t	poly = 0x82f63b78;
+	uint32_t		n;
+	uint32_t		k;
+	uint32_t		c;
 
 	for (n = 0; n < 256; n++) {
 		c = n;
@@ -131,206 +486,247 @@ ut_crc32_slice8_table_init()
 		}
 	}
 
-	ut_crc32_slice8_table_initialized = TRUE;
+	ut_crc32_slice8_table_initialized = true;
 }
 
-#if defined(__GNUC__) && defined(__x86_64__)
-/********************************************************************//**
-Fetches CPU info */
-static
+/** Fold one byte into a running CRC32 using the precalculated lookup table.
+@param[in,out]	crc	checksum so far; updated in place with the new checksum
+@param[in,out]	data	data to be checksummed; the byte at (*data)[0] is
+consumed and the pointer is advanced by 1 byte
+@param[in,out]	len	remaining bytes; decremented by 1 without any check, so
+the caller must ensure that *len > 0 */
+inline
 void
-ut_cpuid(
-/*=====*/
-	ib_uint32_t	vend[3],	/*!< out: CPU vendor */
-	ib_uint32_t*	model,		/*!< out: CPU model */
-	ib_uint32_t*	family,		/*!< out: CPU family */
-	ib_uint32_t*	stepping,	/*!< out: CPU stepping */
-	ib_uint32_t*	features_ecx,	/*!< out: CPU features ecx */
-	ib_uint32_t*	features_edx)	/*!< out: CPU features edx */
+ut_crc32_8_sw(
+	uint32_t*	crc,
+	const byte**	data,
+	ulint*		len)
 {
-	ib_uint32_t	sig;
-	asm("cpuid" : "=b" (vend[0]), "=c" (vend[2]), "=d" (vend[1]) : "a" (0));
-	asm("cpuid" : "=a" (sig), "=c" (*features_ecx), "=d" (*features_edx)
-	    : "a" (1)
-	    : "ebx");
+	const uint8_t	i = (*crc ^ (*data)[0]) & 0xFF;	/* table index */
 
-	*model = ((sig >> 4) & 0xF);
-	*family = ((sig >> 8) & 0xF);
-	*stepping = (sig & 0xF);
+	*crc = (*crc >> 8) ^ ut_crc32_slice8_table[0][i];
 
-	if (memcmp(vend, "GenuineIntel", 12) == 0
-	    || (memcmp(vend, "AuthenticAMD", 12) == 0 && *family == 0xF)) {
+	(*data)++;
+	(*len)--;
+}
 
-		*model += (((sig >> 16) & 0xF) << 4);
-		*family += ((sig >> 20) & 0xFF);
-	}
+/** Fold a 64-bit integer into a CRC32 using the eight lookup tables.
+@param[in]	crc	crc32 checksum so far
+@param[in]	data	8 bytes of data to be checksummed, as one integer
+@return crc updated with data: the XOR of one table entry per byte */
+inline
+uint32_t
+ut_crc32_64_low_sw(
+	uint32_t	crc,
+	uint64_t	data)
+{
+	const uint64_t	i = crc ^ data;	/* crc meets the low 32 bits of data */
+
+	return(
+		ut_crc32_slice8_table[7][(i      ) & 0xFF] ^
+		ut_crc32_slice8_table[6][(i >>  8) & 0xFF] ^
+		ut_crc32_slice8_table[5][(i >> 16) & 0xFF] ^
+		ut_crc32_slice8_table[4][(i >> 24) & 0xFF] ^
+		ut_crc32_slice8_table[3][(i >> 32) & 0xFF] ^
+		ut_crc32_slice8_table[2][(i >> 40) & 0xFF] ^
+		ut_crc32_slice8_table[1][(i >> 48) & 0xFF] ^
+		ut_crc32_slice8_table[0][(i >> 56)]
+	);
 }
 
-/* opcodes taken from objdump of "crc32b (%%rdx), %%rcx"
-for RHEL4 support (GCC 3 doesn't support this instruction) */
-#define ut_crc32_sse42_byte \
-	asm(".byte 0xf2, 0x48, 0x0f, 0x38, 0xf0, 0x0a" \
-	    : "=c"(crc) : "c"(crc), "d"(buf)); \
-	len--, buf++
-
-/* opcodes taken from objdump of "crc32q (%%rdx), %%rcx"
-for RHEL4 support (GCC 3 doesn't support this instruction) */
-#define ut_crc32_sse42_quadword \
-	asm(".byte 0xf2, 0x48, 0x0f, 0x38, 0xf1, 0x0a" \
-	    : "=c"(crc) : "c"(crc), "d"(buf)); \
-	len -= 8, buf += 8
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
-
-#if defined(__powerpc__)
-extern "C" {
-unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
-};
-#endif /* __powerpc__ */
+/** Fold the next 8 bytes of a byte string into a running CRC32 in software.
+The 8 bytes are interpreted as a little endian integer on every platform.
+@param[in,out]	crc	checksum so far; updated in place with the new checksum
+@param[in,out]	data	data to be checksummed; 8 bytes are read through a
+uint64_t pointer and the pointer is then advanced by 8 bytes
+@param[in,out]	len	remaining bytes; decremented by 8 without any check */
+inline
+void
+ut_crc32_64_sw(
+	uint32_t*	crc,
+	const byte**	data,
+	ulint*		len)
+{
+	uint64_t	data_int = *reinterpret_cast<const uint64_t*>(*data);
 
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_power8(
-/*===========*/
-		 const byte*		 buf,		 /*!< in: data over which to calculate CRC32 */
-		 ulint		 		 len)		 /*!< in: data length */
+#ifdef WORDS_BIGENDIAN /* convert the native big endian load to little endian */
+	data_int = ut_crc32_swap_byteorder(data_int);
+#endif /* WORDS_BIGENDIAN */
+
+	*crc = ut_crc32_64_low_sw(*crc, data_int);
+
+	*data += 8;
+	*len -= 8;
+}
+
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Fold the next 8 bytes of a byte string into a running CRC32 in software.
+The 8 bytes are interpreted as a big endian integer on every platform.
+@param[in,out]	crc	checksum so far; updated in place with the new checksum
+@param[in,out]	data	data to be checksummed; 8 bytes are read through a
+uint64_t pointer, byte-swapped on little endian builds, and the pointer is
+then advanced by 8 bytes
+@param[in,out]	len	remaining bytes; decremented by 8 without any check */
+inline
+void
+ut_crc32_64_legacy_big_endian_sw(
+	uint32_t*	crc,
+	const byte**	data,
+	ulint*		len)
 {
-#if defined(__powerpc__) && !defined(WORDS_BIGENDIAN)
-  return crc32_vpmsum(0, buf, len);
-#else
-		 ut_error;
-		 /* silence compiler warning about unused parameters */
-		 return((ib_uint32_t) buf[len]);
-#endif /* __powerpc__ */
+	uint64_t	data_int = *reinterpret_cast<const uint64_t*>(*data);
+
+#ifndef WORDS_BIGENDIAN /* convert the native little endian load to big endian */
+	data_int = ut_crc32_swap_byteorder(data_int);
+#endif /* WORDS_BIGENDIAN */
+
+	*crc = ut_crc32_64_low_sw(*crc, data_int);
+
+	*data += 8;
+	*len -= 8;
 }
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
 
-/********************************************************************//**
-Calculates CRC32 using CPU instructions.
+/** Calculates CRC32 in software, using the precalculated slice-by-8 tables.
+@param[in]	buf	data over which to calculate CRC32; need not be aligned
+@param[in]	len	data length in bytes; with len == 0 no byte is read
 @return CRC-32C (polynomial 0x11EDC6F41) */
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_sse42(
-/*===========*/
-	const byte*	buf,	/*!< in: data over which to calculate CRC32 */
-	ulint		len)	/*!< in: data length */
+uint32_t
+ut_crc32_sw(
+	const byte*	buf,
+	ulint		len)
 {
-#if defined(__GNUC__) && defined(__x86_64__)
-	ib_uint64_t	crc = (ib_uint32_t) (-1);
+	uint32_t	crc = 0xFFFFFFFFU;	/* start from all ones */
 
-	ut_a(ut_crc32_sse2_enabled);
+	ut_a(ut_crc32_slice8_table_initialized);	/* tables must be built first */
 
-	while (len && ((ulint) buf & 7)) {
-		ut_crc32_sse42_byte;
+	/* Calculate byte-by-byte up to an 8-byte aligned address. After
+	this consume the input 8-bytes at a time. */
+	while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+		ut_crc32_8_sw(&crc, &buf, &len);
 	}
 
-	while (len >= 32) {
-		ut_crc32_sse42_quadword;
-		ut_crc32_sse42_quadword;
-		ut_crc32_sse42_quadword;
-		ut_crc32_sse42_quadword;
+	while (len >= 128) {
+		/* This call is repeated 16 times. 16 * 8 = 128. */
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
+		ut_crc32_64_sw(&crc, &buf, &len);
 	}
 
 	while (len >= 8) {
-		ut_crc32_sse42_quadword;
+		ut_crc32_64_sw(&crc, &buf, &len);
 	}
 
-	while (len) {
-		ut_crc32_sse42_byte;
+	while (len > 0) {	/* trailing bytes, fewer than 8 */
+		ut_crc32_8_sw(&crc, &buf, &len);
 	}
 
-	return((ib_uint32_t) ((~crc) & 0xFFFFFFFF));
-#else
-	ut_error;
-	/* silence compiler warning about unused parameters */
-	return((ib_uint32_t) buf[len]);
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
+	return(~crc);	/* the result is the bitwise complement of the state */
 }
 
-#define ut_crc32_slice8_byte \
-	crc = (crc >> 8) ^ ut_crc32_slice8_table[0][(crc ^ *buf++) & 0xFF]; \
-	len--
-
-#define ut_crc32_slice8_quadword \
-	crc ^= *(ib_uint64_t*) buf; \
-	crc = ut_crc32_slice8_table[7][(crc      ) & 0xFF] ^ \
-	      ut_crc32_slice8_table[6][(crc >>  8) & 0xFF] ^ \
-	      ut_crc32_slice8_table[5][(crc >> 16) & 0xFF] ^ \
-	      ut_crc32_slice8_table[4][(crc >> 24) & 0xFF] ^ \
-	      ut_crc32_slice8_table[3][(crc >> 32) & 0xFF] ^ \
-	      ut_crc32_slice8_table[2][(crc >> 40) & 0xFF] ^ \
-	      ut_crc32_slice8_table[1][(crc >> 48) & 0xFF] ^ \
-	      ut_crc32_slice8_table[0][(crc >> 56)]; \
-	len -= 8, buf += 8
-
-/********************************************************************//**
-Calculates CRC32 manually.
+#ifdef INNODB_BUG_ENDIAN_CRC32
+/** Calculates CRC32 in software, using the precalculated slice-by-8 tables.
+This function uses big endian byte ordering when converting each aligned
+8-byte group to an integer; leading and trailing bytes are fed one by one.
+@param[in]	buf	data over which to calculate CRC32; need not be aligned
+@param[in]	len	data length in bytes
 @return CRC-32C (polynomial 0x11EDC6F41) */
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_slice8(
-/*============*/
-	const byte*	buf,	/*!< in: data over which to calculate CRC32 */
-	ulint		len)	/*!< in: data length */
+uint32_t
+ut_crc32_legacy_big_endian_sw(
+	const byte*	buf,
+	ulint		len)
 {
-	ib_uint64_t	crc = (ib_uint32_t) (-1);
+	uint32_t	crc = 0xFFFFFFFFU;	/* start from all ones */
 
 	ut_a(ut_crc32_slice8_table_initialized);
 
-	while (len && ((ulint) buf & 7)) {
-		ut_crc32_slice8_byte;
+	/* Calculate byte-by-byte up to an 8-byte aligned address. After
+	this consume the input 8-bytes at a time. */
+	while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+		ut_crc32_8_sw(&crc, &buf, &len);
 	}
 
-	while (len >= 32) {
-		ut_crc32_slice8_quadword;
-		ut_crc32_slice8_quadword;
-		ut_crc32_slice8_quadword;
-		ut_crc32_slice8_quadword;
+	while (len >= 128) {
+		/* This call is repeated 16 times. 16 * 8 = 128. */
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
 	}
 
 	while (len >= 8) {
-		ut_crc32_slice8_quadword;
+		ut_crc32_64_legacy_big_endian_sw(&crc, &buf, &len);
 	}
 
-	while (len) {
-		ut_crc32_slice8_byte;
+	while (len > 0) {	/* trailing bytes, fewer than 8 */
+		ut_crc32_8_sw(&crc, &buf, &len);
 	}
 
-	return((ib_uint32_t) ((~crc) & 0xFFFFFFFF));
+	return(~crc);	/* the result is the bitwise complement of the state */
 }
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
 
 /********************************************************************//**
-Initializes the data structures used by ut_crc32(). Does not do any
+Initializes the data structures used by ut_crc32*(). Does not do any
 allocations, would not hurt if called twice, but would be pointless. */
-UNIV_INTERN
 void
 ut_crc32_init()
 /*===========*/
 {
-#if defined(__GNUC__) && defined(__x86_64__)
-	ib_uint32_t	vend[3];
-	ib_uint32_t	model;
-	ib_uint32_t	family;
-	ib_uint32_t	stepping;
-	ib_uint32_t	features_ecx;
-	ib_uint32_t	features_edx;
+	ut_crc32_slice8_table_init();	/* always built: software is the fallback */
+	ut_crc32 = ut_crc32_sw;
+#ifdef INNODB_BUG_ENDIAN_CRC32
+	ut_crc32_legacy_big_endian = ut_crc32_legacy_big_endian_sw;
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+	ut_crc32_implementation = "Using generic crc32 instructions";
+
+#if (defined(__GNUC__) && defined(__x86_64__)) || defined(_MSC_VER)
+	uint32_t	vend[3];
+	uint32_t	model;
+	uint32_t	family;
+	uint32_t	stepping;
+	uint32_t	features_ecx;
+	uint32_t	features_edx;
 
 	ut_cpuid(vend, &model, &family, &stepping,
 		 &features_ecx, &features_edx);
 
-	ut_crc32_sse2_enabled = (features_ecx >> 20) & 1;
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
-
-#if defined(__linux__) && defined(__powerpc__) && defined(AT_HWCAP2) \
-        && !defined(WORDS_BIGENDIAN)
-	if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07)
-		 ut_crc32_power8_enabled = true;
-#endif /* defined(__linux__) && defined(__powerpc__) */
-
-	if (ut_crc32_sse2_enabled) {
-		ut_crc32 = ut_crc32_sse42;
-	} else if (ut_crc32_power8_enabled) {
-	 	ut_crc32 = ut_crc32_power8;
-	} else {
-		ut_crc32_slice8_table_init();
-		ut_crc32 = ut_crc32_slice8;
+	if (features_ecx & (1U << 20)) {	/* CPUID.1:ECX bit 20 = SSE4.2 */
+		ut_crc32 = ut_crc32_hw;
+#ifdef INNODB_BUG_ENDIAN_CRC32
+		ut_crc32_legacy_big_endian = ut_crc32_legacy_big_endian_hw;
+#endif /* INNODB_BUG_ENDIAN_CRC32 */
+		ut_crc32_implementation = "Using SSE4.2 crc32 instructions";
 	}
+
+#elif defined(HAVE_CRC32_VPMSUM)
+	ut_crc32 = ut_crc32_power8;	/* selected at build time, no runtime check */
+	ut_crc32_implementation = "Using POWER8 crc32 instructions";
+#endif
+
 }