summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile6
-rwxr-xr-xbuild_detect_platform30
-rw-r--r--port/port_example.h6
-rw-r--r--port/port_posix.h2
-rw-r--r--port/port_posix_sse.cc125
-rw-r--r--util/crc32c.cc18
6 files changed, 186 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index 07a5a1e..66e3226 100644
--- a/Makefile
+++ b/Makefile
@@ -412,3 +412,9 @@ $(SHARED_OUTDIR)/%.o: %.cc
$(SHARED_OUTDIR)/%.o: %.c
$(CC) $(CFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@
+
+$(STATIC_OUTDIR)/port/port_posix_sse.o: port/port_posix_sse.cc
+ $(CXX) $(CXXFLAGS) $(PLATFORM_SSEFLAGS) -c $< -o $@
+
+$(SHARED_OUTDIR)/port/port_posix_sse.o: port/port_posix_sse.cc
+ $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(PLATFORM_SSEFLAGS) -c $< -o $@
diff --git a/build_detect_platform b/build_detect_platform
index f062993..d2a20ce 100755
--- a/build_detect_platform
+++ b/build_detect_platform
@@ -63,6 +63,7 @@ PLATFORM_SHARED_EXT="so"
PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl,"
PLATFORM_SHARED_CFLAGS="-fPIC"
PLATFORM_SHARED_VERSIONED=true
+PLATFORM_SSEFLAGS=
MEMCMP_FLAG=
if [ "$CXX" = "g++" ]; then
@@ -77,6 +78,7 @@ case "$TARGET_OS" in
COMMON_FLAGS="$MEMCMP_FLAG -lpthread -DOS_LINUX -DCYGWIN"
PLATFORM_LDFLAGS="-lpthread"
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
;;
Darwin)
PLATFORM=OS_MACOSX
@@ -85,48 +87,56 @@ case "$TARGET_OS" in
[ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name $INSTALL_PATH/"
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
;;
Linux)
PLATFORM=OS_LINUX
COMMON_FLAGS="$MEMCMP_FLAG -pthread -DOS_LINUX"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
;;
SunOS)
PLATFORM=OS_SOLARIS
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_SOLARIS"
PLATFORM_LIBS="-lpthread -lrt"
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
;;
FreeBSD)
PLATFORM=OS_FREEBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_FREEBSD"
PLATFORM_LIBS="-lpthread"
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
;;
NetBSD)
PLATFORM=OS_NETBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_NETBSD"
PLATFORM_LIBS="-lpthread -lgcc_s"
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
;;
OpenBSD)
PLATFORM=OS_OPENBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_OPENBSD"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
;;
DragonFly)
PLATFORM=OS_DRAGONFLYBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD"
PLATFORM_LIBS="-lpthread"
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
;;
OS_ANDROID_CROSSCOMPILE)
PLATFORM=OS_ANDROID
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX"
PLATFORM_LDFLAGS="" # All pthread features are in the Android C library
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
CROSS_COMPILE=true
;;
HP-UX)
@@ -134,6 +144,7 @@ case "$TARGET_OS" in
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_HPUX"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
# man ld: +h internal_name
PLATFORM_SHARED_LDFLAGS="-shared -Wl,+h -Wl,"
;;
@@ -142,6 +153,7 @@ case "$TARGET_OS" in
COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
[ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
PORT_FILE=port/port_posix.cc
+ PORT_SSE_FILE=port/port_posix_sse.cc
PLATFORM_SHARED_EXT=
PLATFORM_SHARED_LDFLAGS=
PLATFORM_SHARED_CFLAGS=
@@ -168,7 +180,7 @@ set +f # re-enable globbing
# The sources consist of the portable files, plus the platform-specific port
# file.
-echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> $OUTPUT
+echo "SOURCES=$PORTABLE_FILES $PORT_FILE $PORT_SSE_FILE" >> $OUTPUT
echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> $OUTPUT
if [ "$CROSS_COMPILE" = "true" ]; then
@@ -210,6 +222,21 @@ EOF
fi
rm -f $CXXOUTPUT 2>/dev/null
+
+ # Test whether the compiler accepts the -msse4.2 flag (SSE 4.2 support)
+ $CXX $CXXFLAGS -x c++ - -o $CXXOUTPUT -msse4.2 2>/dev/null <<EOF
+ int main() {}
+EOF
+ if [ "$?" = 0 ]; then
+ PLATFORM_SSEFLAGS="-msse4.2"
+ fi
+
+ rm -f $CXXOUTPUT 2>/dev/null
+fi
+
+# Use the SSE 4.2 CRC32C intrinsics iff the build-time check found that the compiler supports them.
+if [ -n "$PLATFORM_SSEFLAGS" ]; then
+ PLATFORM_SSEFLAGS="$PLATFORM_SSEFLAGS -DLEVELDB_PLATFORM_POSIX_SSE"
fi
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
@@ -222,6 +249,7 @@ echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> $OUTPUT
echo "PLATFORM_LIBS=$PLATFORM_LIBS" >> $OUTPUT
echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> $OUTPUT
echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> $OUTPUT
+echo "PLATFORM_SSEFLAGS=$PLATFORM_SSEFLAGS" >> $OUTPUT
echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> $OUTPUT
echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> $OUTPUT
echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> $OUTPUT
diff --git a/port/port_example.h b/port/port_example.h
index ab9e489..97bd669 100644
--- a/port/port_example.h
+++ b/port/port_example.h
@@ -129,6 +129,12 @@ extern bool Snappy_Uncompress(const char* input_data, size_t input_length,
// The concatenation of all "data[0,n-1]" fragments is the heap profile.
extern bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg);
+// Extend the CRC to include the first "size" bytes of "buf".
+//
+// Returns zero if the CRC cannot be extended using acceleration, else returns
+// the newly extended CRC value (which may also be zero).
+uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size);
+
} // namespace port
} // namespace leveldb
diff --git a/port/port_posix.h b/port/port_posix.h
index 89fc222..d67ab68 100644
--- a/port/port_posix.h
+++ b/port/port_posix.h
@@ -148,6 +148,8 @@ inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
return false;
}
+uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size);
+
} // namespace port
} // namespace leveldb
diff --git a/port/port_posix_sse.cc b/port/port_posix_sse.cc
new file mode 100644
index 0000000..57ec8fe
--- /dev/null
+++ b/port/port_posix_sse.cc
@@ -0,0 +1,125 @@
+// Copyright 2016 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// An implementation of crc32c that uses the x86 SSE 4.2 CRC32 instructions
+// when both the compiler and the CPU running the program support them.
+//
+// In a separate source file to allow this accelerated CRC32C function to be
+// compiled with the appropriate compiler flags to enable x86 SSE 4.2
+// instructions.
+
+#include <stdint.h>
+#include <string.h>
+#include "port/port.h"
+
+#if defined(LEVELDB_PLATFORM_POSIX_SSE)
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#elif defined(__GNUC__) && defined(__SSE4_2__)
+#include <nmmintrin.h>
+#include <cpuid.h>
+#endif
+
+#endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
+
+namespace leveldb {
+namespace port {
+
+#if defined(LEVELDB_PLATFORM_POSIX_SSE)
+
+// Used to fetch a 32-bit word in little endian byte-order. The word is copied
+// out with memcpy, so |p| does not have to be naturally aligned.
+static inline uint32_t LE_LOAD32(const uint8_t *p) {
+ // SSE is x86 only, so the host byte order is always little-endian and a
+ // plain copy needs no byte swapping.
+ uint32_t word;
+ memcpy(&word, p, sizeof(word));
+ return word;
+}
+
+// Used to fetch a 64-bit word in little endian byte-order. The bytes are
+// copied verbatim with memcpy, which tolerates an unaligned |p|; this yields a
+// little-endian load only because the host (x86) is itself little-endian.
+static inline uint64_t LE_LOAD64(const uint8_t *p) {
+ uint64_t dword;
+ memcpy(&dword, p, sizeof(dword));
+ return dword;
+}
+
+// Returns true iff CPUID leaf 1 reports SSE 4.2 support (ECX bit 20) on the
+// CPU executing this code. Returns false when the compiler offers no known
+// way to issue CPUID.
+static inline bool HaveSSE42() {
+#if defined(_MSC_VER)
+  int cpu_info[4];  // Filled in as EAX, EBX, ECX, EDX.
+  __cpuid(cpu_info, 1);
+  return (cpu_info[2] & (1 << 20)) != 0;
+#elif defined(__GNUC__)
+  unsigned int eax, ebx, ecx, edx;
+  // __get_cpuid returns 0 and leaves the outputs unset when the requested
+  // leaf is unsupported, so only inspect ecx after a successful call.
+  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
+    return false;
+  }
+  return (ecx & (1 << 20)) != 0;
+#else
+  return false;
+#endif
+}
+
+#endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
+
+// Extends |crc| to cover the |size| bytes starting at |buf| using the SSE 4.2
+// CRC32 instructions.
+//
+// Returns 0 when acceleration is unavailable, i.e. when
+// LEVELDB_PLATFORM_POSIX_SSE is not defined or HaveSSE42() reports false.
+// Otherwise returns the extended CRC, which may itself be 0.
+//
+// For further improvements see Intel publication at:
+// http://download.intel.com/design/intarch/papers/323405.pdf
+uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
+#if !defined(LEVELDB_PLATFORM_POSIX_SSE)
+  return 0;
+#else
+  // The CPU capability is queried once, on the first call.
+  static bool have = HaveSSE42();
+  if (!have) {
+    return 0;
+  }
+
+  const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
+  const uint8_t *e = p + size;
+  // The running value is kept bit-inverted; it is inverted back on return.
+  uint32_t l = crc ^ 0xffffffffu;
+
+#define STEP1 do {                              \
+    l = _mm_crc32_u8(l, *p++);                  \
+} while (0)
+#define STEP4 do {                              \
+    l = _mm_crc32_u32(l, LE_LOAD32(p));         \
+    p += 4;                                     \
+} while (0)
+#define STEP8 do {                              \
+    l = _mm_crc32_u64(l, LE_LOAD64(p));         \
+    p += 8;                                     \
+} while (0)
+
+  if (size > 16) {
+    // Process leading bytes until |p| is 8-byte aligned. That takes
+    // (8 - addr % 8) % 8 bytes (at most 7, always fewer than |size| here),
+    // not addr % 8 bytes.
+    for (size_t i = (8 - reinterpret_cast<uintptr_t>(p) % 8) % 8; i; --i) {
+      STEP1;
+    }
+
+    // _mm_crc32_u64 is only available on x64.
+#if defined(_M_X64) || defined(__x86_64__)
+    // Process 8 bytes at a time
+    while ((e-p) >= 8) {
+      STEP8;
+    }
+    // Process 4 bytes at a time
+    if ((e-p) >= 4) {
+      STEP4;
+    }
+#else  // !(defined(_M_X64) || defined(__x86_64__))
+    // Process 4 bytes at a time
+    while ((e-p) >= 4) {
+      STEP4;
+    }
+#endif  // defined(_M_X64) || defined(__x86_64__)
+  }
+  // Process the last few bytes
+  while (p != e) {
+    STEP1;
+  }
+#undef STEP8
+#undef STEP4
+#undef STEP1
+  return l ^ 0xffffffffu;
+#endif  // defined(LEVELDB_PLATFORM_POSIX_SSE)
+}
+
+} // namespace port
+} // namespace leveldb
diff --git a/util/crc32c.cc b/util/crc32c.cc
index 6db9e77..edd61cf 100644
--- a/util/crc32c.cc
+++ b/util/crc32c.cc
@@ -8,6 +8,8 @@
#include "util/crc32c.h"
#include <stdint.h>
+
+#include "port/port.h"
#include "util/coding.h"
namespace leveldb {
@@ -283,7 +285,23 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) {
return DecodeFixed32(reinterpret_cast<const char*>(p));
}
+// Determine if the CPU running this program can accelerate the CRC32C
+// calculation.
+//
+// Returns true iff port::AcceleratedCRC32C produces the expected CRC for a
+// fixed test buffer.
+static bool CanAccelerateCRC32C() {
+  // port::AcceleratedCRC32C returns zero when unable to accelerate, so a
+  // match against the non-zero expected value means acceleration works.
+  static const char kTestCRCBuffer[] = "TestCRCBuffer";
+  // Byte count of the test string, excluding the terminating NUL.
+  static const size_t kBufSize = sizeof(kTestCRCBuffer) - 1;
+  static const uint32_t kTestCRCValue = 0xdcbc59fa;
+
+  return port::AcceleratedCRC32C(0, kTestCRCBuffer, kBufSize) == kTestCRCValue;
+}
+
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
+ static bool accelerate = CanAccelerateCRC32C();
+ if (accelerate) {
+ return port::AcceleratedCRC32C(crc, buf, size);
+ }
+
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
const uint8_t *e = p + size;
uint32_t l = crc ^ 0xffffffffu;