summaryrefslogtreecommitdiff
path: root/port
diff options
context:
space:
mode:
authorcostan <costan@google.com>2017-02-27 14:29:18 -0800
committerVictor Costan <pwnall@chromium.org>2017-02-28 14:08:46 -0800
commitea175e28f8ef7f6a8f5931ebad1835d95ec466ed (patch)
tree8c52c8a394b42444589e4d3929093d03982f32ef /port
parent95cd743e5e71c7b06e7149a837e33b91309dfa48 (diff)
downloadleveldb-ea175e28f8ef7f6a8f5931ebad1835d95ec466ed.tar.gz
Implement support for Intel crc32 instruction (SSE 4.2)
This change was authored by vadimskipin and submitted via: https://github.com/google/leveldb/pull/309
Changes were made to support iOS builds and other architectures without support for SSE 4.2.
db_bench reports the original crc32c speed as: crc32c : 3.610 micros/op; 1082.0 MB/s (4K per op)
With this change, performance has increased to: crc32c : 0.843 micros/op; 4633.6 MB/s (4K per op)
-------------
Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=148694935
Diffstat (limited to 'port')
-rw-r--r--port/port_example.h6
-rw-r--r--port/port_posix.h2
-rw-r--r--port/port_posix_sse.cc125
3 files changed, 133 insertions, 0 deletions
diff --git a/port/port_example.h b/port/port_example.h
index ab9e489..97bd669 100644
--- a/port/port_example.h
+++ b/port/port_example.h
@@ -129,6 +129,12 @@ extern bool Snappy_Uncompress(const char* input_data, size_t input_length,
// The concatenation of all "data[0,n-1]" fragments is the heap profile.
extern bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg);
+// Extend crc to include the first size bytes of buf.
+//
+// Returns zero if the CRC cannot be extended using acceleration, else returns
+// the newly extended CRC value (which may also be zero).
+uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size);
+
} // namespace port
} // namespace leveldb
diff --git a/port/port_posix.h b/port/port_posix.h
index 89fc222..d67ab68 100644
--- a/port/port_posix.h
+++ b/port/port_posix.h
@@ -148,6 +148,8 @@ inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
return false;
}
+uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size);  // Returns 0 if the CRC cannot be extended using acceleration (a valid extended CRC may also be 0).
+
} // namespace port
} // namespace leveldb
diff --git a/port/port_posix_sse.cc b/port/port_posix_sse.cc
new file mode 100644
index 0000000..57ec8fe
--- /dev/null
+++ b/port/port_posix_sse.cc
@@ -0,0 +1,125 @@
+// Copyright 2016 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// An implementation of crc32c accelerated with the x86 SSE 4.2 crc32
+// instructions, optimized to handle up to eight bytes at a time.
+//
+// In a separate source file to allow this accelerated CRC32C function to be
+// compiled with the appropriate compiler flags to enable x86 SSE 4.2
+// instructions.
+
+#include <stdint.h>
+#include <string.h>
+#include "port/port.h"
+
+#if defined(LEVELDB_PLATFORM_POSIX_SSE)
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#elif defined(__GNUC__) && defined(__SSE4_2__)
+#include <nmmintrin.h>
+#include <cpuid.h>
+#endif
+
+#endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
+
+namespace leveldb {
+namespace port {
+
+#if defined(LEVELDB_PLATFORM_POSIX_SSE)
+
+// Loads a 32-bit word from |p| in little-endian byte order; memcpy keeps the load valid for any alignment of |p|.
+static inline uint32_t LE_LOAD32(const uint8_t *p) {
+ // SSE is x86-only, so the host is always little-endian and the copied bytes need no swapping.
+ uint32_t word;
+ memcpy(&word, p, sizeof(word));  // Copies sizeof(word) == 4 bytes starting at |p|.
+ return word;
+}
+
+// Loads a 64-bit word from |p| in host byte order (little-endian on x86, the only SSE target); memcpy keeps the load valid for any alignment of |p|.
+static inline uint64_t LE_LOAD64(const uint8_t *p) {
+ uint64_t dword;
+ memcpy(&dword, p, sizeof(dword));  // Copies sizeof(dword) == 8 bytes starting at |p|.
+ return dword;
+}
+
+static inline bool HaveSSE42() {  // Queries CPUID leaf 1 and reports whether ECX bit 20 (tested here as the SSE 4.2 flag) is set.
+#if defined(_MSC_VER)
+ int cpu_info[4];
+ __cpuid(cpu_info, 1);  // Fills cpu_info with the leaf-1 registers; index 2 is read below as ECX.
+ return (cpu_info[2] & (1 << 20)) != 0;
+#elif defined(__GNUC__)
+ unsigned int eax, ebx, ecx, edx;
+ __get_cpuid(1, &eax, &ebx, &ecx, &edx);  // NOTE(review): the return value is ignored; presumably ecx stays uninitialized if the query fails - confirm against <cpuid.h>.
+ return (ecx & (1 << 20)) != 0;
+#else
+ return false;  // Neither MSVC nor a GNU-compatible compiler: no CPUID query is attempted, so report no support.
+#endif
+}
+
+#endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
+
+// Extends |crc| with the first |size| bytes of |buf| using the SSE 4.2 crc32 intrinsics, or returns 0 when they cannot be used. For further improvements see Intel publication at:
+// http://download.intel.com/design/intarch/papers/323405.pdf
+uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
+#if !defined(LEVELDB_PLATFORM_POSIX_SSE)
+ return 0;  // Built without LEVELDB_PLATFORM_POSIX_SSE: always report "not accelerated".
+#else
+ static bool have = HaveSSE42();  // The CPU feature check runs once; later calls reuse the stored result.
+ if (!have) {
+ return 0;  // CPU lacks SSE 4.2: report "not accelerated".
+ }
+
+ const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
+ const uint8_t *e = p + size;  // One past the last input byte.
+ uint32_t l = crc ^ 0xffffffffu;  // The running value is kept bit-inverted; the inversion is undone on return.
+
+#define STEP1 do { \
+ l = _mm_crc32_u8(l, *p++); \
+} while (0)
+#define STEP4 do { \
+ l = _mm_crc32_u32(l, LE_LOAD32(p)); \
+ p += 4; \
+} while (0)
+#define STEP8 do { \
+ l = _mm_crc32_u64(l, LE_LOAD64(p)); \
+ p += 8; \
+} while (0)
+
+ if (size > 16) {  // Inputs of 16 bytes or fewer skip the word-at-a-time path and are handled byte-by-byte below.
+ // Process leading bytes one at a time. NOTE(review): this consumes (p % 8) bytes, which does not in general leave |p| 8-byte aligned; looks like (8 - p % 8) % 8 was intended - confirm. Only the split between byte and word steps is affected.
+ for (unsigned int i = reinterpret_cast<uintptr_t>(p) % 8; i; --i) {
+ STEP1;
+ }
+
+ // _mm_crc32_u64 is only available on x64.
+#if defined(_M_X64) || defined(__x86_64__)
+ // Process 8 bytes at a time
+ while ((e-p) >= 8) {
+ STEP8;
+ }
+ // Fewer than 8 bytes remain here, so at most one 4-byte step is taken
+ if ((e-p) >= 4) {
+ STEP4;
+ }
+#else // !(defined(_M_X64) || defined(__x86_64__))
+ // Process 4 bytes at a time
+ while ((e-p) >= 4) {
+ STEP4;
+ }
+#endif // defined(_M_X64) || defined(__x86_64__)
+ }
+ // Process the remaining bytes (the whole input when size <= 16) one at a time
+ while (p != e) {
+ STEP1;
+ }
+#undef STEP8
+#undef STEP4
+#undef STEP1
+ return l ^ 0xffffffffu;  // Undo the inversion applied when |l| was initialized.
+#endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
+}
+
+} // namespace port
+} // namespace leveldb