summaryrefslogtreecommitdiff
path: root/util/coding.h
diff options
context:
space:
mode:
Diffstat (limited to 'util/coding.h')
-rw-r--r--util/coding.h102
1 files changed, 81 insertions, 21 deletions
diff --git a/util/coding.h b/util/coding.h
index d9eeaa3..92a961f 100644
--- a/util/coding.h
+++ b/util/coding.h
@@ -10,9 +10,8 @@
#ifndef STORAGE_LEVELDB_UTIL_CODING_H_
#define STORAGE_LEVELDB_UTIL_CODING_H_
-#include <stdint.h>
-#include <string.h>
-
+#include <cstdint>
+#include <cstring>
#include <string>
#include "leveldb/slice.h"
@@ -44,44 +43,105 @@ const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* v);
int VarintLength(uint64_t v);
// Lower-level versions of Put... that write directly into a character buffer
-// REQUIRES: dst has enough space for the value being written
-void EncodeFixed32(char* dst, uint32_t value);
-void EncodeFixed64(char* dst, uint64_t value);
-
-// Lower-level versions of Put... that write directly into a character buffer
// and return a pointer just past the last byte written.
// REQUIRES: dst has enough space for the value being written
char* EncodeVarint32(char* dst, uint32_t value);
char* EncodeVarint64(char* dst, uint64_t value);
+// TODO(costan): Remove port::kLittleEndian and the fast paths based on
+// std::memcpy when clang learns to optimize the generic code, as
+// described in https://bugs.llvm.org/show_bug.cgi?id=41761
+//
+// The platform-independent code in DecodeFixed{32,64}() gets optimized to mov
+// on x86 and ldr on ARM64, by both clang and gcc. However, only gcc optimizes
+// the platform-independent code in EncodeFixed{32,64}() to mov / str.
+
+// Lower-level versions of Put... that write directly into a character buffer
+// REQUIRES: dst has enough space for the value being written
+
+inline void EncodeFixed32(char* dst, uint32_t value) {
+ uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
+
+ if (port::kLittleEndian) {
+ // Fast path for little-endian CPUs. All major compilers optimize this to a
+ // single mov (x86_64) / str (ARM) instruction.
+ std::memcpy(buffer, &value, sizeof(uint32_t));
+ return;
+ }
+
+ // Platform-independent code.
+ // Currently, only gcc optimizes this to a single mov / str instruction.
+ buffer[0] = static_cast<uint8_t>(value);
+ buffer[1] = static_cast<uint8_t>(value >> 8);
+ buffer[2] = static_cast<uint8_t>(value >> 16);
+ buffer[3] = static_cast<uint8_t>(value >> 24);
+}
+
+inline void EncodeFixed64(char* dst, uint64_t value) {
+ uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
+
+ if (port::kLittleEndian) {
+ // Fast path for little-endian CPUs. All major compilers optimize this to a
+ // single mov (x86_64) / str (ARM) instruction.
+ std::memcpy(buffer, &value, sizeof(uint64_t));
+ return;
+ }
+
+ // Platform-independent code.
+ // Currently, only gcc optimizes this to a single mov / str instruction.
+ buffer[0] = static_cast<uint8_t>(value);
+ buffer[1] = static_cast<uint8_t>(value >> 8);
+ buffer[2] = static_cast<uint8_t>(value >> 16);
+ buffer[3] = static_cast<uint8_t>(value >> 24);
+ buffer[4] = static_cast<uint8_t>(value >> 32);
+ buffer[5] = static_cast<uint8_t>(value >> 40);
+ buffer[6] = static_cast<uint8_t>(value >> 48);
+ buffer[7] = static_cast<uint8_t>(value >> 56);
+}
+
// Lower-level versions of Get... that read directly from a character buffer
// without any bounds checking.
inline uint32_t DecodeFixed32(const char* ptr) {
+ const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
+
if (port::kLittleEndian) {
- // Load the raw bytes
+ // Fast path for little-endian CPUs. All major compilers optimize this to a
+ // single mov (x86_64) / ldr (ARM) instruction.
uint32_t result;
- memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
+ std::memcpy(&result, buffer, sizeof(uint32_t));
return result;
- } else {
- return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0]))) |
- (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8) |
- (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16) |
- (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
}
+
+ // Platform-independent code.
+ // Clang and gcc optimize this to a single mov / ldr instruction.
+ return (static_cast<uint32_t>(buffer[0])) |
+ (static_cast<uint32_t>(buffer[1]) << 8) |
+ (static_cast<uint32_t>(buffer[2]) << 16) |
+ (static_cast<uint32_t>(buffer[3]) << 24);
}
inline uint64_t DecodeFixed64(const char* ptr) {
+ const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
+
if (port::kLittleEndian) {
- // Load the raw bytes
+ // Fast path for little-endian CPUs. All major compilers optimize this to a
+ // single mov (x86_64) / ldr (ARM) instruction.
uint64_t result;
- memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
+ std::memcpy(&result, buffer, sizeof(uint64_t));
return result;
- } else {
- uint64_t lo = DecodeFixed32(ptr);
- uint64_t hi = DecodeFixed32(ptr + 4);
- return (hi << 32) | lo;
}
+
+ // Platform-independent code.
+ // Clang and gcc optimize this to a single mov / ldr instruction.
+ return (static_cast<uint64_t>(buffer[0])) |
+ (static_cast<uint64_t>(buffer[1]) << 8) |
+ (static_cast<uint64_t>(buffer[2]) << 16) |
+ (static_cast<uint64_t>(buffer[3]) << 24) |
+ (static_cast<uint64_t>(buffer[4]) << 32) |
+ (static_cast<uint64_t>(buffer[5]) << 40) |
+ (static_cast<uint64_t>(buffer[6]) << 48) |
+ (static_cast<uint64_t>(buffer[7]) << 56);
}
// Internal routine for use by fallback path of GetVarint32Ptr