diff options
-rw-r--r-- | .travis.yml | 17 | ||||
-rw-r--r-- | cmake/config.h.in | 30 | ||||
-rw-r--r-- | snappy-internal.h | 39 | ||||
-rw-r--r-- | snappy-stubs-internal.h | 24 | ||||
-rw-r--r-- | snappy-test.cc | 2 | ||||
-rw-r--r-- | snappy-test.h | 18 | ||||
-rw-r--r-- | snappy.cc | 34 | ||||
-rw-r--r-- | snappy_test_tool.cc | 4 | ||||
-rw-r--r-- | snappy_unittest.cc | 4 |
9 files changed, 101 insertions, 71 deletions
diff --git a/.travis.yml b/.travis.yml index 2213d65..33b9072 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,10 +14,19 @@ os: - osx env: -- BUILD_TYPE=Debug CPU_LEVEL=AVX -- BUILD_TYPE=Debug CPU_LEVEL=AVX2 -- BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX -- BUILD_TYPE=RelWithDebInfo CPU_LEVEL=AVX2 + jobs: + - BUILD_TYPE: Debug + CPU_LEVEL: BASELINE + - BUILD_TYPE: Debug + CPU_LEVEL: AVX + - BUILD_TYPE: Debug + CPU_LEVEL: AVX2 + - BUILD_TYPE: RelWithDebInfo + CPU_LEVEL: BASELINE + - BUILD_TYPE: RelWithDebInfo + CPU_LEVEL: AVX + - BUILD_TYPE: RelWithDebInfo + CPU_LEVEL: AVX2 jobs: exclude: diff --git a/cmake/config.h.in b/cmake/config.h.in index 568f69c..5ea2b5a 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -2,46 +2,46 @@ #define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ /* Define to 1 if the compiler supports __attribute__((always_inline)). */ -#cmakedefine HAVE_ATTRIBUTE_ALWAYS_INLINE 1 +#cmakedefine01 HAVE_ATTRIBUTE_ALWAYS_INLINE /* Define to 1 if the compiler supports __builtin_ctz and friends. */ -#cmakedefine HAVE_BUILTIN_CTZ 1 +#cmakedefine01 HAVE_BUILTIN_CTZ /* Define to 1 if the compiler supports __builtin_expect. */ -#cmakedefine HAVE_BUILTIN_EXPECT 1 +#cmakedefine01 HAVE_BUILTIN_EXPECT /* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */ -#cmakedefine HAVE_FUNC_MMAP 1 +#cmakedefine01 HAVE_FUNC_MMAP /* Define to 1 if you have a definition for sysconf() in <unistd.h>. */ -#cmakedefine HAVE_FUNC_SYSCONF 1 +#cmakedefine01 HAVE_FUNC_SYSCONF /* Define to 1 if you have the `lzo2' library (-llzo2). */ -#cmakedefine HAVE_LIBLZO2 1 +#cmakedefine01 HAVE_LIBLZO2 /* Define to 1 if you have the `z' library (-lz). */ -#cmakedefine HAVE_LIBZ 1 +#cmakedefine01 HAVE_LIBZ /* Define to 1 if you have the `lz4' library (-llz4). */ -#cmakedefine HAVE_LIBLZ4 1 +#cmakedefine01 HAVE_LIBLZ4 /* Define to 1 if you have the <sys/mman.h> header file. 
*/ -#cmakedefine HAVE_SYS_MMAN_H 1 +#cmakedefine01 HAVE_SYS_MMAN_H /* Define to 1 if you have the <sys/resource.h> header file. */ -#cmakedefine HAVE_SYS_RESOURCE_H 1 +#cmakedefine01 HAVE_SYS_RESOURCE_H /* Define to 1 if you have the <sys/time.h> header file. */ -#cmakedefine HAVE_SYS_TIME_H 1 +#cmakedefine01 HAVE_SYS_TIME_H /* Define to 1 if you have the <sys/uio.h> header file. */ -#cmakedefine HAVE_SYS_UIO_H 1 +#cmakedefine01 HAVE_SYS_UIO_H /* Define to 1 if you have the <unistd.h> header file. */ -#cmakedefine HAVE_UNISTD_H 1 +#cmakedefine01 HAVE_UNISTD_H /* Define to 1 if you have the <windows.h> header file. */ -#cmakedefine HAVE_WINDOWS_H 1 +#cmakedefine01 HAVE_WINDOWS_H /* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */ #cmakedefine01 SNAPPY_HAVE_SSSE3 @@ -54,6 +54,6 @@ /* Define to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel and VAX). */ -#cmakedefine SNAPPY_IS_BIG_ENDIAN 1 +#cmakedefine01 SNAPPY_IS_BIG_ENDIAN #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ diff --git a/snappy-internal.h b/snappy-internal.h index 71d19c0..ae7ab5a 100644 --- a/snappy-internal.h +++ b/snappy-internal.h @@ -33,40 +33,26 @@ #include "snappy-stubs-internal.h" -#if !defined(SNAPPY_HAVE_BMI2) -// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2 -// specifically, but it does define __AVX2__ when AVX2 support is available. -// Fortunately, AVX2 was introduced in Haswell, just like BMI2. -// -// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So, -// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which -// case issuing BMI2 instructions results in a compiler error. -#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) -#define SNAPPY_HAVE_BMI2 1 -#else -#define SNAPPY_HAVE_BMI2 0 -#endif -#endif // !defined(SNAPPY_HAVE_BMI2) - -#if SNAPPY_HAVE_BMI2 -// Please do not replace with <x86intrin.h>. 
or with headers that assume more -// advanced SSE versions without checking with all the OWNERS. -#include <immintrin.h> -#endif - #if SNAPPY_HAVE_SSSE3 +// Please do not replace with <x86intrin.h> or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include <emmintrin.h> #include <tmmintrin.h> #endif + #if SNAPPY_HAVE_NEON #include <arm_neon.h> #endif +#if SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON +#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 1 +#else +#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 0 +#endif + namespace snappy { namespace internal { -#if (SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON) -#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 1 -#endif #if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE #if SNAPPY_HAVE_SSSE3 using V128 = __m128i; @@ -184,8 +170,9 @@ char* CompressFragment(const char* input, // loading from s2 + n. // // Separate implementation for 64-bit, little-endian cpus. -#if !defined(SNAPPY_IS_BIG_ENDIAN) && \ - (defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || defined(ARCH_ARM)) +#if !SNAPPY_IS_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || \ + defined(ARCH_ARM)) static inline std::pair<size_t, bool> FindMatchLength(const char* s1, const char* s2, const char* s2_limit, diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h index c2a838f..7d43c92 100644 --- a/snappy-stubs-internal.h +++ b/snappy-stubs-internal.h @@ -31,7 +31,7 @@ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ -#ifdef HAVE_CONFIG_H +#if HAVE_CONFIG_H #include "config.h" #endif @@ -43,11 +43,11 @@ #include <limits> #include <string> -#ifdef HAVE_SYS_MMAN_H +#if HAVE_SYS_MMAN_H #include <sys/mman.h> #endif -#ifdef HAVE_UNISTD_H +#if HAVE_UNISTD_H #include <unistd.h> #endif @@ -90,20 +90,20 @@ #define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))} // Static prediction hints. 
-#ifdef HAVE_BUILTIN_EXPECT +#if HAVE_BUILTIN_EXPECT #define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0)) #define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) #else #define SNAPPY_PREDICT_FALSE(x) x #define SNAPPY_PREDICT_TRUE(x) x -#endif +#endif // HAVE_BUILTIN_EXPECT // Inlining hints. -#ifdef HAVE_ATTRIBUTE_ALWAYS_INLINE +#if HAVE_ATTRIBUTE_ALWAYS_INLINE #define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) #else #define SNAPPY_ATTRIBUTE_ALWAYS_INLINE -#endif +#endif // HAVE_ATTRIBUTE_ALWAYS_INLINE // Stubbed version of ABSL_FLAG. // @@ -235,11 +235,11 @@ class LittleEndian { } static inline constexpr bool IsLittleEndian() { -#if defined(SNAPPY_IS_BIG_ENDIAN) +#if SNAPPY_IS_BIG_ENDIAN return false; #else return true; -#endif // defined(SNAPPY_IS_BIG_ENDIAN) +#endif // SNAPPY_IS_BIG_ENDIAN } }; @@ -265,7 +265,7 @@ class Bits { void operator=(const Bits&); }; -#if defined(HAVE_BUILTIN_CTZ) +#if HAVE_BUILTIN_CTZ inline int Bits::Log2FloorNonZero(uint32_t n) { assert(n != 0); @@ -354,7 +354,7 @@ inline int Bits::FindLSBSetNonZero(uint32_t n) { #endif // End portable versions. -#if defined(HAVE_BUILTIN_CTZ) +#if HAVE_BUILTIN_CTZ inline int Bits::FindLSBSetNonZero64(uint64_t n) { assert(n != 0); @@ -388,7 +388,7 @@ inline int Bits::FindLSBSetNonZero64(uint64_t n) { } } -#endif // End portable version. +#endif // HAVE_BUILTIN_CTZ // Variable-length integer encoding. 
class Varint { diff --git a/snappy-test.cc b/snappy-test.cc index 7eb490a..aae6072 100644 --- a/snappy-test.cc +++ b/snappy-test.cc @@ -151,7 +151,7 @@ LogMessageCrash::~LogMessageCrash() { #pragma warning(pop) #endif -#ifdef HAVE_LIBZ +#if HAVE_LIBZ ZLib::ZLib() : comp_init_(false), diff --git a/snappy-test.h b/snappy-test.h index f80d343..65f3725 100644 --- a/snappy-test.h +++ b/snappy-test.h @@ -31,25 +31,25 @@ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ -#ifdef HAVE_CONFIG_H +#if HAVE_CONFIG_H #include "config.h" #endif #include "snappy-stubs-internal.h" -#ifdef HAVE_SYS_MMAN_H +#if HAVE_SYS_MMAN_H #include <sys/mman.h> #endif -#ifdef HAVE_SYS_RESOURCE_H +#if HAVE_SYS_RESOURCE_H #include <sys/resource.h> #endif -#ifdef HAVE_SYS_TIME_H +#if HAVE_SYS_TIME_H #include <sys/time.h> #endif -#ifdef HAVE_WINDOWS_H +#if HAVE_WINDOWS_H // Needed to be able to use std::max without workarounds in the source code. // https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts #define NOMINMAX @@ -58,15 +58,15 @@ #define InitGoogle(argv0, argc, argv, remove_flags) ((void)(0)) -#ifdef HAVE_LIBZ +#if HAVE_LIBZ #include "zlib.h" #endif -#ifdef HAVE_LIBLZO2 +#if HAVE_LIBLZO2 #include "lzo/lzo1x.h" #endif -#ifdef HAVE_LIBLZ4 +#if HAVE_LIBLZ4 #include "lz4.h" #endif @@ -216,7 +216,7 @@ class LogMessageVoidify { #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) #define CHECK_OK(cond) (cond).ok() -#ifdef HAVE_LIBZ +#if HAVE_LIBZ // Object-oriented wrapper around zlib. class ZLib { @@ -30,6 +30,27 @@ #include "snappy-sinksource.h" #include "snappy.h" +#if !defined(SNAPPY_HAVE_BMI2) +// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2 +// specifically, but it does define __AVX2__ when AVX2 support is available. +// Fortunately, AVX2 was introduced in Haswell, just like BMI2. +// +// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). 
So, +// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which +// case issuing BMI2 instructions results in a compiler error. +#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) +#define SNAPPY_HAVE_BMI2 1 +#else +#define SNAPPY_HAVE_BMI2 0 +#endif +#endif // !defined(SNAPPY_HAVE_BMI2) + +#if SNAPPY_HAVE_BMI2 +// Please do not replace with <x86intrin.h> or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include <immintrin.h> +#endif + #include <algorithm> #include <array> #include <cstddef> @@ -1060,6 +1081,10 @@ inline uint32_t ExtractOffset(uint32_t val, size_t tag_type) { reinterpret_cast<const char*>(&kExtractMasksCombined) + 2 * tag_type, sizeof(result)); return val & result; +#elif defined(__aarch64__) + constexpr uint64_t kExtractMasksCombined = 0x0000FFFF00FF0000ull; + return val & static_cast<uint32_t>( + (kExtractMasksCombined >> (tag_type * 16)) & 0xFFFF); #else static constexpr uint32_t kExtractMasks[4] = {0, 0xFF, 0xFFFF, 0}; return val & kExtractMasks[tag_type]; @@ -1087,6 +1112,15 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless( // ip points just past the tag and we are touching at maximum kSlopBytes // in an iteration. size_t tag = ip[-1]; +#if defined(__clang__) && defined(__aarch64__) + // Workaround for https://bugs.llvm.org/show_bug.cgi?id=51317 + // when loading 1 byte, clang for aarch64 doesn't realize that it (ldrb) + // comes with free zero-extension, so clang generates another + // 'and xn, xm, 0xff' before it uses that as the offset. This 'and' is + // redundant and can be removed by adding this dummy asm, which gives + // clang a hint that we're doing the zero-extension at the load. 
+ asm("" ::"r"(tag)); +#endif do { // The throughput is limited by instructions, unrolling the inner loop // twice reduces the amount of instructions checking limits and also diff --git a/snappy_test_tool.cc b/snappy_test_tool.cc index 24ac1ee..a7c779b 100644 --- a/snappy_test_tool.cc +++ b/snappy_test_tool.cc @@ -66,7 +66,7 @@ namespace snappy { namespace { -#if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF) +#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF // To test against code that reads beyond its input, this class copies a // string to a newly allocated group of pages, the last of which @@ -112,7 +112,7 @@ class DataEndingAtUnreadablePage { size_t size_; }; -#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF) +#else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF // Fallback for systems without mmap. using DataEndingAtUnreadablePage = std::string; diff --git a/snappy_unittest.cc b/snappy_unittest.cc index 8782b52..292004c 100644 --- a/snappy_unittest.cc +++ b/snappy_unittest.cc @@ -50,7 +50,7 @@ namespace snappy { namespace { -#if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF) +#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF // To test against code that reads beyond its input, this class copies a // string to a newly allocated group of pages, the last of which @@ -96,7 +96,7 @@ class DataEndingAtUnreadablePage { size_t size_; }; -#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF) +#else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF // Fallback for systems without mmap. using DataEndingAtUnreadablePage = std::string; |