author    | Jan Lehnardt <jan@apache.org> | 2012-11-13 13:01:42 +0100
committer | Jan Lehnardt <jan@apache.org> | 2013-01-12 19:42:59 +0100
commit    | a4eb1b35f6d32390f17fde518dbfc3a4d97562f0 (patch)
tree      | 3586f0b9d86c05afe75d93e086bc64163c4346f5
parent    | 7422882e63259e8d59b832b266d30b719fdabbd4 (diff)
download  | couchdb-a4eb1b35f6d32390f17fde518dbfc3a4d97562f0.tar.gz
Update snappy to 1.0.5
-rw-r--r-- | src/snappy/Makefile.am                           |   4
-rw-r--r-- | src/snappy/google-snappy/config.h.in             |  71
-rw-r--r-- | src/snappy/google-snappy/snappy-sinksource.cc    |   1
-rw-r--r-- | src/snappy/google-snappy/snappy-sinksource.h     |   1
-rw-r--r-- | src/snappy/google-snappy/snappy-stubs-internal.h | 104
-rw-r--r-- | src/snappy/google-snappy/snappy.cc               | 212
-rw-r--r-- | src/snappy/google-snappy/snappy.h                |   4
-rw-r--r-- | src/snappy/snappy.app.in                         |   2

8 files changed, 289 insertions, 110 deletions
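Most of the churn below is upstream Snappy's reworked portability layer in snappy-stubs-internal.h: unaligned 64-bit loads and stores on ARM now go through memcpy rather than pointer casts, because even ARMv7 only handles unaligned 16- and 32-bit accesses natively. As a minimal standalone sketch of that technique (the identifier names here are illustrative, not the vendored ones; the real header also special-cases x86, PowerPC, and pre-ARMv7 sub-architectures):

    #include <cassert>
    #include <cstring>
    #include <stdint.h>

    // memcpy with a compile-time-constant size compiles to a single load or
    // store on targets that allow misaligned access, and to safe byte moves
    // elsewhere.
    inline uint64_t unaligned_load64(const void* p) {
      uint64_t t;
      std::memcpy(&t, p, sizeof t);  // never dereferences a misaligned uint64_t*
      return t;
    }

    inline void unaligned_store64(void* p, uint64_t v) {
      std::memcpy(p, &v, sizeof v);
    }

    // Eight-byte move between possibly-unaligned buffers; this is the job
    // UnalignedCopy64 does in the vendored header.
    inline void unaligned_copy64(const void* src, void* dst) {
      unaligned_store64(dst, unaligned_load64(src));
    }

    int main() {
      char buf[17] = "0123456789abcdef";
      unaligned_copy64(buf + 1, buf + 9);  // both pointers are misaligned
      assert(std::memcmp(buf + 9, "12345678", 8) == 0);
      return 0;
    }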
diff --git a/src/snappy/Makefile.am b/src/snappy/Makefile.am
index 23dbf1472..ff75f0705 100644
--- a/src/snappy/Makefile.am
+++ b/src/snappy/Makefile.am
@@ -10,8 +10,8 @@
 ## License for the specific language governing permissions and limitations under
 ## the License.
 
-snappyebindir = $(localerlanglibdir)/snappy-1.0.3/ebin
-snappyprivdir = $(localerlanglibdir)/snappy-1.0.3/priv
+snappyebindir = $(localerlanglibdir)/snappy-1.0.5/ebin
+snappyprivdir = $(localerlanglibdir)/snappy-1.0.5/priv
 
 snappy_cxx_srcs = \
     snappy_nif.cc \
diff --git a/src/snappy/google-snappy/config.h.in b/src/snappy/google-snappy/config.h.in
index 52dad348c..28f57c28f 100644
--- a/src/snappy/google-snappy/config.h.in
+++ b/src/snappy/google-snappy/config.h.in
@@ -1,4 +1,4 @@
-/* src/snappy/google-snappy/config.h.in. Generated from configure.ac by autoheader. */
+/* config.h.in. Generated from configure.ac by autoheader. */
 
 /* Define if building universal (internal helper macro) */
 #undef AC_APPLE_UNIVERSAL_BUILD
@@ -9,15 +9,36 @@
 /* Define to 1 if the compiler supports __builtin_expect. */
 #undef HAVE_BUILTIN_EXPECT
 
-/* "Provide HTTP support to couchjs" */
-#undef HAVE_CURL
+/* Define to 1 if you have the <byteswap.h> header file. */
+#undef HAVE_BYTESWAP_H
 
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #undef HAVE_DLFCN_H
 
+/* Use the gflags package for command-line parsing. */
+#undef HAVE_GFLAGS
+
+/* Defined when Google Test is available. */
+#undef HAVE_GTEST
+
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
+/* Define to 1 if you have the `fastlz' library (-lfastlz). */
+#undef HAVE_LIBFASTLZ
+
+/* Define to 1 if you have the `lzf' library (-llzf). */
+#undef HAVE_LIBLZF
+
+/* Define to 1 if you have the `lzo2' library (-llzo2). */
+#undef HAVE_LIBLZO2
+
+/* Define to 1 if you have the `quicklz' library (-lquicklz). */
+#undef HAVE_LIBQUICKLZ
+
+/* Define to 1 if you have the `z' library (-lz). */
+#undef HAVE_LIBZ
+
 /* Define to 1 if you have the <memory.h> header file. */
 #undef HAVE_MEMORY_H
 
@@ -36,6 +57,12 @@
 /* Define to 1 if you have the <string.h> header file. */
 #undef HAVE_STRING_H
 
+/* Define to 1 if you have the <sys/byteswap.h> header file. */
+#undef HAVE_SYS_BYTESWAP_H
+
+/* Define to 1 if you have the <sys/endian.h> header file. */
+#undef HAVE_SYS_ENDIAN_H
+
 /* Define to 1 if you have the <sys/mman.h> header file. */
 #undef HAVE_SYS_MMAN_H
 
@@ -51,6 +78,9 @@
 /* Define to 1 if you have the <unistd.h> header file. */
 #undef HAVE_UNISTD_H
 
+/* Define to 1 if you have the <windows.h> header file. */
+#undef HAVE_WINDOWS_H
+
 /* Define to the sub-directory in which libtool stores uninstalled libraries.
    */
 #undef LT_OBJDIR
@@ -79,31 +109,6 @@
 /* Define to 1 if you have the ANSI C header files. */
 #undef STDC_HEADERS
 
-/* Use new JS_SetOperationCallback */
-#undef USE_JS_SETOPCB
-
-/* Enable extensions on AIX 3, Interix. */
-#ifndef _ALL_SOURCE
-# undef _ALL_SOURCE
-#endif
-/* Enable GNU extensions on systems that have them. */
-#ifndef _GNU_SOURCE
-# undef _GNU_SOURCE
-#endif
-/* Enable threading extensions on Solaris. */
-#ifndef _POSIX_PTHREAD_SEMANTICS
-# undef _POSIX_PTHREAD_SEMANTICS
-#endif
-/* Enable extensions on HP NonStop. */
-#ifndef _TANDEM_SOURCE
-# undef _TANDEM_SOURCE
-#endif
-/* Enable general extensions on Solaris. */
-#ifndef __EXTENSIONS__
-# undef __EXTENSIONS__
-#endif
-
-
 /* Version number of package */
 #undef VERSION
 
@@ -118,13 +123,3 @@
 # undef WORDS_BIGENDIAN
 # endif
 #endif
-
-/* Define to 1 if on MINIX. */
-#undef _MINIX
-
-/* Define to 2 if the system does not provide POSIX.1 features except with
-   this defined. */
-#undef _POSIX_1_SOURCE
-
-/* Define to 1 if you need to in order for `stat' and other things to work. */
-#undef _POSIX_SOURCE
diff --git a/src/snappy/google-snappy/snappy-sinksource.cc b/src/snappy/google-snappy/snappy-sinksource.cc
index 1017895f9..5844552cb 100644
--- a/src/snappy/google-snappy/snappy-sinksource.cc
+++ b/src/snappy/google-snappy/snappy-sinksource.cc
@@ -68,5 +68,4 @@ char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
   return dest_;
 }
 
-
 }
diff --git a/src/snappy/google-snappy/snappy-sinksource.h b/src/snappy/google-snappy/snappy-sinksource.h
index 430baeabb..faabfa1e6 100644
--- a/src/snappy/google-snappy/snappy-sinksource.h
+++ b/src/snappy/google-snappy/snappy-sinksource.h
@@ -60,6 +60,7 @@ class Sink {
   // The default implementation always returns the scratch buffer.
   virtual char* GetAppendBuffer(size_t length, char* scratch);
 
+
  private:
   // No copying
   Sink(const Sink&);
diff --git a/src/snappy/google-snappy/snappy-stubs-internal.h b/src/snappy/google-snappy/snappy-stubs-internal.h
index 46ee23542..6033cdfb4 100644
--- a/src/snappy/google-snappy/snappy-stubs-internal.h
+++ b/src/snappy/google-snappy/snappy-stubs-internal.h
@@ -42,7 +42,7 @@
 #include <stdlib.h>
 #include <string.h>
 
-#ifdef HAVE_SYS_MMAN
+#ifdef HAVE_SYS_MMAN_H
 #include <sys/mman.h>
 #endif
 
@@ -86,10 +86,9 @@ using namespace std;
 // version (anyone who wants to regenerate it can just do the call
 // themselves within main()).
 #define DEFINE_bool(flag_name, default_value, description) \
-  bool FLAGS_ ## flag_name = default_value;
+  bool FLAGS_ ## flag_name = default_value
 #define DECLARE_bool(flag_name) \
-  extern bool FLAGS_ ## flag_name;
-#define REGISTER_MODULE_INITIALIZER(name, code)
+  extern bool FLAGS_ ## flag_name
 
 namespace snappy {
 
@@ -179,6 +178,8 @@ class LogMessageVoidify {
 
 // Potentially unaligned loads and stores.
 
+// x86 and PowerPC can simply do these loads and stores native.
+
 #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
 
 #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
@@ -189,6 +190,47 @@ class LogMessageVoidify {
 #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
 #define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
 
+// ARMv7 and newer support native unaligned accesses, but only of 16-bit
+// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
+// do an unaligned read and rotate the words around a bit, or do the reads very
+// slowly (trip through kernel mode). There's no simple #define that says just
+// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
+// sub-architectures.
+//
+// This is a mess, but there's not much we can do about it.
+
+#elif defined(__arm__) && \
+      !defined(__ARM_ARCH_5__) && \
+      !defined(__ARM_ARCH_5T__) && \
+      !defined(__ARM_ARCH_5TE__) && \
+      !defined(__ARM_ARCH_5TEJ__) && \
+      !defined(__ARM_ARCH_6__) && \
+      !defined(__ARM_ARCH_6J__) && \
+      !defined(__ARM_ARCH_6K__) && \
+      !defined(__ARM_ARCH_6Z__) && \
+      !defined(__ARM_ARCH_6ZK__) && \
+      !defined(__ARM_ARCH_6T2__)
+
+#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+
+#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
+#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+
+// TODO(user): NEON supports unaligned 64-bit loads and stores.
+// See if that would be more efficient on platforms supporting it,
+// at least for copies.
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+  uint64 t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+  memcpy(p, &v, sizeof v);
+}
+
 #else
 
 // These functions are provided for architectures that don't support
@@ -226,9 +268,31 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
 
 #endif
 
+// This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
+// on some platforms, in particular ARM.
+inline void UnalignedCopy64(const void *src, void *dst) {
+  if (sizeof(void *) == 8) {
+    UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
+  } else {
+    const char *src_char = reinterpret_cast<const char *>(src);
+    char *dst_char = reinterpret_cast<char *>(dst);
+
+    UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
+    UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
+  }
+}
+
 // The following guarantees declaration of the byte swap functions.
 #ifdef WORDS_BIGENDIAN
 
+#ifdef HAVE_SYS_BYTEORDER_H
+#include <sys/byteorder.h>
+#endif
+
+#ifdef HAVE_SYS_ENDIAN_H
+#include <sys/endian.h>
+#endif
+
 #ifdef _MSC_VER
 #include <stdlib.h>
 #define bswap_16(x) _byteswap_ushort(x)
@@ -242,8 +306,38 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
 #define bswap_32(x) OSSwapInt32(x)
 #define bswap_64(x) OSSwapInt64(x)
 
-#else
+#elif defined(HAVE_BYTESWAP_H)
 #include <byteswap.h>
+
+#elif defined(bswap32)
+// FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included).
+#define bswap_16(x) bswap16(x)
+#define bswap_32(x) bswap32(x)
+#define bswap_64(x) bswap64(x)
+
+#elif defined(BSWAP_64)
+// Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included).
+#define bswap_16(x) BSWAP_16(x)
+#define bswap_32(x) BSWAP_32(x)
+#define bswap_64(x) BSWAP_64(x)
+
+#else
+
+inline uint16 bswap_16(uint16 x) {
+  return (x << 8) | (x >> 8);
+}
+
+inline uint32 bswap_32(uint32 x) {
+  x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8);
+  return (x >> 16) | (x << 16);
+}
+
+inline uint64 bswap_64(uint64 x) {
+  x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8);
+  x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16);
+  return (x >> 32) | (x << 32);
+}
+
 #endif
 
 #endif  // WORDS_BIGENDIAN
diff --git a/src/snappy/google-snappy/snappy.cc b/src/snappy/google-snappy/snappy.cc
index a591aba59..4d4eb42a4 100644
--- a/src/snappy/google-snappy/snappy.cc
+++ b/src/snappy/google-snappy/snappy.cc
@@ -140,12 +140,12 @@ const int kMaxIncrementCopyOverflow = 10;
 
 static inline void IncrementalCopyFastPath(const char* src, char* op, int len) {
   while (op - src < 8) {
-    UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
+    UnalignedCopy64(src, op);
     len -= op - src;
     op += op - src;
   }
   while (len > 0) {
-    UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
+    UnalignedCopy64(src, op);
     src += 8;
     op += 8;
     len -= 8;
@@ -172,8 +172,8 @@ static inline char* EmitLiteral(char* op,
       // - The output will always have 32 spare bytes (see
       //   MaxCompressedLength).
       if (allow_fast_path && len <= 16) {
-        UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal));
-        UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(literal + 8));
+        UnalignedCopy64(literal, op);
+        UnalignedCopy64(literal + 8, op + 8);
         return op + len;
       }
     } else {
@@ -194,13 +194,13 @@ static inline char* EmitLiteral(char* op,
   return op + len;
 }
 
-static inline char* EmitCopyLessThan64(char* op, int offset, int len) {
+static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
   DCHECK_LE(len, 64);
   DCHECK_GE(len, 4);
   DCHECK_LT(offset, 65536);
 
   if ((len < 12) && (offset < 2048)) {
-    int len_minus_4 = len - 4;
+    size_t len_minus_4 = len - 4;
     assert(len_minus_4 < 8);            // Must fit in 3 bits
     *op++ = COPY_1_BYTE_OFFSET | ((len_minus_4) << 2) | ((offset >> 8) << 5);
     *op++ = offset & 0xff;
@@ -212,7 +212,7 @@ static inline char* EmitCopyLessThan64(char* op, int offset, int len) {
   return op;
 }
 
-static inline char* EmitCopy(char* op, int offset, int len) {
+static inline char* EmitCopy(char* op, size_t offset, int len) {
   // Emit 64 byte copies but make sure to keep at least four bytes reserved
   while (len >= 68) {
     op = EmitCopyLessThan64(op, offset, 64);
@@ -249,7 +249,7 @@ uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
   // compression, and if the input is short, we won't need that
   // many hash table entries anyway.
   assert(kMaxHashTableSize >= 256);
-  int htsize = 256;
+  size_t htsize = 256;
   while (htsize < kMaxHashTableSize && htsize < input_size) {
     htsize <<= 1;
   }
@@ -272,16 +272,49 @@ uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
   }
 }  // end namespace internal
 
-// For 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) will
+// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
 // equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
 // empirically found that overlapping loads such as
 //  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
 // are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
+//
+// We have different versions for 64- and 32-bit; ideally we would avoid the
+// two functions and just inline the UNALIGNED_LOAD64 call into
+// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
+// enough to avoid loading the value multiple times then. For 64-bit, the load
+// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
+// done at GetUint32AtOffset() time.
+
+#ifdef ARCH_K8
+
+typedef uint64 EightBytesReference;
+
+static inline EightBytesReference GetEightBytesAt(const char* ptr) {
+  return UNALIGNED_LOAD64(ptr);
+}
+
 static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
-  DCHECK(0 <= offset && offset <= 4) << offset;
+  DCHECK_GE(offset, 0);
+  DCHECK_LE(offset, 4);
   return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
 }
 
+#else
+
+typedef const char* EightBytesReference;
+
+static inline EightBytesReference GetEightBytesAt(const char* ptr) {
+  return ptr;
+}
+
+static inline uint32 GetUint32AtOffset(const char* v, int offset) {
+  DCHECK_GE(offset, 0);
+  DCHECK_LE(offset, 4);
+  return UNALIGNED_LOAD32(v + offset);
+}
+
+#endif
+
 // Flat array compression that does not emit the "uncompressed length"
 // prefix. Compresses "input" string to the "*op" buffer.
 //
@@ -294,8 +327,8 @@ static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
 // Returns an "end" pointer into "op" buffer.
 // "end - op" is the compressed size of "input".
 namespace internal {
-char* CompressFragment(const char* const input,
-                       const size_t input_size,
+char* CompressFragment(const char* input,
+                       size_t input_size,
                        char* op,
                        uint16* table,
                        const int table_size) {
@@ -304,14 +337,14 @@ char* CompressFragment(const char* const input,
   CHECK_LE(input_size, kBlockSize);
   CHECK_EQ(table_size & (table_size - 1), 0) << ": table must be power of two";
   const int shift = 32 - Bits::Log2Floor(table_size);
-  DCHECK_EQ(kuint32max >> shift, table_size - 1);
+  DCHECK_EQ(static_cast<int>(kuint32max >> shift), table_size - 1);
   const char* ip_end = input + input_size;
   const char* base_ip = ip;
   // Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
   // [next_emit, ip_end) after the main loop.
   const char* next_emit = ip;
 
-  const int kInputMarginBytes = 15;
+  const size_t kInputMarginBytes = 15;
   if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
     const char* ip_limit = input + input_size - kInputMarginBytes;
 
@@ -378,7 +411,7 @@ char* CompressFragment(const char* const input,
       // though we don't yet know how big the literal will be.  We handle that
       // by proceeding to the next iteration of the main loop.  We also can exit
       // this loop via goto if we get close to exhausting the input.
-      uint64 input_bytes = 0;
+      EightBytesReference input_bytes;
       uint32 candidate_bytes = 0;
 
       do {
@@ -387,7 +420,7 @@ char* CompressFragment(const char* const input,
         const char* base = ip;
         int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
         ip += matched;
-        int offset = base - candidate;
+        size_t offset = base - candidate;
         DCHECK_EQ(0, memcmp(base, candidate, matched));
         op = EmitCopy(op, offset, matched);
         // We could immediately start working at ip now, but to improve
@@ -397,7 +430,7 @@ char* CompressFragment(const char* const input,
         if (PREDICT_FALSE(ip >= ip_limit)) {
           goto emit_remainder;
         }
-        input_bytes = UNALIGNED_LOAD64(insert_tail);
+        input_bytes = GetEightBytesAt(insert_tail);
         uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
         table[prev_hash] = ip - base_ip - 1;
         uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
@@ -435,12 +468,26 @@ char* CompressFragment(const char* const input,
 //   bool CheckLength() const;
 //
 //   // Called repeatedly during decompression
-//   bool Append(const char* ip, uint32 length, bool allow_fast_path);
-//   bool AppendFromSelf(uint32 offset, uint32 length);
-// };
+//   bool Append(const char* ip, size_t length);
+//   bool AppendFromSelf(uint32 offset, size_t length);
 //
-// "allow_fast_path" is a parameter that says if there is at least 16
-// readable bytes in "ip". It is currently only used by SnappyArrayWriter.
+//   // The difference between TryFastAppend and Append is that TryFastAppend
+//   // is allowed to read up to <available> bytes from the input buffer,
+//   // whereas Append is allowed to read <length>.
+//   //
+//   // Also, TryFastAppend is allowed to return false, declining the append,
+//   // without it being a fatal error -- just "return false" would be
+//   // a perfectly legal implementation of TryFastAppend. The intention
+//   // is for TryFastAppend to allow a fast path in the common case of
+//   // a small append.
+//   //
+//   // NOTE(user): TryFastAppend must always return decline (return false)
+//   // if <length> is 61 or more, as in this case the literal length is not
+//   // decoded fully. In practice, this should not be a big problem,
+//   // as it is unlikely that one would implement a fast path accepting
+//   // this much data.
+//   bool TryFastAppend(const char* ip, size_t available, size_t length);
+// };
 
 // -----------------------------------------------------------------------
 // Lookup table for decompression code.  Generated by ComputeTable() below.
@@ -587,7 +634,6 @@ static void ComputeTable() {
     CHECK_EQ(dst[i], char_table[i]);
   }
 }
-REGISTER_MODULE_INITIALIZER(snappy, ComputeTable());
 #endif /* !NDEBUG */
 
 // Helper class for decompression
@@ -655,25 +701,41 @@ class SnappyDecompressor {
   template <class Writer>
   void DecompressAllTags(Writer* writer) {
     const char* ip = ip_;
-    for ( ;; ) {
-      if (ip_limit_ - ip < 5) {
-        ip_ = ip;
-        if (!RefillTag()) return;
-        ip = ip_;
-      }
 
+    // We could have put this refill fragment only at the beginning of the loop.
+    // However, duplicating it at the end of each branch gives the compiler more
+    // scope to optimize the <ip_limit_ - ip> expression based on the local
+    // context, which overall increases speed.
+    #define MAYBE_REFILL() \
+        if (ip_limit_ - ip < 5) { \
+          ip_ = ip; \
+          if (!RefillTag()) return; \
+          ip = ip_; \
+        }
+
+    MAYBE_REFILL();
+    for ( ;; ) {
       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
-      const uint32 entry = char_table[c];
-      const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
-      ip += entry >> 11;
-      const uint32 length = entry & 0xff;
 
       if ((c & 0x3) == LITERAL) {
-        uint32 literal_length = length + trailer;
-        uint32 avail = ip_limit_ - ip;
+        size_t literal_length = (c >> 2) + 1u;
+        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
+          DCHECK_LT(literal_length, 61);
+          ip += literal_length;
+          MAYBE_REFILL();
+          continue;
+        }
+        if (PREDICT_FALSE(literal_length >= 61)) {
+          // Long literal.
+          const size_t literal_length_length = literal_length - 60;
+          literal_length =
+              (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
+          ip += literal_length_length;
+        }
+
+        size_t avail = ip_limit_ - ip;
         while (avail < literal_length) {
-          bool allow_fast_path = (avail >= 16);
-          if (!writer->Append(ip, avail, allow_fast_path)) return;
+          if (!writer->Append(ip, avail)) return;
           literal_length -= avail;
           reader_->Skip(peeked_);
           size_t n;
@@ -683,12 +745,17 @@ class SnappyDecompressor {
           if (avail == 0) return;  // Premature end of input
           ip_limit_ = ip + avail;
         }
-        bool allow_fast_path = (avail >= 16);
-        if (!writer->Append(ip, literal_length, allow_fast_path)) {
+        if (!writer->Append(ip, literal_length)) {
           return;
         }
         ip += literal_length;
+        MAYBE_REFILL();
       } else {
+        const uint32 entry = char_table[c];
+        const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
+        const uint32 length = entry & 0xff;
+        ip += entry >> 11;
+
         // copy_offset/256 is encoded in bits 8..10.  By just fetching
         // those bits, we get copy_offset (since the bit-field starts at
         // bit 8).
@@ -696,8 +763,11 @@ class SnappyDecompressor {
         if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
           return;
         }
+        MAYBE_REFILL();
       }
     }
+
+#undef MAYBE_REFILL
   }
 };
 
@@ -768,6 +838,15 @@ static bool InternalUncompress(Source* r,
   SnappyDecompressor decompressor(r);
   uint32 uncompressed_len = 0;
   if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
+  return InternalUncompressAllTags(
+      &decompressor, writer, uncompressed_len, max_len);
+}
+
+template <typename Writer>
+static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
+                                      Writer* writer,
+                                      uint32 uncompressed_len,
+                                      uint32 max_len) {
   // Protect against possible DoS attack
   if (static_cast<uint64>(uncompressed_len) > max_len) {
     return false;
@@ -776,8 +855,8 @@ static bool InternalUncompress(Source* r,
   writer->SetExpectedLength(uncompressed_len);
 
   // Process the entire input
-  decompressor.DecompressAllTags(writer);
-  return (decompressor.eof() && writer->CheckLength());
+  decompressor->DecompressAllTags(writer);
+  return (decompressor->eof() && writer->CheckLength());
 }
 
 bool GetUncompressedLength(Source* source, uint32* result) {
@@ -787,7 +866,7 @@ bool GetUncompressedLength(Source* source, uint32* result) {
 
 size_t Compress(Source* reader, Sink* writer) {
   size_t written = 0;
-  int N = reader->Available();
+  size_t N = reader->Available();
   char ulength[Varint::kMax32];
   char* p = Varint::Encode32(ulength, N);
   writer->Append(ulength, p-ulength);
@@ -802,10 +881,10 @@ size_t Compress(Source* reader, Sink* writer) {
     size_t fragment_size;
     const char* fragment = reader->Peek(&fragment_size);
     DCHECK_NE(fragment_size, 0) << ": premature end of input";
-    const int num_to_read = min(N, kBlockSize);
+    const size_t num_to_read = min(N, kBlockSize);
     size_t bytes_read = fragment_size;
 
-    int pending_advance = 0;
+    size_t pending_advance = 0;
     if (bytes_read >= num_to_read) {
       // Buffer returned by reader is large enough
       pending_advance = num_to_read;
@@ -893,34 +972,42 @@ class SnappyArrayWriter {
     return op_ == op_limit_;
   }
 
-  inline bool Append(const char* ip, uint32 len, bool allow_fast_path) {
+  inline bool Append(const char* ip, size_t len) {
     char* op = op_;
-    const int space_left = op_limit_ - op;
-    if (allow_fast_path && len <= 16 && space_left >= 16) {
-      // Fast path, used for the majority (about 90%) of dynamic invocations.
-      UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip));
-      UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8));
-    } else {
-      if (space_left < len) {
-        return false;
-      }
-      memcpy(op, ip, len);
+    const size_t space_left = op_limit_ - op;
+    if (space_left < len) {
+      return false;
     }
+    memcpy(op, ip, len);
     op_ = op + len;
     return true;
   }
 
-  inline bool AppendFromSelf(uint32 offset, uint32 len) {
+  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
     char* op = op_;
-    const int space_left = op_limit_ - op;
+    const size_t space_left = op_limit_ - op;
+    if (len <= 16 && available >= 16 && space_left >= 16) {
+      // Fast path, used for the majority (about 95%) of invocations.
+      UnalignedCopy64(ip, op);
+      UnalignedCopy64(ip + 8, op + 8);
+      op_ = op + len;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  inline bool AppendFromSelf(size_t offset, size_t len) {
+    char* op = op_;
+    const size_t space_left = op_limit_ - op;
 
     if (op - base_ <= offset - 1u) {  // -1u catches offset==0
       return false;
     }
     if (len <= 16 && offset >= 8 && space_left >= 16) {
       // Fast path, used for the majority (70-80%) of dynamic invocations.
-      UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset));
-      UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8));
+      UnalignedCopy64(op - offset, op);
+      UnalignedCopy64(op - offset + 8, op + 8);
     } else {
       if (space_left >= len + kMaxIncrementCopyOverflow) {
         IncrementalCopyFastPath(op - offset, op, len);
@@ -976,11 +1063,14 @@ class SnappyDecompressionValidator {
   inline bool CheckLength() const {
     return expected_ == produced_;
   }
-  inline bool Append(const char* ip, uint32 len, bool allow_fast_path) {
+  inline bool Append(const char* ip, size_t len) {
     produced_ += len;
     return produced_ <= expected_;
   }
-  inline bool AppendFromSelf(uint32 offset, uint32 len) {
+  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
+    return false;
+  }
+  inline bool AppendFromSelf(size_t offset, size_t len) {
     if (produced_ <= offset - 1u) return false;  // -1u catches offset==0
     produced_ += len;
     return produced_ <= expected_;
diff --git a/src/snappy/google-snappy/snappy.h b/src/snappy/google-snappy/snappy.h
index 8d6ef2294..8c2075fef 100644
--- a/src/snappy/google-snappy/snappy.h
+++ b/src/snappy/google-snappy/snappy.h
@@ -144,10 +144,10 @@ namespace snappy {
   // decompression code should not rely on this guarantee since older
   // compression code may not obey it.
   static const int kBlockLog = 15;
-  static const int kBlockSize = 1 << kBlockLog;
+  static const size_t kBlockSize = 1 << kBlockLog;
 
   static const int kMaxHashTableBits = 14;
-  static const int kMaxHashTableSize = 1 << kMaxHashTableBits;
+  static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
 
 }  // end namespace snappy
 
diff --git a/src/snappy/snappy.app.in b/src/snappy/snappy.app.in
index 4ee1cda56..25d37b5ea 100644
--- a/src/snappy/snappy.app.in
+++ b/src/snappy/snappy.app.in
@@ -1,7 +1,7 @@
 {application, snappy, [
     {description, "snappy compressor/decompressor Erlang NIF wrapper"},
-    {vsn, "1.0.3"},
+    {vsn, "1.0.5"},
     {registered, []},
     {applications, [
         kernel,
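For reference, the generic byte-swap fallback added above (used on big-endian systems that lack <byteswap.h>, <sys/byteswap.h>, and <sys/endian.h>) is plain shift-and-mask arithmetic. A small self-checking sketch of the 32-bit case, with an illustrative name to avoid clashing with the vendored bswap_32:

    #include <cassert>
    #include <stdint.h>

    // Swap adjacent bytes, then swap the two 16-bit halves:
    // 12 34 56 78 -> 34 12 78 56 -> 78 56 34 12.
    inline uint32_t bswap32(uint32_t x) {
      x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8);
      return (x >> 16) | (x << 16);
    }

    int main() {
      assert(bswap32(0x12345678u) == 0x78563412u);
      return 0;
    }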
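Similarly, the new EightBytesReference/GetEightBytesAt machinery in snappy.cc lets one unaligned 8-byte load on 64-bit targets replace several overlapping 4-byte loads, which the upstream comment reports is faster on x86-64. A sketch of the underlying identity, assuming a little-endian machine (names illustrative):

    #include <cassert>
    #include <cstring>
    #include <stdint.h>

    // For 0 <= offset <= 4 on a little-endian machine, shifting the 8-byte
    // load right by 8*offset yields the same four bytes as a direct load at
    // p + offset -- the GetUint32AtOffset identity.
    static inline uint32_t u32_at_offset(uint64_t v, int offset) {
      return static_cast<uint32_t>(v >> (8 * offset));
    }

    int main() {
      const unsigned char buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      uint64_t v;
      std::memcpy(&v, buf, sizeof v);  // one 8-byte load
      for (int off = 0; off <= 4; ++off) {
        uint32_t direct;
        std::memcpy(&direct, buf + off, sizeof direct);  // overlapping 4-byte load
        assert(u32_at_offset(v, off) == direct);
      }
      return 0;
    }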
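Finally, the most visible interface change is the decompression Writer contract: Append() loses its allow_fast_path flag, and the new TryFastAppend() may decline an append by returning false; it must decline whenever the length is 61 or more, since the literal length is not yet fully decoded at that point. A minimal conforming writer, modeled on the SnappyDecompressionValidator in the diff (the class name here is made up; it counts bytes instead of storing them):

    #include <cstddef>

    // CountingWriter: smallest legal implementation of the new contract.
    class CountingWriter {
     public:
      CountingWriter() : expected_(0), produced_(0) {}

      void SetExpectedLength(size_t len) { expected_ = len; }
      bool CheckLength() const { return expected_ == produced_; }

      // Called repeatedly during decompression; may read <len> input bytes.
      bool Append(const char* /*ip*/, size_t len) {
        produced_ += len;
        return produced_ <= expected_;
      }

      // Declining every fast append by returning false is always legal.
      bool TryFastAppend(const char* /*ip*/, size_t /*available*/,
                         size_t /*len*/) {
        return false;
      }

      bool AppendFromSelf(size_t offset, size_t len) {
        if (produced_ <= offset - 1u) return false;  // -1u catches offset == 0
        produced_ += len;
        return produced_ <= expected_;
      }

     private:
      size_t expected_;
      size_t produced_;
    };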