author     Jan Lehnardt <jan@apache.org>   2012-11-13 13:01:42 +0100
committer  Jan Lehnardt <jan@apache.org>   2013-01-12 19:42:59 +0100
commit     a4eb1b35f6d32390f17fde518dbfc3a4d97562f0 (patch)
tree       3586f0b9d86c05afe75d93e086bc64163c4346f5
parent     7422882e63259e8d59b832b266d30b719fdabbd4 (diff)
download   couchdb-a4eb1b35f6d32390f17fde518dbfc3a4d97562f0.tar.gz
Update snappy to 1.0.5
-rw-r--r--  src/snappy/Makefile.am                              4
-rw-r--r--  src/snappy/google-snappy/config.h.in               71
-rw-r--r--  src/snappy/google-snappy/snappy-sinksource.cc       1
-rw-r--r--  src/snappy/google-snappy/snappy-sinksource.h        1
-rw-r--r--  src/snappy/google-snappy/snappy-stubs-internal.h  104
-rw-r--r--  src/snappy/google-snappy/snappy.cc                212
-rw-r--r--  src/snappy/google-snappy/snappy.h                   4
-rw-r--r--  src/snappy/snappy.app.in                            2
8 files changed, 289 insertions(+), 110 deletions(-)
diff --git a/src/snappy/Makefile.am b/src/snappy/Makefile.am
index 23dbf1472..ff75f0705 100644
--- a/src/snappy/Makefile.am
+++ b/src/snappy/Makefile.am
@@ -10,8 +10,8 @@
## License for the specific language governing permissions and limitations under
## the License.
-snappyebindir = $(localerlanglibdir)/snappy-1.0.3/ebin
-snappyprivdir = $(localerlanglibdir)/snappy-1.0.3/priv
+snappyebindir = $(localerlanglibdir)/snappy-1.0.5/ebin
+snappyprivdir = $(localerlanglibdir)/snappy-1.0.5/priv
snappy_cxx_srcs = \
snappy_nif.cc \
diff --git a/src/snappy/google-snappy/config.h.in b/src/snappy/google-snappy/config.h.in
index 52dad348c..28f57c28f 100644
--- a/src/snappy/google-snappy/config.h.in
+++ b/src/snappy/google-snappy/config.h.in
@@ -1,4 +1,4 @@
-/* src/snappy/google-snappy/config.h.in. Generated from configure.ac by autoheader. */
+/* config.h.in. Generated from configure.ac by autoheader. */
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
@@ -9,15 +9,36 @@
/* Define to 1 if the compiler supports __builtin_expect. */
#undef HAVE_BUILTIN_EXPECT
-/* "Provide HTTP support to couchjs" */
-#undef HAVE_CURL
+/* Define to 1 if you have the <byteswap.h> header file. */
+#undef HAVE_BYTESWAP_H
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
+/* Use the gflags package for command-line parsing. */
+#undef HAVE_GFLAGS
+
+/* Defined when Google Test is available. */
+#undef HAVE_GTEST
+
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
+/* Define to 1 if you have the `fastlz' library (-lfastlz). */
+#undef HAVE_LIBFASTLZ
+
+/* Define to 1 if you have the `lzf' library (-llzf). */
+#undef HAVE_LIBLZF
+
+/* Define to 1 if you have the `lzo2' library (-llzo2). */
+#undef HAVE_LIBLZO2
+
+/* Define to 1 if you have the `quicklz' library (-lquicklz). */
+#undef HAVE_LIBQUICKLZ
+
+/* Define to 1 if you have the `z' library (-lz). */
+#undef HAVE_LIBZ
+
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
@@ -36,6 +57,12 @@
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
+/* Define to 1 if you have the <sys/byteswap.h> header file. */
+#undef HAVE_SYS_BYTESWAP_H
+
+/* Define to 1 if you have the <sys/endian.h> header file. */
+#undef HAVE_SYS_ENDIAN_H
+
/* Define to 1 if you have the <sys/mman.h> header file. */
#undef HAVE_SYS_MMAN_H
@@ -51,6 +78,9 @@
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
+/* Define to 1 if you have the <windows.h> header file. */
+#undef HAVE_WINDOWS_H
+
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
@@ -79,31 +109,6 @@
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
-/* Use new JS_SetOperationCallback */
-#undef USE_JS_SETOPCB
-
-/* Enable extensions on AIX 3, Interix. */
-#ifndef _ALL_SOURCE
-# undef _ALL_SOURCE
-#endif
-/* Enable GNU extensions on systems that have them. */
-#ifndef _GNU_SOURCE
-# undef _GNU_SOURCE
-#endif
-/* Enable threading extensions on Solaris. */
-#ifndef _POSIX_PTHREAD_SEMANTICS
-# undef _POSIX_PTHREAD_SEMANTICS
-#endif
-/* Enable extensions on HP NonStop. */
-#ifndef _TANDEM_SOURCE
-# undef _TANDEM_SOURCE
-#endif
-/* Enable general extensions on Solaris. */
-#ifndef __EXTENSIONS__
-# undef __EXTENSIONS__
-#endif
-
-
/* Version number of package */
#undef VERSION
@@ -118,13 +123,3 @@
# undef WORDS_BIGENDIAN
# endif
#endif
-
-/* Define to 1 if on MINIX. */
-#undef _MINIX
-
-/* Define to 2 if the system does not provide POSIX.1 features except with
- this defined. */
-#undef _POSIX_1_SOURCE
-
-/* Define to 1 if you need to in order for `stat' and other things to work. */
-#undef _POSIX_SOURCE
diff --git a/src/snappy/google-snappy/snappy-sinksource.cc b/src/snappy/google-snappy/snappy-sinksource.cc
index 1017895f9..5844552cb 100644
--- a/src/snappy/google-snappy/snappy-sinksource.cc
+++ b/src/snappy/google-snappy/snappy-sinksource.cc
@@ -68,5 +68,4 @@ char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
return dest_;
}
-
}
diff --git a/src/snappy/google-snappy/snappy-sinksource.h b/src/snappy/google-snappy/snappy-sinksource.h
index 430baeabb..faabfa1e6 100644
--- a/src/snappy/google-snappy/snappy-sinksource.h
+++ b/src/snappy/google-snappy/snappy-sinksource.h
@@ -60,6 +60,7 @@ class Sink {
// The default implementation always returns the scratch buffer.
virtual char* GetAppendBuffer(size_t length, char* scratch);
+
private:
// No copying
Sink(const Sink&);
diff --git a/src/snappy/google-snappy/snappy-stubs-internal.h b/src/snappy/google-snappy/snappy-stubs-internal.h
index 46ee23542..6033cdfb4 100644
--- a/src/snappy/google-snappy/snappy-stubs-internal.h
+++ b/src/snappy/google-snappy/snappy-stubs-internal.h
@@ -42,7 +42,7 @@
#include <stdlib.h>
#include <string.h>
-#ifdef HAVE_SYS_MMAN
+#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
@@ -86,10 +86,9 @@ using namespace std;
// version (anyone who wants to regenerate it can just do the call
// themselves within main()).
#define DEFINE_bool(flag_name, default_value, description) \
- bool FLAGS_ ## flag_name = default_value;
+ bool FLAGS_ ## flag_name = default_value
#define DECLARE_bool(flag_name) \
- extern bool FLAGS_ ## flag_name;
-#define REGISTER_MODULE_INITIALIZER(name, code)
+ extern bool FLAGS_ ## flag_name
namespace snappy {
@@ -179,6 +178,8 @@ class LogMessageVoidify {
// Potentially unaligned loads and stores.
+// x86 and PowerPC can simply do these loads and stores native.
+
#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
@@ -189,6 +190,47 @@ class LogMessageVoidify {
#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
+// ARMv7 and newer support native unaligned accesses, but only of 16-bit
+// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
+// do an unaligned read and rotate the words around a bit, or do the reads very
+// slowly (trip through kernel mode). There's no simple #define that says just
+// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
+// sub-architectures.
+//
+// This is a mess, but there's not much we can do about it.
+
+#elif defined(__arm__) && \
+ !defined(__ARM_ARCH_5__) && \
+ !defined(__ARM_ARCH_5T__) && \
+ !defined(__ARM_ARCH_5TE__) && \
+ !defined(__ARM_ARCH_5TEJ__) && \
+ !defined(__ARM_ARCH_6__) && \
+ !defined(__ARM_ARCH_6J__) && \
+ !defined(__ARM_ARCH_6K__) && \
+ !defined(__ARM_ARCH_6Z__) && \
+ !defined(__ARM_ARCH_6ZK__) && \
+ !defined(__ARM_ARCH_6T2__)
+
+#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+
+#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
+#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+
+// TODO(user): NEON supports unaligned 64-bit loads and stores.
+// See if that would be more efficient on platforms supporting it,
+// at least for copies.
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+ uint64 t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+ memcpy(p, &v, sizeof v);
+}
+
#else
// These functions are provided for architectures that don't support
@@ -226,9 +268,31 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
#endif
+// This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
+// on some platforms, in particular ARM.
+inline void UnalignedCopy64(const void *src, void *dst) {
+ if (sizeof(void *) == 8) {
+ UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
+ } else {
+ const char *src_char = reinterpret_cast<const char *>(src);
+ char *dst_char = reinterpret_cast<char *>(dst);
+
+ UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
+ UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
+ }
+}
+
// The following guarantees declaration of the byte swap functions.
#ifdef WORDS_BIGENDIAN
+#ifdef HAVE_SYS_BYTEORDER_H
+#include <sys/byteorder.h>
+#endif
+
+#ifdef HAVE_SYS_ENDIAN_H
+#include <sys/endian.h>
+#endif
+
#ifdef _MSC_VER
#include <stdlib.h>
#define bswap_16(x) _byteswap_ushort(x)
@@ -242,8 +306,38 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
#define bswap_32(x) OSSwapInt32(x)
#define bswap_64(x) OSSwapInt64(x)
-#else
+#elif defined(HAVE_BYTESWAP_H)
#include <byteswap.h>
+
+#elif defined(bswap32)
+// FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included).
+#define bswap_16(x) bswap16(x)
+#define bswap_32(x) bswap32(x)
+#define bswap_64(x) bswap64(x)
+
+#elif defined(BSWAP_64)
+// Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included).
+#define bswap_16(x) BSWAP_16(x)
+#define bswap_32(x) BSWAP_32(x)
+#define bswap_64(x) BSWAP_64(x)
+
+#else
+
+inline uint16 bswap_16(uint16 x) {
+ return (x << 8) | (x >> 8);
+}
+
+inline uint32 bswap_32(uint32 x) {
+ x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8);
+ return (x >> 16) | (x << 16);
+}
+
+inline uint64 bswap_64(uint64 x) {
+ x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8);
+ x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16);
+ return (x >> 32) | (x << 32);
+}
+
#endif
#endif // WORDS_BIGENDIAN
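
As a quick sanity check on the shift arithmetic in the portable byte-swap fallback added above, the following standalone sketch (not part of the commit) mirrors those fallback definitions using <cstdint> fixed-width types instead of snappy's own uint16/uint32/uint64 typedefs — the *_ref names are hypothetical — and verifies them against hand-computed values:

#include <cassert>
#include <cstdint>

// Mirrors the fallback bswap helpers added in snappy-stubs-internal.h.
static inline uint16_t bswap_16_ref(uint16_t x) {
  return static_cast<uint16_t>((x << 8) | (x >> 8));
}

static inline uint32_t bswap_32_ref(uint32_t x) {
  x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8);  // swap adjacent bytes
  return (x >> 16) | (x << 16);                               // swap 16-bit halves
}

static inline uint64_t bswap_64_ref(uint64_t x) {
  x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8);    // bytes
  x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16);  // 16-bit pairs
  return (x >> 32) | (x << 32);                                                   // 32-bit halves
}

int main() {
  assert(bswap_16_ref(0x1234u) == 0x3412u);
  assert(bswap_32_ref(0x12345678u) == 0x78563412u);
  assert(bswap_64_ref(0x0102030405060708ULL) == 0x0807060504030201ULL);
  return 0;
}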
diff --git a/src/snappy/google-snappy/snappy.cc b/src/snappy/google-snappy/snappy.cc
index a591aba59..4d4eb42a4 100644
--- a/src/snappy/google-snappy/snappy.cc
+++ b/src/snappy/google-snappy/snappy.cc
@@ -140,12 +140,12 @@ const int kMaxIncrementCopyOverflow = 10;
static inline void IncrementalCopyFastPath(const char* src, char* op, int len) {
while (op - src < 8) {
- UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
+ UnalignedCopy64(src, op);
len -= op - src;
op += op - src;
}
while (len > 0) {
- UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
+ UnalignedCopy64(src, op);
src += 8;
op += 8;
len -= 8;
@@ -172,8 +172,8 @@ static inline char* EmitLiteral(char* op,
// - The output will always have 32 spare bytes (see
// MaxCompressedLength).
if (allow_fast_path && len <= 16) {
- UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal));
- UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(literal + 8));
+ UnalignedCopy64(literal, op);
+ UnalignedCopy64(literal + 8, op + 8);
return op + len;
}
} else {
@@ -194,13 +194,13 @@ static inline char* EmitLiteral(char* op,
return op + len;
}
-static inline char* EmitCopyLessThan64(char* op, int offset, int len) {
+static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
DCHECK_LE(len, 64);
DCHECK_GE(len, 4);
DCHECK_LT(offset, 65536);
if ((len < 12) && (offset < 2048)) {
- int len_minus_4 = len - 4;
+ size_t len_minus_4 = len - 4;
assert(len_minus_4 < 8); // Must fit in 3 bits
*op++ = COPY_1_BYTE_OFFSET | ((len_minus_4) << 2) | ((offset >> 8) << 5);
*op++ = offset & 0xff;
@@ -212,7 +212,7 @@ static inline char* EmitCopyLessThan64(char* op, int offset, int len) {
return op;
}
-static inline char* EmitCopy(char* op, int offset, int len) {
+static inline char* EmitCopy(char* op, size_t offset, int len) {
// Emit 64 byte copies but make sure to keep at least four bytes reserved
while (len >= 68) {
op = EmitCopyLessThan64(op, offset, 64);
@@ -249,7 +249,7 @@ uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
// compression, and if the input is short, we won't need that
// many hash table entries anyway.
assert(kMaxHashTableSize >= 256);
- int htsize = 256;
+ size_t htsize = 256;
while (htsize < kMaxHashTableSize && htsize < input_size) {
htsize <<= 1;
}
@@ -272,16 +272,49 @@ uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
}
} // end namespace internal
-// For 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) will
+// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
// empirically found that overlapping loads such as
// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
+//
+// We have different versions for 64- and 32-bit; ideally we would avoid the
+// two functions and just inline the UNALIGNED_LOAD64 call into
+// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
+// enough to avoid loading the value multiple times then. For 64-bit, the load
+// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
+// done at GetUint32AtOffset() time.
+
+#ifdef ARCH_K8
+
+typedef uint64 EightBytesReference;
+
+static inline EightBytesReference GetEightBytesAt(const char* ptr) {
+ return UNALIGNED_LOAD64(ptr);
+}
+
static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
- DCHECK(0 <= offset && offset <= 4) << offset;
+ DCHECK_GE(offset, 0);
+ DCHECK_LE(offset, 4);
return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
}
+#else
+
+typedef const char* EightBytesReference;
+
+static inline EightBytesReference GetEightBytesAt(const char* ptr) {
+ return ptr;
+}
+
+static inline uint32 GetUint32AtOffset(const char* v, int offset) {
+ DCHECK_GE(offset, 0);
+ DCHECK_LE(offset, 4);
+ return UNALIGNED_LOAD32(v + offset);
+}
+
+#endif
+
// Flat array compression that does not emit the "uncompressed length"
// prefix. Compresses "input" string to the "*op" buffer.
//
@@ -294,8 +327,8 @@ static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
// Returns an "end" pointer into "op" buffer.
// "end - op" is the compressed size of "input".
namespace internal {
-char* CompressFragment(const char* const input,
- const size_t input_size,
+char* CompressFragment(const char* input,
+ size_t input_size,
char* op,
uint16* table,
const int table_size) {
@@ -304,14 +337,14 @@ char* CompressFragment(const char* const input,
CHECK_LE(input_size, kBlockSize);
CHECK_EQ(table_size & (table_size - 1), 0) << ": table must be power of two";
const int shift = 32 - Bits::Log2Floor(table_size);
- DCHECK_EQ(kuint32max >> shift, table_size - 1);
+ DCHECK_EQ(static_cast<int>(kuint32max >> shift), table_size - 1);
const char* ip_end = input + input_size;
const char* base_ip = ip;
// Bytes in [next_emit, ip) will be emitted as literal bytes. Or
// [next_emit, ip_end) after the main loop.
const char* next_emit = ip;
- const int kInputMarginBytes = 15;
+ const size_t kInputMarginBytes = 15;
if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
const char* ip_limit = input + input_size - kInputMarginBytes;
@@ -378,7 +411,7 @@ char* CompressFragment(const char* const input,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can exit
// this loop via goto if we get close to exhausting the input.
- uint64 input_bytes = 0;
+ EightBytesReference input_bytes;
uint32 candidate_bytes = 0;
do {
@@ -387,7 +420,7 @@ char* CompressFragment(const char* const input,
const char* base = ip;
int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
ip += matched;
- int offset = base - candidate;
+ size_t offset = base - candidate;
DCHECK_EQ(0, memcmp(base, candidate, matched));
op = EmitCopy(op, offset, matched);
// We could immediately start working at ip now, but to improve
@@ -397,7 +430,7 @@ char* CompressFragment(const char* const input,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
- input_bytes = UNALIGNED_LOAD64(insert_tail);
+ input_bytes = GetEightBytesAt(insert_tail);
uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
table[prev_hash] = ip - base_ip - 1;
uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
@@ -435,12 +468,26 @@ char* CompressFragment(const char* const input,
// bool CheckLength() const;
//
// // Called repeatedly during decompression
-// bool Append(const char* ip, uint32 length, bool allow_fast_path);
-// bool AppendFromSelf(uint32 offset, uint32 length);
-// };
+// bool Append(const char* ip, size_t length);
+// bool AppendFromSelf(uint32 offset, size_t length);
//
-// "allow_fast_path" is a parameter that says if there is at least 16
-// readable bytes in "ip". It is currently only used by SnappyArrayWriter.
+// // The difference between TryFastAppend and Append is that TryFastAppend
+// // is allowed to read up to <available> bytes from the input buffer,
+// // whereas Append is allowed to read <length>.
+// //
+// // Also, TryFastAppend is allowed to return false, declining the append,
+// // without it being a fatal error -- just "return false" would be
+// // a perfectly legal implementation of TryFastAppend. The intention
+// // is for TryFastAppend to allow a fast path in the common case of
+// // a small append.
+// //
+// // NOTE(user): TryFastAppend must always return decline (return false)
+// // if <length> is 61 or more, as in this case the literal length is not
+// // decoded fully. In practice, this should not be a big problem,
+// // as it is unlikely that one would implement a fast path accepting
+// // this much data.
+// bool TryFastAppend(const char* ip, size_t available, size_t length);
+// };
// -----------------------------------------------------------------------
// Lookup table for decompression code. Generated by ComputeTable() below.
@@ -587,7 +634,6 @@ static void ComputeTable() {
CHECK_EQ(dst[i], char_table[i]);
}
}
-REGISTER_MODULE_INITIALIZER(snappy, ComputeTable());
#endif /* !NDEBUG */
// Helper class for decompression
@@ -655,25 +701,41 @@ class SnappyDecompressor {
template <class Writer>
void DecompressAllTags(Writer* writer) {
const char* ip = ip_;
- for ( ;; ) {
- if (ip_limit_ - ip < 5) {
- ip_ = ip;
- if (!RefillTag()) return;
- ip = ip_;
- }
+ // We could have put this refill fragment only at the beginning of the loop.
+ // However, duplicating it at the end of each branch gives the compiler more
+ // scope to optimize the <ip_limit_ - ip> expression based on the local
+ // context, which overall increases speed.
+ #define MAYBE_REFILL() \
+ if (ip_limit_ - ip < 5) { \
+ ip_ = ip; \
+ if (!RefillTag()) return; \
+ ip = ip_; \
+ }
+
+ MAYBE_REFILL();
+ for ( ;; ) {
const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
- const uint32 entry = char_table[c];
- const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
- ip += entry >> 11;
- const uint32 length = entry & 0xff;
if ((c & 0x3) == LITERAL) {
- uint32 literal_length = length + trailer;
- uint32 avail = ip_limit_ - ip;
+ size_t literal_length = (c >> 2) + 1u;
+ if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
+ DCHECK_LT(literal_length, 61);
+ ip += literal_length;
+ MAYBE_REFILL();
+ continue;
+ }
+ if (PREDICT_FALSE(literal_length >= 61)) {
+ // Long literal.
+ const size_t literal_length_length = literal_length - 60;
+ literal_length =
+ (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
+ ip += literal_length_length;
+ }
+
+ size_t avail = ip_limit_ - ip;
while (avail < literal_length) {
- bool allow_fast_path = (avail >= 16);
- if (!writer->Append(ip, avail, allow_fast_path)) return;
+ if (!writer->Append(ip, avail)) return;
literal_length -= avail;
reader_->Skip(peeked_);
size_t n;
@@ -683,12 +745,17 @@ class SnappyDecompressor {
if (avail == 0) return; // Premature end of input
ip_limit_ = ip + avail;
}
- bool allow_fast_path = (avail >= 16);
- if (!writer->Append(ip, literal_length, allow_fast_path)) {
+ if (!writer->Append(ip, literal_length)) {
return;
}
ip += literal_length;
+ MAYBE_REFILL();
} else {
+ const uint32 entry = char_table[c];
+ const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
+ const uint32 length = entry & 0xff;
+ ip += entry >> 11;
+
// copy_offset/256 is encoded in bits 8..10. By just fetching
// those bits, we get copy_offset (since the bit-field starts at
// bit 8).
@@ -696,8 +763,11 @@ class SnappyDecompressor {
if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
return;
}
+ MAYBE_REFILL();
}
}
+
+#undef MAYBE_REFILL
}
};
@@ -768,6 +838,15 @@ static bool InternalUncompress(Source* r,
SnappyDecompressor decompressor(r);
uint32 uncompressed_len = 0;
if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
+ return InternalUncompressAllTags(
+ &decompressor, writer, uncompressed_len, max_len);
+}
+
+template <typename Writer>
+static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
+ Writer* writer,
+ uint32 uncompressed_len,
+ uint32 max_len) {
// Protect against possible DoS attack
if (static_cast<uint64>(uncompressed_len) > max_len) {
return false;
@@ -776,8 +855,8 @@ static bool InternalUncompress(Source* r,
writer->SetExpectedLength(uncompressed_len);
// Process the entire input
- decompressor.DecompressAllTags(writer);
- return (decompressor.eof() && writer->CheckLength());
+ decompressor->DecompressAllTags(writer);
+ return (decompressor->eof() && writer->CheckLength());
}
bool GetUncompressedLength(Source* source, uint32* result) {
@@ -787,7 +866,7 @@ bool GetUncompressedLength(Source* source, uint32* result) {
size_t Compress(Source* reader, Sink* writer) {
size_t written = 0;
- int N = reader->Available();
+ size_t N = reader->Available();
char ulength[Varint::kMax32];
char* p = Varint::Encode32(ulength, N);
writer->Append(ulength, p-ulength);
@@ -802,10 +881,10 @@ size_t Compress(Source* reader, Sink* writer) {
size_t fragment_size;
const char* fragment = reader->Peek(&fragment_size);
DCHECK_NE(fragment_size, 0) << ": premature end of input";
- const int num_to_read = min(N, kBlockSize);
+ const size_t num_to_read = min(N, kBlockSize);
size_t bytes_read = fragment_size;
- int pending_advance = 0;
+ size_t pending_advance = 0;
if (bytes_read >= num_to_read) {
// Buffer returned by reader is large enough
pending_advance = num_to_read;
@@ -893,34 +972,42 @@ class SnappyArrayWriter {
return op_ == op_limit_;
}
- inline bool Append(const char* ip, uint32 len, bool allow_fast_path) {
+ inline bool Append(const char* ip, size_t len) {
char* op = op_;
- const int space_left = op_limit_ - op;
- if (allow_fast_path && len <= 16 && space_left >= 16) {
- // Fast path, used for the majority (about 90%) of dynamic invocations.
- UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip));
- UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8));
- } else {
- if (space_left < len) {
- return false;
- }
- memcpy(op, ip, len);
+ const size_t space_left = op_limit_ - op;
+ if (space_left < len) {
+ return false;
}
+ memcpy(op, ip, len);
op_ = op + len;
return true;
}
- inline bool AppendFromSelf(uint32 offset, uint32 len) {
+ inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
char* op = op_;
- const int space_left = op_limit_ - op;
+ const size_t space_left = op_limit_ - op;
+ if (len <= 16 && available >= 16 && space_left >= 16) {
+ // Fast path, used for the majority (about 95%) of invocations.
+ UnalignedCopy64(ip, op);
+ UnalignedCopy64(ip + 8, op + 8);
+ op_ = op + len;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ inline bool AppendFromSelf(size_t offset, size_t len) {
+ char* op = op_;
+ const size_t space_left = op_limit_ - op;
if (op - base_ <= offset - 1u) { // -1u catches offset==0
return false;
}
if (len <= 16 && offset >= 8 && space_left >= 16) {
// Fast path, used for the majority (70-80%) of dynamic invocations.
- UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset));
- UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8));
+ UnalignedCopy64(op - offset, op);
+ UnalignedCopy64(op - offset + 8, op + 8);
} else {
if (space_left >= len + kMaxIncrementCopyOverflow) {
IncrementalCopyFastPath(op - offset, op, len);
@@ -976,11 +1063,14 @@ class SnappyDecompressionValidator {
inline bool CheckLength() const {
return expected_ == produced_;
}
- inline bool Append(const char* ip, uint32 len, bool allow_fast_path) {
+ inline bool Append(const char* ip, size_t len) {
produced_ += len;
return produced_ <= expected_;
}
- inline bool AppendFromSelf(uint32 offset, uint32 len) {
+ inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
+ return false;
+ }
+ inline bool AppendFromSelf(size_t offset, size_t len) {
if (produced_ <= offset - 1u) return false; // -1u catches offset==0
produced_ += len;
return produced_ <= expected_;
diff --git a/src/snappy/google-snappy/snappy.h b/src/snappy/google-snappy/snappy.h
index 8d6ef2294..8c2075fef 100644
--- a/src/snappy/google-snappy/snappy.h
+++ b/src/snappy/google-snappy/snappy.h
@@ -144,10 +144,10 @@ namespace snappy {
// decompression code should not rely on this guarantee since older
// compression code may not obey it.
static const int kBlockLog = 15;
- static const int kBlockSize = 1 << kBlockLog;
+ static const size_t kBlockSize = 1 << kBlockLog;
static const int kMaxHashTableBits = 14;
- static const int kMaxHashTableSize = 1 << kMaxHashTableBits;
+ static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
} // end namespace snappy
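
Beyond the block-size constants touched above, snappy.h also declares the library's high-level string routines (Compress, Uncompress, IsValidCompressedBuffer). A minimal round-trip sketch against that public API — again not part of the commit, and assuming the header and library are available as <snappy.h> and -lsnappy — looks like this:

#include <cassert>
#include <string>
#include <snappy.h>  // public header from google-snappy

int main() {
  const std::string original(100000, 'x');  // highly compressible input

  // Compress into a std::string; the return value is the compressed length.
  std::string compressed;
  const size_t compressed_size =
      snappy::Compress(original.data(), original.size(), &compressed);
  assert(compressed_size == compressed.size());

  // Cheap integrity check without fully decompressing.
  assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));

  // Decompress and confirm the round trip is lossless.
  std::string restored;
  const bool ok =
      snappy::Uncompress(compressed.data(), compressed.size(), &restored);
  assert(ok && restored == original);
  return 0;
}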
diff --git a/src/snappy/snappy.app.in b/src/snappy/snappy.app.in
index 4ee1cda56..25d37b5ea 100644
--- a/src/snappy/snappy.app.in
+++ b/src/snappy/snappy.app.in
@@ -1,7 +1,7 @@
{application, snappy,
[
{description, "snappy compressor/decompressor Erlang NIF wrapper"},
- {vsn, "1.0.3"},
+ {vsn, "1.0.5"},
{registered, []},
{applications, [
kernel,